"""Scrape the artist listing on www.jvrmusic.com into two text files.

For every link matched by ``div.item a`` on the listing page, the linked
page is downloaded once and:

* the text of all of its ``<p>`` elements is appended to ``output_file1``;
* the text of its ``p.name`` element, followed by the absolute URLs of its
  ``div.sub-menu a`` links, is appended to ``output_file2``.
"""
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

base_url = "http://www.jvrmusic.com"
url = base_url + "/artist"
output_file1 = "artist.txt"
output_file2 = "artist_info.txt"
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# requests never times out unless told to; without this a stalled server
# would hang the crawl indefinitely.
REQUEST_TIMEOUT = 10


def _get_soup(page_url):
    """Download *page_url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response (so error pages are never scraped as data).
    """
    response = requests.get(page_url, headers=head, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'lxml')


def _paragraph_text(soup):
    """Return the stripped text of every ``<p>`` in *soup*, one per line."""
    return "".join(p.get_text(strip=True) + "\n" for p in soup.select("p"))


def _artist_name(soup):
    """Return the text of the last ``p.name`` element, or "" if there is none."""
    matches = soup.select("p.name")
    return matches[-1].get_text() if matches else ""


def _absolute_links(soup, selector, page_url):
    """Return absolute URLs for the anchors in *soup* matching *selector*.

    Anchors without an ``href`` are skipped. Hrefs are resolved against
    *page_url*, so root-relative, relative and absolute links all work.
    """
    links = []
    for anchor in soup.select(selector):
        href = anchor.get("href")
        if href:
            links.append(urljoin(page_url, href))
    return links


def _append_text(path, text):
    """Append *text* to the UTF-8 file at *path*, creating it if needed."""
    with open(path, 'a', encoding='utf-8') as f:
        f.write(text)


def fetch_job_info1(url, output_file1):
    """Append the text of every ``<p>`` on the page at *url* to a file.

    Args:
        url: Address of the page to download.
        output_file1: Path of the file to append to (UTF-8).

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    # Write the information to the file.
    _append_text(output_file1, _paragraph_text(_get_soup(url)))


def fetch_job_info2(url1):
    """Return the text of the last ``p.name`` element on the page at *url1*.

    Returns an empty string when the page has no such element (previously
    this case raised ``UnboundLocalError``).

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    return _artist_name(_get_soup(url1))


def main():
    """Crawl the artist listing and write both output files."""
    index_soup = _get_soup(url)
    for artist_url in _absolute_links(index_soup, "div.item a", url):
        # Download each artist page once and reuse the parsed document for
        # the paragraphs, the name and the sub-menu links.
        try:
            artist_soup = _get_soup(artist_url)
        except requests.RequestException as exc:
            # One broken link should not abort the whole crawl.
            print("Skipping {}: {}".format(artist_url, exc), file=sys.stderr)
            continue

        _append_text(output_file1, _paragraph_text(artist_soup))

        lines = [_artist_name(artist_soup)]
        lines.extend(
            _absolute_links(artist_soup, "div.sub-menu a", artist_url)
        )
        _append_text(output_file2, "".join(line + "\n" for line in lines))


if __name__ == "__main__":
    main()