From f7dae943658a8a242f986cb44f258a4b66f6d455 Mon Sep 17 00:00:00 2001
From: p8cl2y4fn <13097044987@163.com>
Date: Wed, 12 Jun 2024 14:07:30 +0800
Subject: [PATCH] ADD file via upload

---
 1.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 1.py

diff --git a/1.py b/1.py
new file mode 100644
index 0000000..3375e3d
--- /dev/null
+++ b/1.py
@@ -0,0 +1,62 @@
+import requests
+from bs4 import BeautifulSoup
+
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
+}
+
+
+def fetch_job_info1(url, output_file1):
+    """Scrape all <p> text from the page and append it to output_file1."""
+    r = requests.get(url, headers=HEADERS)
+    r.encoding = 'utf-8'
+    mysoup = BeautifulSoup(r.text, 'lxml')
+    result = mysoup.select("p")
+    job_info = ""
+    for t in result:
+        job_info += t.get_text(strip=True) + "\n"
+    # Write the collected text to the output file
+    with open(output_file1, 'a', encoding='utf-8') as f:
+        f.write(job_info)
+
+
+def fetch_job_info2(url1):
+    """Return the text of the last <p class="name"> element on the page."""
+    r = requests.get(url1, headers=HEADERS)
+    r.encoding = 'utf-8'
+    mysoup = BeautifulSoup(r.text, 'lxml')
+    result = mysoup.select("p.name")
+    job_info = ""
+    for t in result:
+        job_info = t.get_text()
+    return job_info
+
+
+base_url = "http://www.jvrmusic.com"
+url = "http://www.jvrmusic.com/artist"
+output_file1 = "artist.txt"
+output_file2 = "artist_info.txt"
+
+# Fetch the artist index page and collect the link to each artist's page
+r = requests.get(url, headers=HEADERS)
+r.encoding = 'utf-8'
+mysoup = BeautifulSoup(r.text, 'lxml')
+artist_links = mysoup.select("div.item a")
+
+# Visit each artist page in turn
+for t in artist_links:
+    url1 = base_url + t.get("href")
+    # Append the artist page's paragraph text to artist.txt
+    fetch_job_info1(url1, output_file1)
+    # Fetch the artist page again to collect its sub-menu links
+    r = requests.get(url1, headers=HEADERS)
+    r.encoding = 'utf-8'
+    mysoup2 = BeautifulSoup(r.text, 'lxml')
+    artist_links2 = mysoup2.select("div.sub-menu a")
+    # Record the artist's name followed by each sub-menu URL
+    info = fetch_job_info2(url1) + "\n"
+    for a in artist_links2:
+        url2 = base_url + a.get("href")
+        info += url2 + "\n"
+    with open(output_file2, 'a', encoding='utf-8') as f:
+        f.write(info)
\ No newline at end of file