From f7dae943658a8a242f986cb44f258a4b66f6d455 Mon Sep 17 00:00:00 2001
From: p8cl2y4fn <13097044987@163.com>
Date: Wed, 12 Jun 2024 14:07:30 +0800
Subject: [PATCH] ADD file via upload

---
 1.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 1.py

diff --git a/1.py b/1.py
new file mode 100644
index 0000000..3375e3d
--- /dev/null
+++ b/1.py
@@ -0,0 +1,62 @@
+import requests
+from bs4 import BeautifulSoup
+
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
+}
+
+
+def fetch_job_info1(url, output_file1):
+    """Scrape all <p> text from the page and append it to output_file1."""
+    r = requests.get(url, headers=HEADERS)
+    r.encoding = 'utf-8'
+    mysoup = BeautifulSoup(r.text, 'lxml')
+    result = mysoup.select("p")
+    job_info = ""
+    for t in result:
+        job_info += t.get_text(strip=True) + "\n"
+    # Write the collected text to the output file
+    with open(output_file1, 'a', encoding='utf-8') as f:
+        f.write(job_info)
+
+
+def fetch_job_info2(url1):
+    """Return the text of the last <p class="name"> element on the page."""
+    r = requests.get(url1, headers=HEADERS)
+    r.encoding = 'utf-8'
+    mysoup = BeautifulSoup(r.text, 'lxml')
+    result = mysoup.select("p.name")
+    job_info = ""
+    for t in result:
+        job_info = t.get_text()
+    return job_info
+
+
+base_url = "http://www.jvrmusic.com"
+url = "http://www.jvrmusic.com/artist"
+output_file1 = "artist.txt"
+output_file2 = "artist_info.txt"
+
+# Fetch the artist index page and collect the link to each artist's page
+r = requests.get(url, headers=HEADERS)
+r.encoding = 'utf-8'
+mysoup = BeautifulSoup(r.text, 'lxml')
+artist_links = mysoup.select("div.item a")
+
+# Visit each artist page in turn
+for t in artist_links:
+    url1 = base_url + t.get("href")
+    # Append the artist page's paragraph text to artist.txt
+    fetch_job_info1(url1, output_file1)
+    # Fetch the artist page again to collect its sub-menu links
+    r = requests.get(url1, headers=HEADERS)
+    r.encoding = 'utf-8'
+    mysoup2 = BeautifulSoup(r.text, 'lxml')
+    artist_links2 = mysoup2.select("div.sub-menu a")
+    # Record the artist's name followed by each sub-menu URL
+    info = fetch_job_info2(url1) + "\n"
+    for a in artist_links2:
+        url2 = base_url + a.get("href")
+        info += url2 + "\n"
+    with open(output_file2, 'a', encoding='utf-8') as f:
+        f.write(info)
\ No newline at end of file