# dabian/1.py

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


def fetch_job_info1(url, output_file1, timeout=10):
    """Download a page and append the text of every ``<p>`` element to a file.

    Each paragraph's text is stripped of surrounding whitespace and written
    on its own line.  The file is opened in append mode, so repeated calls
    accumulate output.

    Args:
        url: Address of the page to fetch.
        output_file1: Path of the UTF-8 text file to append to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=head, timeout=timeout)
    # Decode the body as UTF-8 regardless of what the response headers declare.
    r.encoding = 'utf-8'
    mysoup = BeautifulSoup(r.text, 'lxml')
    # One output line per paragraph, each terminated by a newline.
    job_info = "".join(
        paragraph.get_text(strip=True) + "\n"
        for paragraph in mysoup.select("p")
    )
    # Append the collected text to the output file.
    with open(output_file1, 'a', encoding='utf-8') as f:
        f.write(job_info)

def fetch_job_info2(url1, timeout=10):
    """Return the text of the first ``<p class="name">`` element on a page.

    Args:
        url1: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The element's text exactly as ``get_text()`` yields it (not
        stripped), or ``None`` when the page has no matching element.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get(url1, headers=head, timeout=timeout)
    # Decode the body as UTF-8 regardless of what the response headers declare.
    r.encoding = 'utf-8'
    mysoup = BeautifulSoup(r.text, 'lxml')
    # Only the first match is wanted, so ask for a single element directly
    # instead of looping over every match and returning on the first pass.
    first_match = mysoup.select_one("p.name")
    if first_match is None:
        return None
    return first_match.get_text()

# Crawl script: read the index page at `url`, follow every link found inside
# a `div.item` element, and for each linked page
#   * append all of its paragraph text to `output_file1`, and
#   * append its `p.name` text followed by its `div.sub-menu` link URLs
#     (one per line) to `output_file2`.
base_url = "http://www.jvrmusic.com"
url = "http://www.jvrmusic.com/artist"
output_file1 = "artist.txt"
output_file2 = "artist_info.txt"

head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}
# A timeout keeps the crawl from hanging forever on an unresponsive server.
r = requests.get(url, headers=head, timeout=10)
r.encoding = 'utf-8'
mysoup = BeautifulSoup(r.text, 'lxml')
artist_links = mysoup.select("div.item a")

for artist_link in artist_links:
    artist_href = artist_link.get("href")
    if artist_href is None:
        # An anchor without an href has no page to follow.
        continue
    # urljoin copes with root-relative, relative and absolute hrefs alike,
    # where plain string concatenation would produce a malformed URL.
    url1 = urljoin(base_url, artist_href)

    # Fetch the name once and reuse it; a missing name becomes an empty line
    # rather than a TypeError when the newline is appended.
    artist_name = fetch_job_info2(url1)
    fetch_job_info1(url1, output_file1)

    r = requests.get(url1, headers=head, timeout=10)
    # Same explicit decoding as every other request in this file.
    r.encoding = 'utf-8'
    mysoup2 = BeautifulSoup(r.text, 'lxml')
    artist_links2 = mysoup2.select("div.sub-menu a")

    # First line is the name, then one line per sub-menu link target.
    lines = [artist_name or ""]
    for sub_link in artist_links2:
        sub_href = sub_link.get("href")
        if sub_href is None:
            continue
        lines.append(urljoin(base_url, sub_href))
    info = "".join(line + "\n" for line in lines)

    with open(output_file2, 'a', encoding='utf-8') as f:
        f.write(info)