"""Scrape game pages listed by a Steam store search and insert them into MySQL."""
import re

import bs4
import lxml
import pymysql
import requests

SEARCH_URL = 'https://store.steampowered.com/search/?filter=popularnew&sort_by=Released_DESC&os=win'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'
}

# Seconds to wait for a response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# NOTE(review): these patterns look as if their HTML tag delimiters were lost
# (both review patterns are the bare '(.*?)', which matches the empty string
# at every position, so the fallback pattern can never be reached).
# TODO: restore the intended markup around each capture group and confirm
# against a live store page.
TITLE_PATTERN = r'\n(.*?)\n'
PRICE_PATTERN = r'\n(.*?)\n'
RELEASE_DATE_PATTERN = r'Release Date:(.*?)\n'
REVIEW_PATTERN = r'(.*?)'
REVIEW_FALLBACK_PATTERN = r'(.*?)'

INSERT_SQL = '''
    INSERT INTO steam1(name,price,time,appraise,Brief_introduction,label,system_requirements) VALUES (%s,%s,%s,%s,%s,%s,%s)
    '''


def _fetch(url):
    """Return the body of *url* decoded as UTF-8."""
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    return response.text


def _first_match(pattern, html, default=""):
    """Return the first ``re.findall`` hit of *pattern* in *html*, or *default*."""
    matches = re.findall(pattern, html)
    return matches[0] if matches else default


def urlList():
    """Return the ``href`` of every result link on the search page.

    Returns:
        A list with one entry per ``<a>`` directly under
        ``div#search_resultsRows``, in page order.
    """
    soup = bs4.BeautifulSoup(_fetch(SEARCH_URL), 'lxml')
    return [link.get("href") for link in soup.select("div#search_resultsRows>a")]


def text(url):
    """Scrape one game page.

    Args:
        url: Address of the page to download.

    Returns:
        The string ``"Huh"`` when no title can be extracted; otherwise a list
        ``[title, price, release date, review, description, tags,
        system requirements]`` whose order matches the columns written by
        ``mysql``.
    """
    html = _fetch(url)

    # Title: the caller treats a page without one as unusable.
    titles = re.findall(TITLE_PATTERN, html)
    if not titles:
        return "Huh"
    row = [titles[0]]

    # Parse once and reuse the tree for every selector below.
    soup = bs4.BeautifulSoup(html, 'lxml')

    # Price: prefer the purchase-price element, then the regex, then a
    # fixed placeholder.
    price_nodes = soup.select("div.game_purchase_price")
    if price_nodes:
        row.append(price_nodes[0].get_text(strip=True))
    else:
        row.append(_first_match(PRICE_PATTERN, html, "Just start playing"))

    # Release date: empty string instead of an IndexError when absent.
    row.append(_first_match(RELEASE_DATE_PATTERN, html))

    # Review: try the primary pattern, then the fallback, then give up with
    # an empty string instead of an IndexError.
    reviews = re.findall(REVIEW_PATTERN, html)
    if not reviews:
        reviews = re.findall(REVIEW_FALLBACK_PATTERN, html)
    row.append(reviews[0] if reviews else "")

    # Brief introduction: text of every description element, concatenated.
    row.append("".join(
        node.get_text(strip=True)
        for node in soup.select("div#game_area_description")
    ))

    # Tags: concatenated with no separator.
    row.append("".join(
        node.get_text(strip=True)
        for node in soup.select("div.glance_tags.popular_tags>a")
    ))

    # System requirements: each section prefixed with a newline.
    row.append("".join(
        "\n" + node.get_text(strip=True)
        for node in soup.select("div.sys_req.game_page_autocollapse")
    ))

    print("一次")
    return row


def mysql(arr):
    """Insert one scraped row into the ``steam1`` table.

    Args:
        arr: Sequence of seven values in the column order of ``INSERT_SQL``.
    """
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment.
    db = pymysql.connect(user='root', password='12345678', host='localhost', db='spark')
    try:
        # Cursor is released before the connection, even if execute() raises.
        with db.cursor() as cur:
            cur.execute(INSERT_SQL, arr)
        db.commit()
    finally:
        db.close()
    print("mysql")


urllist = urlList()
for url in urllist:
    print(url)
    arr = text(url)
    if arr == "Huh":
        print("这个要认证")
    else:
        mysql(arr)