"""Scrape newly released popular Windows games from the Steam store into MySQL.

The script fetches the Steam search result page, visits every game page
linked from it, extracts a fixed set of fields and inserts one row per game
into the ``steam1`` table.
"""
import re

import bs4
import lxml  # noqa: F401  (kept from the original; bs4 is asked for the 'lxml' parser below)
import pymysql
import requests

_SEARCH_URL = 'https://store.steampowered.com/search/?filter=popularnew&sort_by=Released_DESC&os=win'

# Sent with every request; identical for the search page and the game pages.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'
}

# Seconds to wait for the server before giving up, so that one stalled
# connection cannot hang the whole run.
_REQUEST_TIMEOUT = 30

# NOTE(review): the patterns below are reproduced exactly as they appear in
# the reviewed source.  They look like they may have lost surrounding HTML
# markup somewhere upstream (the two review patterns are identical and match
# the empty string) -- confirm against the original file before relying on
# the extracted values.
_TITLE_PATTERN = '\n(.*?)\n'
_PRICE_PATTERN = '\n(.*?)\n'
_RELEASE_DATE_PATTERN = 'Release Date:(.*?)\n'
_REVIEW_PATTERN = '(.*?)'
_REVIEW_FALLBACK_PATTERN = '(.*?)'

_INSERT_SQL = '''
    INSERT INTO steam1(name,price,time,appraise,Brief_introduction,label,system_requirements) VALUES (%s,%s,%s,%s,%s,%s,%s)
    '''


def _fetch(url):
    """Download *url* with the shared headers and return the body as UTF-8 text."""
    response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    return response.text


def _first_group(pattern, html, default=None):
    """Return group 1 of the first match of *pattern* in *html*, else *default*."""
    match = re.search(pattern, html)
    return match.group(1) if match else default


def urlList():
    """Return the game page URLs linked from the Steam search result page.

    Anchors without an ``href`` attribute are skipped so that callers never
    receive ``None`` in place of a URL.
    """
    soup = bs4.BeautifulSoup(_fetch(_SEARCH_URL), 'lxml')
    anchors = soup.select("div#search_resultsRows>a")
    return [a.get("href") for a in anchors if a.get("href")]


def text(url):
    """Scrape one game page.

    Returns the string ``"Huh"`` when no title can be found on the page;
    otherwise a list of seven strings in the column order expected by
    ``mysql``: name, price, release date, review summary, description,
    tags, system requirements.  Fields that cannot be located fall back to
    an empty string (the price falls back to ``"Just start playing"``)
    instead of raising ``IndexError``.
    """
    html = _fetch(url)

    # Title -- without one the page is treated as unusable.
    title = _first_group(_TITLE_PATTERN, html)
    if title is None:
        return "Huh"

    # Parse once and reuse the tree for every CSS selection below.
    soup = bs4.BeautifulSoup(html, 'lxml')

    # Price: prefer the regular price element, then the regex, then a default.
    price_nodes = soup.select("div.game_purchase_price")
    if price_nodes:
        price = price_nodes[0].get_text(strip=True)
    else:
        price = _first_group(_PRICE_PATTERN, html, "Just start playing")

    # Release date; empty when the page does not show one.
    release_date = _first_group(_RELEASE_DATE_PATTERN, html, "")

    # Review summary: primary pattern first, then the fallback pattern.
    appraise = _first_group(_REVIEW_PATTERN, html)
    if appraise is None:
        appraise = _first_group(_REVIEW_FALLBACK_PATTERN, html, "")

    # Description.
    brief_introduction = "".join(
        node.get_text(strip=True)
        for node in soup.select("div#game_area_description")
    )

    # Tags, concatenated without a separator (same format as before).
    label = "".join(
        node.get_text(strip=True)
        for node in soup.select("div.glance_tags.popular_tags>a")
    )

    # System requirements, one section per line (each prefixed by a newline).
    system_requirements = "".join(
        "\n" + node.get_text(strip=True)
        for node in soup.select("div.sys_req.game_page_autocollapse")
    )

    print("一次")
    return [
        title,
        price,
        release_date,
        appraise,
        brief_introduction,
        label,
        system_requirements,
    ]


def mysql(arr):
    """Insert one scraped row (the list returned by ``text``) into ``steam1``.

    The cursor is closed before the connection, and the connection is closed
    even when the insert or the commit raises.
    """
    # NOTE(review): connection credentials are hard-coded here.
    db = pymysql.connect(user='root', password='12345678', host='localhost', db='spark')
    try:
        with db.cursor() as cur:
            cur.execute(_INSERT_SQL, arr)
        db.commit()
    finally:
        db.close()
    print("mysql")


def main():
    """Scrape every game on the search page and store each usable result."""
    for url in urlList():
        print(url)
        arr = text(url)
        if arr == "Huh":
            # Page had no title to extract.
            print("这个要认证")
        else:
            mysql(arr)


if __name__ == "__main__":
    main()