import re
import bs4
import lxml
import pymysql
import requests
def urlList():
    """Fetch the Steam search listing and return the links of its result rows.

    Requests the search page (``filter=popularnew``, ``sort_by=Released_DESC``,
    ``os=win``) with a mobile browser User-Agent, parses the response with
    BeautifulSoup using the lxml parser, and collects the ``href`` of every
    ``<a>`` that is a direct child of ``div#search_resultsRows``.

    Returns:
        list: the ``href`` values in document order. Anchors that carry no
        ``href`` are skipped so callers never receive ``None``.

    Raises:
        requests.RequestException: if the request fails or does not complete
        within the timeout.
    """
    url = 'https://store.steampowered.com/search/?filter=popularnew&sort_by=Released_DESC&os=win'
    head = {
        'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'
    }
    # Without a timeout requests waits indefinitely on a stalled connection,
    # which would freeze the whole crawl.
    r = requests.get(url, headers=head, timeout=10)
    r.encoding = 'utf-8'
    rows = bs4.BeautifulSoup(r.text, 'lxml').select("div#search_resultsRows>a")
    hrefs = (row.get("href") for row in rows)
    # Drop missing/empty hrefs: they cannot be fetched by the detail scraper.
    return [href for href in hrefs if href]
def text(url):
# Scrape one page at `url` and return its fields as a list.
#
# On success the returned list `arr` holds seven entries, appended in this
# order: title, price, release date, review summary, description, tags,
# system requirements. When the title pattern finds nothing, the function
# returns the string "Huh" instead of a list; the driver loop at the bottom
# of the file compares the result against that sentinel.
#
# NOTE(review): the indentation of this block and the HTML tag text inside
# several regex literals below appear to have been lost (the literals are
# split across lines). Restore them from the original file before running.
# NOTE(review): the local names `text` and `time` below reuse this function's
# own name and a common stdlib module name; harmless here but easy to trip on.
head = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'
}
# NOTE(review): no timeout is passed, so a stalled connection blocks forever.
r = requests.get(url, headers=head)
# Force UTF-8 decoding of the body regardless of what the response declares.
r.encoding = 'utf-8'
# print(r.text)
arr = []
# Title
# NOTE(review): pattern literal looks truncated -- presumably it wrapped
# (.*?) in the title element's opening/closing tags; confirm.
zz = '
(.*?)
'
title = re.findall(zz, r.text)
# No title match: bail out with the "Huh" sentinel instead of a field list.
if(len(title) == 0):
return "Huh"
else:
arr.append(title[0])
# Price
bea = bs4.BeautifulSoup(r.text, 'lxml')
text = bea.select("div.game_purchase_price")
# Prefer the first div.game_purchase_price element; otherwise fall back to a
# regex, and finally to the placeholder "Just start playing".
if (len(text) == 0):
# NOTE(review): fallback pattern literal looks truncated; confirm.
zz = '(.*?)
'
price = re.findall(zz, r.text)
if (len(price) == 0):
arr.append("Just start playing")
else:
arr.append(price[0])
else:
arr.append(text[0].get_text(strip=True))
# Release date
# NOTE(review): pattern literal looks truncated after (.*?); confirm.
zz = 'Release Date:(.*?)
'
time = re.findall(zz, r.text)
# NOTE(review): indexed without a length check -- raises IndexError when the
# pattern does not match.
arr.append(time[0])
# Review summary
# NOTE(review): as written, '(.*?)' matches the empty string everywhere, so
# the empty-result branch below would never run; presumably the surrounding
# tag text was lost from both patterns -- confirm.
zz = '(.*?)'
appraise = re.findall(zz, r.text)
if (len(appraise) == 0):
zz = '(.*?)'
appraise = re.findall(zz, r.text)
# NOTE(review): the second lookup is indexed without a length check, so an
# empty result raises IndexError.
arr.append(appraise[0])
else:
arr.append(appraise[0])
# Description
# NOTE(review): the same r.text is re-parsed here and twice more below; the
# tree built for the price lookup could be reused.
bea = bs4.BeautifulSoup(r.text, 'lxml')
text = bea.select("div#game_area_description")
Brief_introduction = ""
# Concatenate the stripped text of every matching element, no separator.
for i in text:
Brief_introduction = Brief_introduction + i.get_text(strip=True)
arr.append(Brief_introduction)
# Tags
bea = bs4.BeautifulSoup(r.text, 'lxml')
text = bea.select("div.glance_tags.popular_tags>a")
label = ""
# Tag texts are joined back-to-back with no delimiter between them.
for i in text:
label = label + i.get_text(strip=True)
arr.append(label)
# System requirements
bea = bs4.BeautifulSoup(r.text, 'lxml')
text = bea.select("div.sys_req.game_page_autocollapse")
system_requirements = ""
# Each matching block is prefixed with a newline, so a non-empty result
# starts with "\n".
for i in text:
system_requirements = system_requirements + "\n" + i.get_text(strip=True)
arr.append(system_requirements)
# Progress marker printed once per successfully scraped page.
print("一次")
return arr
def mysql(arr):
    """Insert one scraped record into the ``steam1`` table.

    Opens a new connection to the local ``spark`` database, runs a single
    parameterized INSERT, commits, and closes the connection. Prints
    ``mysql`` after a successful insert.

    Args:
        arr: sequence of seven values bound, in order, to the columns
            name, price, time, appraise, Brief_introduction, label,
            system_requirements.

    Raises:
        pymysql.Error: if connecting, executing, or committing fails. The
        connection is closed before the exception propagates.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment.
    db = pymysql.connect(user='root', password='12345678', host='localhost', db='spark')
    sql = '''
INSERT INTO steam1(name,price,time,appraise,Brief_introduction,label,system_requirements) VALUES (%s,%s,%s,%s,%s,%s,%s)
'''
    try:
        # The cursor is closed by the context manager while the connection is
        # still open (the previous code closed the connection first).
        with db.cursor() as cur:
            cur.execute(sql, arr)
        db.commit()
    finally:
        # Always release the connection, even when execute/commit raises.
        db.close()
    print("mysql")
# Driver: collect the listing links, scrape each page, and store every record
# that was scraped successfully.
urllist = urlList()
for url in urllist:
    print(url)
    arr = text(url)
    if arr != "Huh":
        mysql(arr)
        continue
    # text() returned its "Huh" sentinel: report the page and skip storing it.
    print("这个要认证")