parent
c0a8d80912
commit
e96d517cb2
@ -0,0 +1,113 @@
|
|||||||
|
import re
|
||||||
|
import bs4
|
||||||
|
import lxml
|
||||||
|
import pymysql
|
||||||
|
import requests
|
||||||
|
|
||||||
|
def urlList(timeout=10):
    """Collect the links listed under the "new releases" tab of the Steam store.

    Downloads ``https://store.steampowered.com/explore/new/``, parses it with
    BeautifulSoup (lxml parser) and gathers the ``href`` of every ``<a>`` that
    is a direct child of
    ``div#tab_newreleases_content > div.tab_content_items``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        list: The ``href`` values in page order. Anchors that carry no
        ``href`` attribute are skipped. The list is empty when the selector
        matches nothing.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://store.steampowered.com/explore/new/'
    head = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'
    }
    r = requests.get(url, headers=head, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty list.
    r.raise_for_status()
    r.encoding = 'utf-8'

    bea = bs4.BeautifulSoup(r.text, 'lxml')
    anchors = bea.select("div#tab_newreleases_content>div.tab_content_items>a")
    # Tag.get() returns None for a missing attribute; such entries would only
    # break the later per-URL request, so drop them here.
    return [a.get("href") for a in anchors if a.get("href")]
|
||||||
|
|
||||||
|
def text(url, timeout=10):
    """Scrape one Steam store page into a flat row of seven strings.

    The page is downloaded once and parsed once; individual fields are pulled
    out with a mix of regular expressions and CSS selectors.

    Args:
        url: Address of the store page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list | str: ``[title, price, release_date, review_summary,
        description, tags, system_requirements]`` on success. The literal
        string ``"Huh"`` is returned when the title element is not found
        (presumably an age-check or other interstitial page -- confirm
        against the caller, which skips such pages).

        Fields that are absent from the page do not raise: the price falls
        back to ``"Just start playing"``, and a missing release date or
        review summary becomes an empty string so the row always has seven
        entries.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0'
    }
    r = requests.get(url, headers=head, timeout=timeout)
    r.encoding = 'utf-8'
    html = r.text

    def first_match(pattern, default=""):
        """Return the first captured group of *pattern* in the page, or *default*."""
        found = re.search(pattern, html)
        return found.group(1) if found else default

    # Title -- its absence means this is not a regular store page.
    title = first_match('<div id="appHubAppName" class="apphub_AppName">(.*?)</div>', None)
    if title is None:
        return "Huh"
    arr = [title]

    # Parse the document a single time and reuse it for every selector below.
    soup = bs4.BeautifulSoup(html, 'lxml')

    # Price: regular price block first, then the discounted price, then the
    # placeholder used when neither is present.
    price_nodes = soup.select("div.game_purchase_price")
    if price_nodes:
        arr.append(price_nodes[0].get_text(strip=True))
    else:
        arr.append(first_match('<div class="discount_final_price">(.*?)</div>',
                               "Just start playing"))

    # Release date (kept exactly as captured, surrounding whitespace included).
    arr.append(first_match('<b>Release Date:</b>(.*?)<br>'))

    # Review summary: try the "positive" span, then the "mixed" one. Pages
    # with neither get an empty string instead of raising IndexError.
    appraise = first_match(
        '<span class="game_review_summary positive" itemprop="description">(.*?)</span>',
        None,
    )
    if appraise is None:
        appraise = first_match(
            '<span class="game_review_summary mixed" itemprop="description">(.*?)</span>'
        )
    arr.append(appraise)

    # Description.
    arr.append("".join(
        node.get_text(strip=True)
        for node in soup.select("div#game_area_description")
    ))

    # Tags (concatenated without a separator, as before).
    arr.append("".join(
        node.get_text(strip=True)
        for node in soup.select("div.glance_tags.popular_tags>a")
    ))

    # System requirements: every section is prefixed with a newline.
    arr.append("".join(
        "\n" + node.get_text(strip=True)
        for node in soup.select("div.sys_req.game_page_autocollapse")
    ))

    print("一次")
    return arr
|
||||||
|
|
||||||
|
def mysql(arr):
    """Insert one scraped row into the ``steam`` table of the ``spark`` database.

    Args:
        arr: Sequence of seven values, bound in order to the columns
            ``name, price, time, appraise, Brief_introduction, label,
            system_requirements``. Values are passed as query parameters,
            so the driver handles the escaping.

    Raises:
        pymysql.MySQLError: If connecting, executing or committing fails.
            The connection is closed in every case; nothing is committed
            when an error occurs.
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration or environment variables.
    db = pymysql.connect(user='root', password='12345678', host='localhost', db='spark')
    sql = '''
    INSERT INTO steam(name,price,time,appraise,Brief_introduction,label,system_requirements) VALUES (%s,%s,%s,%s,%s,%s,%s)
    '''
    try:
        # The cursor is closed on leaving the ``with`` block, i.e. before the
        # connection it belongs to.
        with db.cursor() as cur:
            cur.execute(sql, arr)
        db.commit()
    finally:
        # Always release the connection, even if execute/commit raised.
        db.close()
    print("mysql")
|
||||||
|
|
||||||
|
|
||||||
|
# Driver: fetch the list of new-release links, scrape each page in turn and
# store every successfully scraped row in the database.
urllist = urlList()
for url in urllist:
    print(url)
    arr = text(url)
    # text() signals a page without a title element by returning "Huh";
    # report it and move on to the next link.
    if arr == "Huh":
        print("这个要认证")
        continue
    mysql(arr)
|
Loading…
Reference in new issue