You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.5 KiB
38 lines
1.5 KiB
import pymysql
|
|
import requests
|
|
from lxml import etree
|
|
def getDB(host='localhost', user='root', password='123456', database='douban'):
    """Open and return a new PyMySQL connection.

    The defaults reproduce the previously hard-coded settings, so calling
    ``getDB()`` with no arguments behaves as before. Pass explicit values to
    target another server, account or schema.

    Args:
        host: MySQL server host name.
        user: Account used to log in.
        password: Password for ``user``.
        database: Schema selected after connecting.

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for closing it.
    """
    return pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database,
    )
|
|
def Agent_info():
    """Return the HTTP request headers used when fetching pages.

    A new dict is built on every call; it holds a single ``User-Agent``
    entry carrying a desktop browser identification string.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
    return {'User-Agent': user_agent}
|
|
def get_url_info(nba_url, timeout=10):
    """Scrape the ``players_table`` at *nba_url* and insert its rows into ``nba``.

    The page is fetched with the headers from ``Agent_info()``, parsed with
    lxml, and five columns are extracted from the table with class
    ``players_table``: ``td[1]`` text (no), ``td[2]`` link text (name),
    ``td[3]`` link text (team), ``td[4]`` text (score) and the ``td[2]`` link
    ``href`` (info). Each row is inserted and committed individually; a row
    that fails to insert is printed, rolled back, and skipped.

    Args:
        nba_url: URL of the page containing the player table.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot hang the scraper forever.

    Returns:
        None.
    """
    print("抓取网址", nba_url)
    res = requests.get(nba_url, headers=Agent_info(), timeout=timeout)
    tree = etree.HTML(res.text)
    if tree is None:
        # Nothing was parsed, so there is nothing to scrape.
        return

    names = tree.xpath('//table[@class="players_table"]//tr/td[2]/a/text()')
    teams = tree.xpath('//table[@class="players_table"]//tr/td[3]/a/text()')
    # The first td[1]/td[4] text match is dropped; presumably it is the
    # header row, which has no <a> and so never shows up in the link-based
    # columns -- TODO confirm against the live page.
    nos = tree.xpath('//table[@class="players_table"]//tr/td[1]/text()')[1:]
    scores = tree.xpath('//table[@class="players_table"]//tr/td[4]/text()')[1:]
    infos = tree.xpath('//table[@class="players_table"]//tr/td[2]/a/@href')

    # zip() stops at the shortest column, so a ragged scrape cannot raise
    # IndexError. str() turns lxml's string subclass into a plain str before
    # it reaches the DB driver.
    rows = [
        tuple(str(value) for value in row)
        for row in zip(nos, names, teams, scores, infos)
    ]
    if not rows:
        return

    # Placeholders let the driver escape the scraped text; building the
    # statement with str.format broke on quotes and allowed SQL injection.
    sql = 'insert into nba (no,name,team,score,info) values (%s,%s,%s,%s,%s)'

    # One connection for the whole batch instead of one per row.
    db = getDB()
    try:
        cursor = db.cursor()
        try:
            for row in rows:
                try:
                    cursor.execute(sql, row)
                    db.commit()
                except Exception as exc:
                    # Best effort: report the failed row and keep going.
                    print(exc)
                    db.rollback()
        finally:
            cursor.close()
    finally:
        db.close()
|
|
if __name__ == '__main__':
    # Script entry point: scrape the player statistics page.
    target_url = "https://nba.hupu.com/stats/players"
    get_url_info(nba_url=target_url)