You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

38 lines
1.5 KiB

import pymysql
import requests
from lxml import etree
def getDB():
    """Open and return a new PyMySQL connection to the local ``douban`` database.

    The connection settings (host, user, password, database) are hard-coded
    here. Every call creates a fresh connection; this function never closes
    it, so the caller is responsible for doing so.

    Returns:
        The connection object produced by ``pymysql.connect``.
    """
    settings = {
        'host': 'localhost',
        'user': 'root',
        'password': '123456',
        'database': 'douban',
    }
    return pymysql.connect(**settings)
def Agent_info():
    """Return the HTTP headers used when requesting pages.

    Returns:
        A new dict with a single ``User-Agent`` entry holding a fixed
        desktop-browser user-agent string.
    """
    browser_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
    return {'User-Agent': browser_ua}
def get_url_info(nba_url):
    """Scrape the player table at *nba_url* and store its rows in MySQL.

    The page is fetched with the headers from ``Agent_info()``. Five columns
    are read from the ``<table class="players_table">`` element and one record
    per player is inserted into the ``nba`` table (columns ``no``, ``name``,
    ``team``, ``score``, ``info``) through the connection from ``getDB()``.

    Inserts are best-effort: each row is committed on its own, and a row that
    fails is reported with ``print`` and rolled back while the remaining rows
    are still attempted.

    Args:
        nba_url: URL of the page containing the ``players_table`` table.

    Returns:
        None.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the request exceeds the timeout).
        Errors raised while opening the database connection also propagate.
    """
    print("抓取网址", nba_url)

    # A timeout keeps an unresponsive server from hanging the scraper forever.
    res = requests.get(nba_url, headers=Agent_info(), timeout=10)
    tree = etree.HTML(res.text)

    names = tree.xpath('//table[@class="players_table"]//tr/td[2]/a/text()')
    teams = tree.xpath('//table[@class="players_table"]//tr/td[3]/a/text()')
    infos = tree.xpath('//table[@class="players_table"]//tr/td[2]/a/@href')
    # The first text node of the td[1] and td[4] columns is skipped.
    # NOTE(review): presumably that is the header row, which carries plain
    # text but no <a> links -- confirm against the live page.
    nos = tree.xpath('//table[@class="players_table"]//tr/td[1]/text()')[1:]
    scores = tree.xpath('//table[@class="players_table"]//tr/td[4]/text()')[1:]

    # zip() stops at the shortest column, so columns of unequal length can no
    # longer raise IndexError half-way through the import.
    # NOTE(review): columns are still scraped independently, so a row with a
    # missing cell would shift later values -- verify the page never omits one.
    rows = [
        tuple(str(value) for value in row)
        for row in zip(nos, names, teams, scores, infos)
    ]
    if not rows:
        # Nothing scraped: do not open a database connection at all.
        return

    # Placeholders let the driver escape the values; scraped text containing
    # quotes would otherwise break (or inject into) the statement.
    sql = 'insert into nba (no,name,team,score,info) values (%s,%s,%s,%s,%s)'

    # One connection for the whole import instead of one per row.
    db = getDB()
    try:
        cursor = db.cursor()
        try:
            for row in rows:
                try:
                    cursor.execute(sql, row)
                    db.commit()
                except Exception as exc:
                    # Deliberately broad: one bad row must not stop the rest.
                    print(exc)
                    db.rollback()
        finally:
            cursor.close()
    finally:
        db.close()
if __name__ == '__main__':
    # Script entry point: scrape the player statistics page and store it.
    stats_page = "https://nba.hupu.com/stats/players"
    get_url_info(stats_page)