import pymysql
import requests
from lxml import etree

# Seconds to wait for an HTTP response before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Common XPath prefix of the info panel queried on every comic detail page.
_INFO_XPATH = '//*[@id="special_bg"]/div[3]/div/div/div[2]/div'


def getDB():
    """Open and return a new pymysql connection to the local 'douban' database.

    The caller owns the returned connection and is responsible for closing it.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment.
    return pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='douban',
    )


def Agent_info():
    """Return the HTTP request headers (a browser User-Agent) used for scraping."""
    return {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'
    }


def get_url():
    """Fetch the ranking page and return the comic detail URLs found on it.

    Returns:
        A list of absolute URLs built from the hrefs at positions 16..37 of
        the ``//div/ul/li/a/@href`` matches. The list holds at most 22
        entries and is shorter when the page yields fewer links.
    """
    manh_url = 'https://ac.qq.com/Rank/comicRank/type/top'
    res = requests.get(url=manh_url, headers=Agent_info(), timeout=REQUEST_TIMEOUT)
    tree = etree.HTML(res.text)
    hrefs = tree.xpath('//div/ul/li/a/@href')
    # Iterate over the slice itself rather than a fixed range(22), so a page
    # with fewer links no longer raises IndexError.
    return ["https://ac.qq.com/" + href for href in hrefs[16:38]]


def get_url_info(manh_url):
    """Scrape one comic detail page and insert a row into the ``manh`` table.

    Args:
        manh_url: Absolute URL of the comic detail page to fetch.

    The stored values are fragments of the ``str()`` form of the XPath result
    lists, exactly as before, so they may still carry the quote characters of
    that list representation. A page whose info block yields fewer than three
    comma-separated fragments is skipped with a message instead of raising.
    Database errors are printed and the transaction is rolled back.
    """
    print("抓取网址", manh_url)
    res = requests.get(manh_url, headers=Agent_info(), timeout=REQUEST_TIMEOUT)
    tree = etree.HTML(res.text)

    ranks = str(tree.xpath(_INFO_XPATH + '/div[2]/p/strong/text()')).strip('[').strip(']')
    names = str(tree.xpath(_INFO_XPATH + '/div[1]/h2/strong/text()')).strip('[').strip(']')

    # One query feeds the author / renqi / shoucang columns; the original
    # evaluated this identical XPath three times.
    authors = tree.xpath(_INFO_XPATH + '/p/span/em/text()')
    parts = str(authors).split(',')
    if len(parts) < 3:
        print("skip {}: expected at least 3 info fields, got {!r}".format(manh_url, authors))
        return

    # Drop the leading "['" of the list repr and the last five characters
    # (presumably a fixed suffix of the page text plus the closing quote --
    # TODO confirm against a live page).
    authors0 = parts[0][2:-5]
    renqi = parts[1]
    # Drop the final character (the closing "]" of the list repr when the
    # list has exactly three items).
    shoucang = parts[2][:-1]
    print(authors)

    # Parameterized query: the values come from scraped page text, so they
    # must never be spliced into the SQL string.
    sql = (
        'insert into manh (names,ranks,renqi,shoucang,urls,authors) '
        'values (%s,%s,%s,%s,%s,%s)'
    )
    params = (names, ranks, renqi, shoucang, manh_url, authors0)

    db = getDB()
    try:
        with db.cursor() as cursor:
            try:
                cursor.execute(sql, params)
                db.commit()
            except Exception as err:
                # Best-effort insert: report the failure and carry on with
                # the next page, as the original did.
                print(err)
                db.rollback()
    finally:
        # Always release the connection, even if creating the cursor fails.
        db.close()


if __name__ == '__main__':
    for url in get_url():
        get_url_info(url)