# -*- coding: utf-8 -*-
"""
Scrape per-food nutrition tables from db.foodmate.net into an Excel workbook.

Created on Tue Oct 18 15:05:38 2022

@author: HUAWEI

For every (category, item) index pair the script downloads one food page,
extracts the food name and the nutrient amounts with XPath, and writes one
row per food: name in column A, nutrient amounts in columns B-Y (in page
order) and the page URL in column Z. The workbook is saved once, after all
pages have been visited.
"""
import sys
from urllib.parse import urlencode

import requests
from lxml import etree
import openpyxl

# Header row, columns A..Z. The first entry is the food name, the last one
# is the source URL; everything in between is one nutrient per column.
COLUMN_TITLES = [
    '食物名称',
    '热量(千卡)',
    '硫胺素(毫克)',
    '钙(毫克)',
    '蛋白质(克)',
    '核黄素(毫克)',
    '镁(毫克)',
    '脂肪(克)',
    '烟酸(毫克)',
    '铁(毫克)',
    '碳水化合物(克)',
    '维生素C(毫克)',
    '锰(毫克)',
    '膳食纤维(克)',
    '维生素E(毫克)',
    '锌(毫克)',
    '维生素A(微克)',
    '胆固醇(毫克)',
    '铜(毫克)',
    '胡罗卜素(微克)',
    '钾(毫克)',
    '磷(毫克)',
    '视黄醇当量(微克)',
    '钠(毫克)',
    '硒(微克)',
    '网址',
]

NAME_COLUMN = 1
FIRST_NUTRIENT_COLUMN = 2
URL_COLUMN = len(COLUMN_TITLES)  # column Z (26)
# Number of nutrient columns between the name and URL columns (B..Y).
NUTRIENT_COLUMN_COUNT = URL_COLUMN - FIRST_NUTRIENT_COLUMN

USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240"

# Seconds to wait for the server before giving up on a single page.
REQUEST_TIMEOUT = 10

# Pages to crawl, e.g. http://db.foodmate.net/yingyang/type_0%3A6%3A0_6.html
CATEGORY_IDS = range(19, 20)  # food categories
ITEM_IDS = range(0, 50)       # foods inside each category

NAME_XPATH = "//*[@id='rightlist']/center/font/b/text()"  # food name
AMOUNT_XPATH = "//*[@id='rightlist']/div/text()"          # nutrient amounts

# Raw string: the backslashes are Windows path separators, not escapes.
OUTPUT_PATH = r'E:\CAUC\Computer\软件工程课程设计\油脂类.xlsx'


def _build_workbook():
    """Create the workbook and write the header row; return (book, sheet)."""
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = '食物信息'
    for column, title in enumerate(COLUMN_TITLES, start=1):
        sheet.cell(row=1, column=column, value=title)
    return book, sheet


def _page_url(category, item):
    """Return the URL of food number `item` inside food category `category`."""
    return f"http://db.foodmate.net/yingyang/type_0%3A{category}%3A{item}_{category}.html"


def _fetch_food(url):
    """Download one food page and return (names, amounts), or None.

    None is returned when the request fails or the response body is empty,
    so the caller can skip the page instead of aborting the whole crawl.
    """
    try:
        with requests.get(url, headers={'User-agent': USER_AGENT},
                          timeout=REQUEST_TIMEOUT) as res:
            # NOTE(review): res.text relies on requests' charset detection;
            # confirm that food names are not garbled for this site.
            content = res.text
    except requests.RequestException as exc:
        print(f"skipped {url}: {exc}", file=sys.stderr)
        return None

    # An empty body cannot be parsed into a DOM tree.
    if not content.strip():
        return None
    html = etree.HTML(content)
    if html is None:
        return None
    return html.xpath(NAME_XPATH), html.xpath(AMOUNT_XPATH)


def main():
    """Crawl every configured page and save the collected rows to Excel."""
    book, sheet = _build_workbook()
    row = 2  # row 1 holds the header

    for category in CATEGORY_IDS:
        for item in ITEM_IDS:
            url = _page_url(category, item)
            page = _fetch_food(url)
            if page is None:
                continue
            names, amounts = page
            # A page without a food name does not describe a food; writing
            # its values would leave stale cells in a row that never advances.
            if not names:
                continue

            # Amounts are mapped to columns by position. Only as many values
            # as there are nutrient columns are written, so the loop can
            # neither run past the list nor spill into the URL column.
            # NOTE(review): assumes the first text nodes line up with the
            # header order - confirm against a live page.
            for offset, amount in enumerate(amounts[:NUTRIENT_COLUMN_COUNT]):
                sheet.cell(row, FIRST_NUTRIENT_COLUMN + offset).value = str(amount)

            for food_name in names:
                sheet.cell(row, URL_COLUMN).value = url
                sheet.cell(row, NAME_COLUMN).value = str(food_name)
                row += 1

    book.save(OUTPUT_PATH)


if __name__ == "__main__":
    main()