master
qianjingLi 3 years ago
parent baa1485ea0
commit 65e445a50b

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 18 15:05:38 2022
@author: HUAWEI
"""
from urllib.parse import urlencode
import requests
from lxml import etree
import openpyxl
# Scrape food nutrition pages from db.foodmate.net and store them in an
# Excel workbook: one row per food, one column per nutrient, plus the
# source URL in the last column.
#
# NOTE(review): the indentation of the original script was lost; the nesting
# below (in particular `row += 1` living inside the per-name loop) is the most
# plausible reconstruction -- confirm against the original file.

# Column titles for A1..Z1, in worksheet order.
HEADERS = (
    '食物名称',
    '热量(千卡)',
    '硫胺素(毫克)',
    '钙(毫克)',
    '蛋白质(克)',
    '核黄素(毫克)',
    '镁(毫克)',
    '脂肪(克)',
    '烟酸(毫克)',
    '铁(毫克)',
    '碳水化合物(克)',
    '维生素C(毫克)',
    '锰(毫克)',
    '膳食纤维(克)',
    '维生素E(毫克)',
    '锌(毫克)',
    '维生素A(微克)',
    '胆固醇(毫克)',
    '铜(毫克)',
    '胡罗卜素(微克)',
    '钾(毫克)',
    '磷(毫克)',
    '视黄醇当量(微克)',
    '钠(毫克)',
    '硒(微克)',
    '网址',
)

# Create the workbook and write the header row.
book = openpyxl.Workbook()
sh = book.active
sh.title = '食物信息'
for column, title in enumerate(HEADERS, start=1):
    sh.cell(1, column).value = title

# First data row; advanced once per food name written.
row = 2
ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240"

# Example page URL: http://db.foodmate.net/yingyang/type_0%3A6%3A0_6.html
# Multi-page crawl.
# Accumulators kept from the original script; nothing populates them at present.
food = []
nutrient = []
number = []
address = []

for i in range(19, 20):  # food category index
    for j in range(0, 50):  # food index within the category
        url = f"http://db.foodmate.net/yingyang/type_0%3A{i}%3A{j}_{i}.html"
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        with requests.request(
            'GET', url, headers={'User-agent': ua}, timeout=30
        ) as res:
            content = res.text  # HTML source of the page

        html = etree.HTML(content)  # parse the HTML and get the DOM root
        if html is None:
            # lxml returns None when there is nothing to parse; skip the page.
            continue

        name = html.xpath("//*[@id='rightlist']/center/font/b/text()")  # food name
        information_name = html.xpath("//*[@id='rightlist']/div/div/text()")  # nutrient labels
        information_number = html.xpath("//*[@id='rightlist']/div/text()")  # nutrient amounts

        # The original iterated len(information_name) + 100 indices
        # unconditionally, which raised IndexError whenever fewer text nodes
        # were available. Slicing keeps the same upper bound but never reads
        # past the end of the list.
        # NOTE(review): values can spill past column Z; column 26 is then
        # overwritten with the URL below -- confirm this is intended.
        limit = len(information_name) + 100
        for offset, amount in enumerate(information_number[:limit]):
            sh.cell(row, offset + 2).value = str(amount)

        for food_name in name:
            sh.cell(row, 26).value = str(url)
            sh.cell(row, 1).value = str(food_name)
            row += 1

# Raw string: the backslashes are literal path separators, not escapes.
book.save(r'E:\CAUC\Computer\软件工程课程设计\油脂类.xlsx')
Loading…
Cancel
Save