new nutrition

master
qianjingLi 2 years ago
parent 2920d17894
commit 0f4d6cb26f

@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 18 15:05:38 2022
@author: HUAWEI
"""
from urllib.parse import urlencode
import requests
from lxml import etree
import openpyxl
# Build the Excel workbook and fill in the header row of the food sheet.
book = openpyxl.Workbook()
sh = book.active
sh.title = '食物信息'

# Header titles in sheet order (columns A through Z): the food name first,
# then 24 nutrient columns, and the page URL last.
_header_titles = (
    '食物名称',
    '热量(千卡)',
    '硫胺素(毫克)',
    '钙(毫克)',
    '蛋白质(克)',
    '核黄素(毫克)',
    '镁(毫克)',
    '脂肪(克)',
    '烟酸(毫克)',
    '铁(毫克)',
    '碳水化合物(克)',
    '维生素C(毫克)',
    '锰(毫克)',
    '膳食纤维(克)',
    '维生素E(毫克)',
    '锌(毫克)',
    '维生素A(微克)',
    '胆固醇(毫克)',
    '铜(毫克)',
    '胡罗卜素(微克)',
    '钾(毫克)',
    '磷(毫克)',
    '视黄醇当量(微克)',
    '钠(毫克)',
    '硒(微克)',
    '网址',
)
for _column_index, _title in enumerate(_header_titles, start=1):
    sh.cell(row=1, column=_column_index, value=_title)

# Next worksheet row to write; row 1 holds the header.
row = 2
# Browser-like User-Agent header sent with every request.
ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240"
# Example page URL: http://db.foodmate.net/yingyang/type_0%3A6%3A0_6.html
# One page is fetched per (category, item) pair.

# Kept from the original script; nothing below appends to these lists.
food = []
nutrient = []
number = []
address = []

for i in range(19, 20):  # food category index
    for j in range(0, 50):  # item index within the category
        url = "http://db.foodmate.net/yingyang/type_0%3A{0}%3A{1}_{0}.html".format(i, j)
        # A timeout keeps one stalled connection from hanging the whole crawl.
        with requests.request('GET', url, headers={'User-agent': ua}, timeout=30) as res:
            content = res.text  # HTML text of the page
        html = etree.HTML(content)  # parse the HTML into a DOM root node
        if html is None:
            # Nothing parseable came back; skip this page.
            continue
        name = html.xpath("//*[@id='rightlist']/center/font/b/text()")  # food name(s)
        information_name = html.xpath("//*[@id='rightlist']/div/div/text()")  # nutrient labels
        information_number = html.xpath("//*[@id='rightlist']/div/text()")  # nutrient amounts
        if not name:
            # With no food name the row counter would not advance, so any
            # values written here would be overwritten (possibly only in part)
            # by the next page. Skip the page instead.
            continue
        # Never index past the values actually scraped: the original bound
        # (label count + 100) raised IndexError whenever fewer values existed.
        # NOTE(review): the +100 allowance is kept from the original; values
        # past column Y have no header, and column Z is overwritten with the
        # URL below - confirm the intended number of value columns.
        value_count = min(len(information_name) + 100, len(information_number))
        for offset in range(value_count):
            # Values start in column B (index 2), right after the food name.
            sh.cell(row, offset + 2).value = str(information_number[offset])
        for food_name in name:
            sh.cell(row, 26).value = str(url)  # column Z: source URL
            sh.cell(row, 1).value = str(food_name)  # column A: food name
            row += 1

# Raw string so the backslashes in the Windows path are taken literally
# rather than parsed as (invalid) escape sequences.
book.save(r'E:\CAUC\Computer\软件工程课程设计\油脂类.xlsx')
Loading…
Cancel
Save