# -*- coding: utf-8 -*-
"""
Created on Tue Oct 18 15:05:38 2022

@author: HUAWEI
"""

from urllib.parse import urlencode
import requests
from lxml import etree
import openpyxl

# Create the Excel workbook and write the header row.
book = openpyxl.Workbook()
sh = book.active
sh.title = '食物信息'

# Header captions for row 1, listed in column order (A through Z).
_header_titles = (
    '食物名称',
    '热量（千卡）',
    '硫胺素(毫克)',
    '钙(毫克)',
    '蛋白质(克)',
    '核黄素(毫克)',
    '镁(毫克)',
    '脂肪(克)',
    '烟酸(毫克)',
    '铁(毫克)',
    '碳水化合物(克)',
    '维生素C(毫克)',
    '锰(毫克)',
    '膳食纤维(克)',
    '维生素E(毫克)',
    '锌(毫克)',
    '维生素A(微克)',
    '胆固醇(毫克)',
    '铜(毫克)',
    '胡罗卜素(微克)',
    '钾(毫克)',
    '磷(毫克)',
    '视黄醇当量(微克)',
    '钠(毫克)',
    '硒(微克)',
    '网址',
)
for _column_index, _title in enumerate(_header_titles, start=1):
    sh.cell(row=1, column=_column_index).value = _title

# Next worksheet row to fill; data starts at row 2.
row = 2
ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240"
# Example page URL: http://db.foodmate.net/yingyang/type_0%3A6%3A0_6.html
# Multi-page crawl: one request per (category, item) pair.
# These lists are not populated by the loop below; they are kept so the
# module-level names remain available.
food = []
nutrient = []
number = []
address = []
for i in range(19, 20):  # food category index
    for j in range(0, 50):  # item index within the category
        url = "http://db.foodmate.net/yingyang/type_0%3A" + str(i) + "%3A" + str(j) + "_" + str(i) + ".html"
        # A timeout keeps a single unresponsive page from hanging the crawl.
        with requests.request('GET', url, headers={'User-agent': ua}, timeout=30) as res:
            content = res.text  # HTML text of the page

        html = etree.HTML(content)  # parse the HTML and get the DOM root
        if html is None:
            # Nothing parseable came back, so there is nothing to record.
            continue

        name = html.xpath("//*[@id='rightlist']/center/font/b/text()")  # food name
        information_name = html.xpath("//*[@id='rightlist']/div/div/text()")  # nutrient labels
        information_number = html.xpath("//*[@id='rightlist']/div/text()")  # nutrient amounts
        if not name:
            # No food name on this page: skip it instead of emitting a
            # nameless row in the sheet.
            continue

        # Write exactly the values that were extracted, starting at column B.
        # Indexing past the end of the list would raise IndexError.
        for offset, amount in enumerate(information_number):
            sh.cell(row, offset + 2).value = str(amount)

        sh.cell(row, 26).value = str(url)  # column Z holds the source URL
        # If the XPath matched several text nodes, the last one is used.
        sh.cell(row, 1).value = str(name[-1])
        row += 1

# Raw string so the backslashes in the Windows path are taken literally.
book.save(r'E:\CAUC\Computer\软件工程课程设计\油脂类.xlsx')