+ 䬺小二北京涮羊肉(湘春路店) +
++ + + + +
++ 商家概述 +
+diff --git a/src/webspider/README.md b/src/webspider/README.md new file mode 100644 index 0000000..397e46b --- /dev/null +++ b/src/webspider/README.md @@ -0,0 +1,6 @@ +# 爬虫文件 +**meituan.py**:爬取美团数据的文件,里面爬取了店铺名字、评分、地址等信息,修改数据库参数后直接就可以运行 \ +**stringTodict.py**:将字符串转化为字典的文件,例如"{key1:value1,key2:value2}"转化为dict{key1:value1,key2:value2},和json.load不一样,改data或者headers的一个辅助文件 \ +**shop.txt**:存储店铺html的文件 \ +**shopurl.txt**:存储店铺详情页的url文件 \ +**string.txt**:搭配stringTodict.py使用,将需要转化的字符串写在里面 \ No newline at end of file diff --git a/src/webspider/meituan.py b/src/webspider/meituan.py new file mode 100644 index 0000000..68b04f6 --- /dev/null +++ b/src/webspider/meituan.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Oct 23 09:38:28 2020 + +@author: 坤林 +""" +#this is the version * about successfully get detail of 25 shops and all areas + +import requests +import json +import traceback +import chardet +import random +import pymysql +import time +from lxml import etree +from bs4 import BeautifulSoup +timing=4 +index_url="http://meishi.meituan.com/i/" +headers={'Accept': 'application/json', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'Connection': 'keep-alive', 'Content-Length': '408', 'Content-Type': 'application/json', 'Cookie': '__mta=217497129.1605576012299.1606050406017.1606050988411.60; iuuid=61A29DA6726853A9CFAEA2909BFE7A3B399AD2E666AF89BEEE2D9E25D8151EA9; cityname=%E9%95%BF%E6%B2%99; _hc.v=543395d8-e443-3414-9405-504128fba3b1.1605576012; _lxsdk_cuid=175d3c9b0bdc8-0b981a1d414f9b-930346c-144000-175d3c9b0bdc8; _lxsdk=61A29DA6726853A9CFAEA2909BFE7A3B399AD2E666AF89BEEE2D9E25D8151EA9; _lx_utm=utm_source%3Dbing%26utm_medium%3Dorganic; rvct=70; client-id=cec617d4-b199-4a30-a095-457fa70e339d; uuid=dacd6749-edf1-446a-b70c-bc7871bd99e6; IJSESSIONID=1qgg0q4wqsn9b1kixeeefcho67; latlng=28.234713%2C113.007334%2C1606050355118; ci=70; meishi_ci=70; cityid=70; logan_session_token=3edh2pxhzgqdvgx8eq6w; _lxsdk_s=175f00f9b75-78f-829-745%7C%7C8', 'Host': 'meishi.meituan.com', 'Origin': 'https://meishi.meituan.com', 'Referer': 'https://meishi.meituan.com/i/?ci=70&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36', 'x-requested-with': 'XMLHttpRequest'} +data={'app': '', 'areaId': 0, 'cateId': 1, 'deal_attr_23': '', 'deal_attr_24': '', 'deal_attr_25': '', 'limit': 15, 'lineId': 0, 'offset': 0, 'optimusCode': 10, 'originUrl': 'https://meishi.meituan.com/i/?ci=70&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1', 'partner': 126, 'platform': 3, 'poi_attr_20033': '', 'poi_attr_20043': '', 'riskLevel': 1, 'sort': 'default', 'stationId': 0, 'uuid': '2e4de9f1ec744a9fb36b.1605626652.1.0.0', 'version': '8.2.0'} +proxies=[] +def get_proxies(proxyfile): + file=open('proxy.txt','r') + for line in file.readlines(): + proxies.append(line.strip()) + file.close() +def get_proxy(): + k=random.randint(0,len(proxies)-1) + proxy=proxies[k] + p={'http':'http://'+proxy,'https':'https://'+proxy} + return p +def plist(url,p): + + #根据链接读取美团移动端 + #url="https://meishi.meituan.com/i/api/channel/deal/list" + if(p==None): + resp=requests.post(url,headers=headers,data=data) + else: + resp=requests.post(url,headers=headers,data=data,proxies=p,timeout=timing) + try: + result=json.loads(resp.text)#字典,将字符串转化为字典 + except Exception as e: + print(result) + print("get shop list error",e) + return result + +def shopdata(shopurl): + #根据店铺链接读取单个店铺的详细信息 + headdata={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Cache-Control': 'max-age=0', 'Connection': 'keep-alive', 'Cookie': '__mta=210687658.1605577786961.1605961346402.1605961415001.24; __mta=210687658.1605577786961.1605961276456.1605961301918.24; uuid=d8534775d38547fb847a.1605577694.1.0.0; _lxsdk_cuid=175d3e36e17c8-01633738a10df2-930346c-144000-175d3e36e17c8; iuuid=44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F; cityname=%E9%95%BF%E6%B2%99; _lxsdk=44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F; webp=1; _hc.v=8603bfe6-42d3-0a08-24bb-d4c8b9495adb.1605577787; _lx_utm=utm_source%3Dblog.csdn.net%26utm_medium%3Dreferral%26utm_content%3D%252Fxing851483876%252Farticle%252Fdetails%252F81842329; mtcdn=K; __utma=74597006.1199398655.1605577785.1605581851.1605702245.3; __utmz=74597006.1605702245.3.3.utmcsr=blog.csdn.net|utmccn=(referral)|utmcmd=referral|utmcct=/xing851483876/article/details/81842329; latlng=28.234696,113.007313,1605702248066; i_extend=C_b1Gimthomepagecategory11H__a; client-id=f25a6222-ad55-482e-a20f-2c54c8e25049; ci=70; meishi_ci=70; cityid=70; logan_session_token=yeo5wu3tkknro0rn94wr; _lxsdk_s=175eab20f38-3bc-24-ec8%7C%7C14', 'Host': 'meishi.meituan.com', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 'Sec-Fetch-User': '?1', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'} + loop=0 + while(loop<10): + try: + p=get_proxy() + resq=requests.get(shopurl,headers=headdata,proxies=p,timeout=timing) + #print(resq.text) + #resq.text是字符串格式 + + ##BeautifulSoup解析文本格式 + #soup=BeautifulSoup(resq.text,'lxml') + #shopfile=open('shop.txt','w',encoding='utf-8') + #soup.prettify()是字符串格式 + #shopfile.write(soup.prettify()) + #shopfile.close() + + ##xpath解析 + html=etree.HTML(resq.text) + datas=html.xpath('body/script[@crossorigin="anonymous"]') + information=[] + #print(datas) + for data in datas: + if(data.text!=None): + strs=data.text[:16] + if (strs == 'window._appState'):#查询储存信息的模块 + result = data.text[19:-1] + result = json.loads(result) + name = result['poiInfo']['name'] + addr = result['poiInfo']['addr'] + phone = result['poiInfo']['phone'] + aveprice = result['poiInfo']['avgPrice'] + opentime = result['poiInfo']['openInfo'] + opentime = opentime.replace('\n', ' ') + avescore = result['poiInfo']['avgScore'] + marknum = result['poiInfo']['MarkNumbers'] + img=result['poiInfo']['frontImg'] + lng = result['poiInfo']['lng'] + lat = result['poiInfo']['lat'] + information=[name,addr, opentime,phone, aveprice, avescore,img] + return information + except Exception as e: + #shopfile=open('shop.txt','w',encoding='utf-8') + #shopfile.write(resq.text) + #shopfile.close() + loop+=1 + if(loop==10): + + try: + resq=requests.get(shopurl,headers=headdata,timeout=timing) + html=etree.HTML(resq.text) + datas=html.xpath('body/script[@crossorigin="anonymous"]') + information=[] + #print(datas) + for data in datas: + if(data.text!=None): + strs=data.text[:16] + if (strs == 'window._appState'):#查询储存信息的模块 + result = data.text[19:-1] + result = json.loads(result) + name = result['poiInfo']['name'] + addr = result['poiInfo']['addr'] + phone = result['poiInfo']['phone'] + aveprice = result['poiInfo']['avgPrice'] + opentime = result['poiInfo']['openInfo'] + opentime = opentime.replace('\n', ' ') + avescore = result['poiInfo']['avgScore'] + marknum = result['poiInfo']['MarkNumbers'] + img=result['poiInfo']['frontImg'] + lng = result['poiInfo']['lng'] + lat = result['poiInfo']['lat'] + information=[name,addr, opentime,phone, aveprice, avescore,img] + return information + except: + print("crawer the detail of shop of %s"%shopurl) + return [] +def get_ALLareas(url): + loop=0 + head={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'Cache-Control': 'max-age=0', 'Connection': 'keep-alive', 'Cookie': '__mta=217497129.1605576012299.1606050406017.1606050988411.60; iuuid=61A29DA6726853A9CFAEA2909BFE7A3B399AD2E666AF89BEEE2D9E25D8151EA9; cityname=%E9%95%BF%E6%B2%99; _hc.v=543395d8-e443-3414-9405-504128fba3b1.1605576012; _lxsdk_cuid=175d3c9b0bdc8-0b981a1d414f9b-930346c-144000-175d3c9b0bdc8; _lxsdk=61A29DA6726853A9CFAEA2909BFE7A3B399AD2E666AF89BEEE2D9E25D8151EA9; _lx_utm=utm_source%3Dbing%26utm_medium%3Dorganic; rvct=70; client-id=cec617d4-b199-4a30-a095-457fa70e339d; uuid=dacd6749-edf1-446a-b70c-bc7871bd99e6; IJSESSIONID=1qgg0q4wqsn9b1kixeeefcho67; latlng=28.234713%2C113.007334%2C1606050355118; ci=70; meishi_ci=70; cityid=70; logan_session_token=3edh2pxhzgqdvgx8eq6w; _lxsdk_s=175f00f9b75-78f-829-745%7C%7C8', 'Host': 'meishi.meituan.com', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'} + while(loop<10): + try: + p=get_proxy() + r=requests.get(url,headers=head,proxies=p,timeout=timing) + html=etree.HTML(r.text) + datas=html.xpath('body/script[@crossorigin="anonymous"]') + for data in datas: + if(data.text!=None): + strs=data.text[:16] + if (strs == 'window._appState'): + result = data.text[19:-1] + result=json.loads(result) + areas=result['navBarData']['areaObj'] + return areas + except Exception as e: + loop+=1 + if(loop==10): + try: + r=requests.get(url,headers=head) + html=etree.HTML(r.text) + datas=html.xpath('body/script[@crossorigin="anonymous"]') + for data in datas: + if(data.text!=None): + strs=data.text[:16] + if (strs == 'window._appState'): + result = data.text[19:-1] + result=json.loads(result) + areas=result['navBarData']['areaObj'] + return areas + except: + print("crawer all areas error:",e) + return None +def shopwrite(url,shopfile): + loop=0 + while(loop<10): + try: + #将美团移动端数据写入文件 + result=plist(url) + #print(result) + totalcount=result['data']['poiList']['totalCount'] + #print(totalcount) + totalshop=result['data']['poiList']['poiInfos'] + break + except: + loop+=1 + file=open(shopfile,'w',encoding='utf-8') + for i in range(15): + shop_message=totalshop[i] + shop_url=index_url+'poi/'+shop_message['poiid']+'?ct_poi='+shop_message['ctPoi'] + eachshop='name:'+shop_message['name']+',url:'+shop_url+'\n' + file.write(eachshop) + file.close() + shopdata(shop_url) + +def mysqlstore(url,number,Id): + #将读取的信息存入mysql数据库 + print(data) + loop=0 + db=pymysql.connect(host='localhost',user='root',password='lkl15220319',port=3306) + cursor=db.cursor() + cursor.execute('use meituan;') + sql='create table if not exists allshops(id INT(11) primary key,name VARCHAR(250) NOT NULL,avgprice INT(10),avgscore FLOAT(2,1),img VARCHAR(360) NOT NULL,cateName VARCHAR(100) NOT NULL,url VARCHAR(250) NOT NULL);' + cursor.execute(sql) + while(loop<10): + try: + p=get_proxy() + result=plist(url,p) + totalcount=result['data']['poiList']['totalCount'] + totalshop=result['data']['poiList']['poiInfos'] + #print(result) + break + except: + loop+=1 + if(loop==10): + result=plist(url,None) + totalcount=result['data']['poiList']['totalCount'] + totalshop=result['data']['poiList']['poiInfos'] + for i in range(number): + Id+=1 + shop_message=totalshop[i] + cateName=shop_message['cateName'] + name=shop_message['name'] + img=shop_message['frontImg'] + avgprice=shop_message['avgPrice'] + avgscore=shop_message['avgScore'] + sql= 'INSERT INTO allshops(id,name,avgprice,avgscore,img,cateName,url) values(%s,%s,%s,%s,%s,%s,%s);' + shop_url=index_url+'poi/'+shop_message['poiid']+'?ct_poi='+shop_message['ctPoi'] + #information=shopdata(shop_url) + #print(Id,information,cateName) + try: + #print(type(Id),type(information[0]),type(information[1]),type(information[2]),type(information[3]),type(int(information[4])),type(int(float(information[5])*10)),type(information[6]),type(cateName)) + cursor.execute(sql,(Id,name,avgprice,avgscore,img,cateName,url)) + db.commit() + except Exception: + traceback.print_exc() + print('Fail,',Exception) + db.rollback() + return Id +url="http://meishi.meituan.com/i/api/channel/deal/list" +# shopfile="shopurl.txt" +# mysqlstore(url) +def get_alldetail(areas): + Id=0 + for areaid in areas.keys(): + regions=areas[areaid] + for region in regions: + #可以发现“全部”爬取的店铺在其他分类地区也有,其次全部上面所显示的数量与移动端上标注的数量不一致 + #而其他分类地区则表现一直,因此我们只爬取其他分类地区,跳过“全部” + if(region['id']!=int(areaid)): + print("正在爬取地区:%s"%region['name']) + offest=0 + data['areaId']=region['id'] + totalcount=region['count'] + while(totalcount>25): + data['offset']=offest + totalcount-=25 + offest+=25 + Id=mysqlstore(url,25,Id) + if(totalcount>0): + data['offest']=offest + Id=mysqlstore(url,totalcount,Id) + + +def main(): + get_proxies('./proxy.txt') + print(proxies) + areas={"149":[{"id":149,"name":"全部","regionName":"芙蓉区","count":2226},{"id":4822,"name":"马王堆","regionName":"马王堆","count":145},{"id":4821,"name":"德政园/杨家山","regionName":"德政园/杨家山","count":100},{"id":4823,"name":"五一广场","regionName":"五一广场","count":499},{"id":4824,"name":"袁家岭/火车站","regionName":"袁家岭/火车站","count":111},{"id":6020,"name":"晚报大道","regionName":"晚报大道","count":20},{"id":6026,"name":"湖南农业大学","regionName":"湖南农业大学","count":113},{"id":7161,"name":"八一路","regionName":"八一路","count":27},{"id":8051,"name":"人民中路/朝阳路/融圣国际","regionName":"人民中路/朝阳路/融圣国际","count":59},{"id":8052,"name":"司门口","regionName":"司门口","count":25},{"id":8062,"name":"黄兴路步行街","regionName":"黄兴路步行街","count":176},{"id":8064,"name":"汽车东站","regionName":"汽车东站","count":54},{"id":8067,"name":"五里牌","regionName":"五里牌","count":23},{"id":8072,"name":"芙蓉广场","regionName":"芙蓉广场","count":80},{"id":8074,"name":"韭菜园","regionName":"韭菜园","count":36},{"id":8077,"name":"省委","regionName":"省委","count":2},{"id":9026,"name":"古曲路/浏阳河婚庆文化园","regionName":"古曲路/浏阳河婚庆文化园","count":91},{"id":9244,"name":"定王台","regionName":"定王台","count":57},{"id":36483,"name":"万家丽国际MALL","regionName":"万家丽国际MALL","count":10},{"id":40199,"name":"万达公馆","regionName":"万达公馆","count":3},{"id":40938,"name":"旺旺医院/芙蓉区政府","regionName":"旺旺医院/芙蓉区政府","count":22},{"id":42376,"name":"万家丽北路","regionName":"万家丽北路","count":256},{"id":42909,"name":"梦泽园","regionName":"梦泽园","count":26},{"id":44332,"name":"火车站","regionName":"火车站","count":116},{"id":45708,"name":"浏阳河风光带","regionName":"浏阳河风光带","count":20}],"150":[{"id":150,"name":"全部","regionName":"开福区","count":1643},{"id":4825,"name":"四方坪","regionName":"四方坪","count":201},{"id":4827,"name":"伍家岭/开福寺","regionName":"伍家岭/开福寺","count":77},{"id":6019,"name":"中山亭/乐和城","regionName":"中山亭/乐和城","count":64},{"id":6021,"name":"松桂园","regionName":"松桂园","count":43},{"id":6022,"name":"湘江世纪城","regionName":"湘江世纪城","count":199},{"id":6023,"name":"湘雅附一医院","regionName":"湘雅附一医院","count":115},{"id":6024,"name":"烈士公园","regionName":"烈士公园","count":65},{"id":8060,"name":"世界之窗/国际会展中心","regionName":"世界之窗/国际会展中心","count":19},{"id":8070,"name":"月湖公园/长沙大学","regionName":"月湖公园/长沙大学","count":156},{"id":8073,"name":"八一桥","regionName":"八一桥","count":22},{"id":9016,"name":"开福万达广场","regionName":"开福万达广场","count":89},{"id":9087,"name":"珠江花城/万国城","regionName":"珠江花城/万国城","count":61},{"id":40190,"name":"山语城/金霞","regionName":"山语城/金霞","count":14},{"id":43258,"name":"北辰三角洲","regionName":"北辰三角洲","count":80},{"id":44617,"name":"兴汉门","regionName":"兴汉门","count":87}],"151":[{"id":151,"name":"全部","regionName":"天心区","count":2225},{"id":4828,"name":"天心阁/白沙井","regionName":"天心阁/白沙井","count":61},{"id":7162,"name":"坡子街","regionName":"坡子街","count":121},{"id":7163,"name":"解放西路","regionName":"解放西路","count":102},{"id":7165,"name":"新开铺","regionName":"新开铺","count":90},{"id":7166,"name":"新省政府","regionName":"新省政府","count":61},{"id":7167,"name":"南门口","regionName":"南门口","count":96},{"id":8055,"name":"贺龙体育场","regionName":"贺龙体育场","count":10},{"id":8056,"name":"黄土岭","regionName":"黄土岭","count":77},{"id":8058,"name":"识字岭","regionName":"识字岭","count":36},{"id":8065,"name":"劳动广场/书院路","regionName":"劳动广场/书院路","count":143},{"id":8068,"name":"摩天轮","regionName":"摩天轮","count":5},{"id":8071,"name":"人民西路口","regionName":"人民西路口","count":3},{"id":8075,"name":"浦沅","regionName":"浦沅","count":29},{"id":9017,"name":"铁道学院/林科大","regionName":"铁道学院/林科大","count":111},{"id":9020,"name":"天虹","regionName":"天虹","count":3},{"id":9031,"name":"友阿奥特莱斯","regionName":"友阿奥特莱斯","count":101},{"id":9176,"name":"侯家塘","regionName":"侯家塘","count":107},{"id":17001,"name":"暮云镇","regionName":"暮云镇","count":12},{"id":34832,"name":"中海环宇城","regionName":"中海环宇城","count":1},{"id":36481,"name":"悦方ID mall","regionName":"悦方ID mall","count":3},{"id":36738,"name":"云塘理工/环保科技园","regionName":"云塘理工/环保科技园","count":109},{"id":40141,"name":"药王街","regionName":"药王街","count":1},{"id":40142,"name":"太平街","regionName":"太平街","count":31},{"id":42529,"name":"芙蓉南路沿线","regionName":"芙蓉南路沿线","count":553}],"152":[{"id":152,"name":"全部","regionName":"雨花区","count":2451},{"id":4832,"name":"雨花亭","regionName":"雨花亭","count":185},{"id":4830,"name":"桂花路/车站南路","regionName":"桂花路/车站南路","count":59},{"id":4829,"name":"东塘","regionName":"东塘","count":106},{"id":7164,"name":"桂花路/左家塘","regionName":"桂花路/左家塘","count":81},{"id":7168,"name":"红星国际会展中心","regionName":"红星国际会展中心","count":131},{"id":7169,"name":"高桥","regionName":"高桥","count":211},{"id":8063,"name":"汽车南站","regionName":"汽车南站","count":63},{"id":8066,"name":"武广高铁","regionName":"武广高铁","count":85},{"id":8069,"name":"体育新城","regionName":"体育新城","count":171},{"id":8076,"name":"窑岭/长岭","regionName":"窑岭/长岭","count":101},{"id":9025,"name":"喜盈门/喜乐地","regionName":"喜盈门/喜乐地","count":124},{"id":9028,"name":"梓园路/省儿童医院","regionName":"梓园路/省儿童医院","count":37},{"id":9029,"name":"民政学院/香樟路","regionName":"民政学院/香樟路","count":126},{"id":9030,"name":"井湾子","regionName":"井湾子","count":55},{"id":9560,"name":"树木岭","regionName":"树木岭","count":128},{"id":14689,"name":"天际岭","regionName":"天际岭","count":61},{"id":14699,"name":"环保学院/科技职院","regionName":"环保学院/科技职院","count":49},{"id":19726,"name":"运达中央广场","regionName":"运达中央广场","count":33},{"id":25094,"name":"德思勤城市广场","regionName":"德思勤城市广场","count":25},{"id":34708,"name":"人民东路","regionName":"人民东路","count":1},{"id":36482,"name":"保利国际MALL","regionName":"保利国际MALL","count":""},{"id":45727,"name":"狮子山","regionName":"狮子山","count":3},{"id":46302,"name":"井圭路","regionName":"井圭路","count":32}],"153":[{"id":153,"name":"全部","regionName":"岳麓区","count":3310},{"id":6027,"name":"溁湾镇/新外滩","regionName":"溁湾镇/新外滩","count":61},{"id":6028,"name":"河西大学城","regionName":"河西大学城","count":561},{"id":6029,"name":"汽车西站/达美D6区","regionName":"汽车西站/达美D6区","count":197},{"id":7007,"name":"橘子洲公园","regionName":"橘子洲公园","count":9},{"id":7534,"name":"咸嘉新村/商学院","regionName":"咸嘉新村/商学院","count":42},{"id":7723,"name":"市政府","regionName":"市政府","count":39},{"id":7776,"name":"奥克斯广场","regionName":"奥克斯广场","count":3},{"id":8059,"name":"麓谷","regionName":"麓谷","count":143},{"id":8061,"name":"观沙岭/茶子山","regionName":"观沙岭/茶子山","count":167},{"id":8953,"name":"含浦","regionName":"含浦","count":293},{"id":9015,"name":"涉外经济学院","regionName":"涉外经济学院","count":384},{"id":37674,"name":"梅溪湖","regionName":"梅溪湖","count":175},{"id":37675,"name":"青山镇/莱茵城","regionName":"青山镇/莱茵城","count":60},{"id":37700,"name":"财经学院/西湖公园","regionName":"财经学院/西湖公园","count":12},{"id":37701,"name":"渔人码头","regionName":"渔人码头","count":1},{"id":37702,"name":"润泽园","regionName":"润泽园","count":1},{"id":37703,"name":"银盆南路","regionName":"银盆南路","count":1},{"id":37704,"name":"金茂览秀城","regionName":"金茂览秀城","count":1},{"id":37725,"name":"步步高梅溪新天地","regionName":"步步高梅溪新天地","count":45},{"id":37726,"name":"河西王府井/步步高","regionName":"河西王府井/步步高","count":7},{"id":38365,"name":"湘雅附三/肿瘤医院","regionName":"湘雅附三/肿瘤医院","count":4},{"id":40547,"name":"王家湾","regionName":"王家湾","count":15},{"id":42374,"name":"桐梓坡","regionName":"桐梓坡","count":243},{"id":43783,"name":"新民路","regionName":"新民路","count":11},{"id":44333,"name":"罗家嘴/阳光100","regionName":"罗家嘴/阳光100","count":128},{"id":45015,"name":"银盆岭","regionName":"银盆岭","count":613},{"id":45159,"name":"咸嘉湖","regionName":"咸嘉湖","count":23}],"3491":[{"id":3491,"name":"全部","regionName":"浏阳市","count":244},{"id":9525,"name":"永安镇","regionName":"永安镇","count":4},{"id":15561,"name":"浏阳工业园","regionName":"浏阳工业园","count":10},{"id":15566,"name":"天马路/将军路","regionName":"天马路/将军路","count":20},{"id":15569,"name":"行政中心","regionName":"行政中心","count":36},{"id":15570,"name":"淮川街道","regionName":"淮川街道","count":31},{"id":15571,"name":"集里街道","regionName":"集里街道","count":16},{"id":15954,"name":"大瑶镇","regionName":"大瑶镇","count":2},{"id":18488,"name":"洞阳镇","regionName":"洞阳镇","count":21},{"id":39702,"name":"汽车西站","regionName":"汽车西站","count":""},{"id":39704,"name":"观礼台","regionName":"观礼台","count":""},{"id":40763,"name":"集里医院","regionName":"集里医院","count":3},{"id":41215,"name":"开心广场","regionName":"开心广场","count":""},{"id":42796,"name":"百姓广场","regionName":"百姓广场","count":4},{"id":43385,"name":"体育中心","regionName":"体育中心","count":1},{"id":43671,"name":"大瑶通程广场","regionName":"大瑶通程广场","count":1},{"id":43812,"name":"永安大道","regionName":"永安大道","count":10},{"id":46005,"name":"美神广场","regionName":"美神广场","count":3},{"id":46407,"name":"步行街","regionName":"步行街","count":26}],"3493":[{"id":3493,"name":"全部","regionName":"望城区","count":596},{"id":20717,"name":"望城步行街","regionName":"望城步行街","count":27},{"id":20723,"name":"太平洋服饰广场","regionName":"太平洋服饰广场","count":4},{"id":36943,"name":"正荣财富中心","regionName":"正荣财富中心","count":28},{"id":37381,"name":"长沙医学院","regionName":"长沙医学院","count":77},{"id":37705,"name":"月亮岛","regionName":"月亮岛","count":237},{"id":37706,"name":"郭亮中路","regionName":"郭亮中路","count":10},{"id":37707,"name":"正荣广场","regionName":"正荣广场","count":21},{"id":38360,"name":"雷锋镇","regionName":"雷锋镇","count":34},{"id":38995,"name":"滨水新城","regionName":"滨水新城","count":8}],"3494":[{"id":3494,"name":"全部","regionName":"宁乡县","count":278},{"id":12349,"name":"沿江风光带外滩","regionName":"沿江风光带外滩","count":1},{"id":12350,"name":"春城万象/步行街","regionName":"春城万象/步行街","count":55},{"id":12351,"name":"南站","regionName":"南站","count":7},{"id":12352,"name":"东站","regionName":"东站","count":20},{"id":12353,"name":"人人乐","regionName":"人人乐","count":27},{"id":12422,"name":"灰汤温泉镇","regionName":"灰汤温泉镇","count":19},{"id":12651,"name":"一环北路","regionName":"一环北路","count":24},{"id":17337,"name":"豪德","regionName":"豪德","count":9},{"id":17339,"name":"金洲大道/一中","regionName":"金洲大道/一中","count":6},{"id":17341,"name":"大润发","regionName":"大润发","count":8},{"id":26795,"name":"玉潭镇","regionName":"玉潭镇","count":1},{"id":37540,"name":"黄材旅游区","regionName":"黄材旅游区","count":6},{"id":40304,"name":"沿江风光带","regionName":"沿江风光带","count":3},{"id":41189,"name":"宁乡大道","regionName":"宁乡大道","count":4},{"id":42271,"name":"金洲大道","regionName":"金洲大道","count":12}],"5894":[{"id":5894,"name":"全部","regionName":"长沙县","count":1355},{"id":9035,"name":"大众传媒","regionName":"大众传媒","count":49},{"id":9036,"name":"泉塘/星城国际","regionName":"泉塘/星城国际","count":234},{"id":9037,"name":"筑梦园/中南汽车世界","regionName":"筑梦园/中南汽车世界","count":53},{"id":9038,"name":"星沙一桥","regionName":"星沙一桥","count":152},{"id":9040,"name":"星沙通程广场","regionName":"星沙通程广场","count":40},{"id":10429,"name":"黄花镇/黄花机场","regionName":"黄花镇/黄花机场","count":65},{"id":10430,"name":"城西安置小区","regionName":"城西安置小区","count":90},{"id":17002,"name":"榔梨镇","regionName":"榔梨镇","count":56},{"id":36479,"name":"黄兴镇","regionName":"黄兴镇","count":27},{"id":36480,"name":"华润万象汇","regionName":"华润万象汇","count":25},{"id":40181,"name":"长沙经济技术开发区","regionName":"长沙经济技术开发区","count":412},{"id":45590,"name":"卜蜂莲花星沙店","regionName":"卜蜂莲花星沙店","count":52}]} + #areas=get_ALLareas('http://meishi.meituan.com/i/?ci=70&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1') + if(areas==None): + print("get areas error") + return None + get_alldetail(areas) +main() \ No newline at end of file diff --git a/src/webspider/proxy.txt b/src/webspider/proxy.txt new file mode 100644 index 0000000..3acaedb --- /dev/null +++ b/src/webspider/proxy.txt @@ -0,0 +1,10 @@ +118.212.107.154:9999 +175.43.58.35:9999 +218.66.253.146:8800 +115.209.125.144:3000 +222.94.196.39:3128 +106.14.214.136:3128 +49.75.59.242:3128 +171.35.215.2:9999 +113.195.153.46:9999 +113.121.39.225:9999 diff --git a/src/webspider/shop.txt b/src/webspider/shop.txt new file mode 100644 index 0000000..5f63f87 --- /dev/null +++ b/src/webspider/shop.txt @@ -0,0 +1,485 @@ + + +
+ + ++ 䬺小二北京涮羊肉(湘春路店) +
++ + + + +
++ 推荐菜 +
++ 商家概述 +
+IPå°å | +ç«¯å£ | +å¿åç级 | +代çç±»å | +ååºæ¶é´ | +å°çä½ç½® | +æè¿éªè¯æ¶é´ | +
---|---|---|---|---|---|---|
113.195.153.16 | +9999 | +é«å¿ | +HTTP,HTTPS | +0.30 ç§ | +æ±è¥¿çæå·å¸é»å·å¿ ? | +1å°æ¶30åå | +
115.53.33.15 | +9999 | +é«å¿ | +HTTP,HTTPS | +0.20 ç§ | +æ²³åçæ¿®é³å¸ èé | +1å°æ¶15åå | +
125.123.153.222 | +3000 | +é«å¿ | +HTTP,HTTPS | +0.11 ç§ | +æµæ±çåå ´å¸ååå¿ çµä¿¡ | +1å°æ¶34åå | +
123.131.201.167 | +9999 | +é«å¿ | +HTTP,HTTPS | +0.12 ç§ | +å±±ä¸ç临æ²å¸ èé | +1å°æ¶27åå | +
171.35.214.10 | +9999 | +é«å¿ | +HTTP | +13.50 ç§ | +æ±è¥¿çèä¹¡å¸ èé | +1å°æ¶29åå | +
125.123.157.25 | +3000 | +é«å¿ | +HTTP,HTTPS | +9.11 ç§ | +æµæ±çåå ´å¸ååå¿ çµä¿¡ | +1å°æ¶35åå | +
220.249.149.221 | +9999 | +é«å¿ | +HTTP | +12.64 ç§ | +ç¦å»ºçåå¹³å¸ èé | +58å38ç§å | +
175.42.158.26 | +9999 | +é«å¿ | +HTTP,HTTPS | +0.28 ç§ | +ç¦å»ºçèç°å¸ èé | +58å11ç§å | +
113.194.49.197 | +9999 | +é«å¿ | +HTTP,HTTPS | +0.69 ç§ | +æ±è¥¿çæå·å¸ èé | +1å°æ¶30åå | +
120.83.108.23 | +9999 | +é«å¿ | +HTTP,HTTPS | +0.44 ç§ | +广ä¸çæé³å¸æ®å®å¸ èé | +1å°æ¶3åå | +
1å·å®¢æ
+ +2å·å®¢æ
+TOP
+