利用爬虫成功爬取美团店铺信息

master
liukunlin123 4 years ago
parent ac020f2430
commit 37261c2457

@ -0,0 +1,6 @@
# 爬虫文件
**meituan.py**:爬取美团数据的文件,里面爬取了店铺名字、评分、地址等信息,修改数据库参数后直接就可以运行 \
**stringTodict.py**:将字符串转化为字典的辅助文件,例如把"{key1:value1,key2:value2}"转化为dict{key1:value1,key2:value2};它和json.load不一样,主要在修改data或者headers时使用 \
**shop.txt**:存储店铺html的文件 \
**shopurl.txt**:存储店铺详情页url的文件 \
**string.txt**:搭配stringTodict.py使用,将需要转化的字符串写在里面

File diff suppressed because one or more lines are too long

@ -0,0 +1,10 @@
118.212.107.154:9999
175.43.58.35:9999
218.66.253.146:8800
115.209.125.144:3000
222.94.196.39:3128
106.14.214.136:3128
49.75.59.242:3128
171.35.215.2:9999
113.195.153.46:9999
113.121.39.225:9999

@ -0,0 +1,485 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<meta content="meituan.com" name="for"/>
<title>
商家详情
</title>
<meta content="meishi" name="lx:category"/>
<meta content="imt" name="lx:appnm"/>
<meta content="c_6NCia" name="lx:cid"/>
<meta content="off" name="lx:autopv"/>
<link href="//analytics.meituan.net" rel="dns-prefetch"/>
<link href="/i/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
<script>
"use strict";!function(){var e=0<arguments.length&&void 0!==arguments[0]?arguments[0]:"_Owl_",a=window;a[e]||(a[e]={isRunning:!1,isReady:!1,preTasks:[],dataSet:[],pageData:[],disableMutaObserver:!1,observer:null,use:function(e,t){this.isReady&&a.Owl&&a.Owl[e](t),this.preTasks.push({api:e,data:[t]})},add:function(e){this.dataSet.push(e)},run:function(){var t=this;if(!this.isRunning){this.isRunning=!0;var e=a.onerror;a.onerror=function(){this.isReady||this.add({type:"jsError",data:arguments}),e&&e.apply(a,arguments)}.bind(this),(a.addEventListener||a.attachEvent)("error",function(e){t.isReady||t.add({type:"resError",data:[e]})},!0);var i=window.MutationObserver||window.WebKitMutationObserver||window.MozMutationObserver,r=window.performance||window.WebKitPerformance;if(i&&r){var n=-1,s=window.navigator.userAgent;if(-1<s.indexOf("compatible")&&-1<s.indexOf("MSIE")?(new RegExp("MSIE (\d+\.\d+);").test(s),n=parseFloat(RegExp.$1)):-1<s.indexOf("Trident")&&-1<s.indexOf("rv:11.0")&&(n=11),-1!==n&&n<=11)return void(this.disableMutaObserver=!0);try{this.observer=new i(function(e){t.pageData.push({mutations:e,startTime:r.now()})}),this.observer.observe(document,{childList:!0,subtree:!0})}catch(e){console.log("mutationObserver err")}}else this.disableMutaObserver=!0}}},a[e].run())}();
</script>
<script>
(function() { var ua = window && window.navigator && window.navigator.userAgent; if (!ua) return; var text, scale = 1.0; window.ratio = 1; if (/(ipad|iphone|ipod)/i.exec(ua)) { if (window.devicePixelRatio >= 2 && !false && window.__wxjs_environment !== 'miniprogram') { scale *= 0.5; window.ratio *= 2; } } text = '<meta name="viewport" content="initial-scale=' + scale + ',' + 'maximum-scale=' + scale +', minimum-scale=' + scale + ',' + 'width=device-width, height=device-height, user-scalable=no" />'; document.write(text); if (window.ratio) { document.documentElement.style.fontSize = 50*window.ratio + "px"; function CalcRem(){ if(false){ var defaultWidth = window.ratio === 2 ? 750 : 375; var docEl = document.documentElement; var winWidth = docEl.clientWidth || defaultWidth; if(winWidth <= 414 * window.ratio){ document.documentElement.style.fontSize = (winWidth / defaultWidth) * (50* (window.ratio || 1)) + 'px'; } } } CalcRem(); window.addEventListener('resize', CalcRem, false); document.documentElement.classList.add('ratio--' + window.ratio) } })();
</script>
<script>
!(function (win, doc, ns) {
var cacheFunName = '_MeiTuanALogObject';
win[cacheFunName] = ns;
if (!win[ns]) {
var _LX = function () {
_LX.q.push(arguments);
return _LX;
};
_LX.q = _LX.q || [];
_LX.l = +new Date();
win[ns] = _LX;
}
})(window, document, 'LXAnalytics');
;(function(){
var ua = window && window.navigator && window.navigator.userAgent;
var medium = 'Android';
if (ua && /(ipad|iphone|ipod)/i.exec(ua)) {
medium = 'iPhone';
}
var environment = {
uuid: '44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F',
userid: '0',
os: medium,
cityid: "70"
}
var valLab = {"custom":{"source":"undefined"}} || null;
LXAnalytics('pageView', valLab, environment, 'c_6NCia');
})();
</script>
<script>
!function(e,t,n){function s(){var e=t.createElement("script");e.async=!0,e.src="https://s1.meituan.net/bs/js/?f=mta-js:mta.min.js";var n=t.getElementsByTagName("script")[0];n.parentNode.insertBefore(e,n)}if(e.MeituanAnalyticsObject=n,e[n]=e[n]||function(){(e[n].q=e[n].q||[]).push(arguments)},"complete"===t.readyState)s();else{var r="addEventListener",i="attachEvent";if(e[r])e[r]("load",s,!1);else if(e[i])e[i]("onload",s);else{var a=e.onload;e.onload=function(){s(),a&&a()}}}}(window,document,"mta"),function(e,t,n){if(t&&!("_mta"in t)){t._mta=!0;var s=e.location.protocol;if("file:"!==s){var r=e.location.host,i=t.prototype.open;t.prototype.open=function(t,n,a,o,h){if(this._method="string"==typeof t?t.toUpperCase():null,n){if(0===n.indexOf("http://")||0===n.indexOf("https://")||0===n.indexOf("//"))this._url=n;else if(0===n.indexOf("/"))this._url=s+"//"+r+n;else{var l=s+"//"+r+e.location.pathname;l=l.substring(0,l.lastIndexOf("/")+1),this._url=l+n}var u=this._url.indexOf("?");-1!==u?(this._searchLength=this._url.length-1-u,this._url=this._url.substring(0,u)):this._searchLength=0}else this._url=null,this._searchLength=0;return this._startTime=(new Date).getTime(),i.apply(this,arguments)};var a="onreadystatechange",o="addEventListener",h=t.prototype.send;t.prototype.send=function(t){function n(n,r){if(0!==n._url.indexOf(s+"//frep.meituan.net/_.gif")){for(var i="browser.ajax",a=[98,114,111,119,115,101,114,46,97,106,97,120],o=0,h=i.length;h>o;o++)if(i.charCodeAt(o)!==a[o])return;var l;if(n.response)switch(n.responseType){case"json":l=JSON&&JSON.stringify(n.response).length;break;case"blob":case"moz-blob":l=n.response.size;break;case"arraybuffer":l=n.response.byteLength;case"document":l=n.response.documentElement&&n.response.documentElement.innerHTML&&n.response.documentElement.innerHTML.length+28;break;default:l=n.response.length}e.mta("send",i,{url:n._url,method:n._method,error:!(0===n.status.toString().indexOf("2")||304===n.status),responseTime:(new 
Date).getTime()-n._startTime,requestSize:n._searchLength+(t?t.length:0),responseSize:l||0})}}if(o in this){var r=function(e){n(this,e)};this[o]("load",r),this[o]("error",r),this[o]("abort",r)}else{var i=this[a];this[a]=function(t){i&&i.apply(this,arguments),4===this.readyState&&e.mta&&n(this,t)}}return h.apply(this,arguments)}}}}(window,window.XMLHttpRequest,"mta");
mta("create","57eccef521215c6492427c9d");
mta("config", "beaconImage", "https://frep.meituan.com/_.gif");
mta("send","page");
</script>
<link href="//s1.meituan.net/bs/cssm?f=meis/meishi.mobile:page/poi/detail/index.css@da8f38f" rel="stylesheet" type="text/css"/>
</head>
<body>
<div id="app">
<div class="main-content" data-react-checksum="899206285" data-reactid="1" data-reactroot="">
<header class="navbar" data-reactid="2">
<div class="nav-wrap-left" data-reactid="3">
<a class="react back" data-reactid="4" href="#">
<i class="text-icon icon-back" data-reactid="5">
</i>
</a>
</div>
<div class="nav-header" data-reactid="6">
商家详情
</div>
<div class="nav-wrap-right" data-reactid="7">
<a class="" data-reactid="8" href="#">
<span class="nav-btn" data-reactid="9">
<i class="text-icon icon-sp icon-collect" data-reactid="10">
</i>
<!-- react-text: 11 -->
收藏
<!-- /react-text -->
</span>
</a>
<a class="" data-reactid="12" href="#">
<span class="nav-btn" data-reactid="13">
<i class="text-icon icon-sp icon-menu" data-reactid="14">
</i>
<!-- react-text: 15 -->
导航
<!-- /react-text -->
</span>
</a>
</div>
<div class="nav-dropdown" data-reactid="16">
<ul data-reactid="17">
<li data-reactid="18">
<a class="" data-reactid="19" href="https://i.meituan.com/">
<i class="text-icon icon-sp icon-home" data-reactid="20">
</i>
<!-- react-text: 21 -->
首页
<!-- /react-text -->
</a>
</li>
<li data-reactid="22">
<a class="" data-reactid="23" href="https://i.meituan.com/account/">
<i class="text-icon icon-sp icon-person-34" data-reactid="24">
</i>
<!-- react-text: 25 -->
我的
<!-- /react-text -->
</a>
</li>
</ul>
</div>
</header>
<div class="content-wrapper" data-reactid="26">
<div class="banner-download" data-reactid="27">
<div class="imgbox" data-reactid="28" style="height:NaNrem;width:100%;">
<div class="lazyload-placeholder" data-reactid="29" style="height:0;">
</div>
</div>
<a class="call-app-btn btn-invisible" data-reactid="30" href="#">
</a>
<a class="download-btn btn-invisible" data-reactid="31" href="#">
</a>
</div>
<div class="poi-detail-wrap" data-reactid="32">
<div class="block top" data-reactid="33">
<div class="pic-wrap" data-reactid="34">
<div class="pic-area" data-reactid="35">
<div class="imgbox" data-reactid="36" style="height:3.20rem;width:100%;">
<div class="lazyload-placeholder" data-reactid="37" style="height:0;">
</div>
</div>
</div>
<div class="poi-album-tip" data-reactid="38">
<a class="react" data-reactid="39" href="/i/poi/album/67667865">
<div class="album-thumb-wrapper" data-reactid="40">
<i class="icon-sp icon-album img-tip" data-reactid="41">
</i>
</div>
</a>
</div>
</div>
<div class="poiinfo-wrap" data-reactid="42">
<div class="name" data-reactid="43">
<p class="poi-brand" data-reactid="44">
䬺小二北京涮羊肉(湘春路店)
</p>
<div class="poi-score" data-reactid="45">
<span class="score" data-reactid="46">
<span class="stars" data-reactid="47">
<i class="icon icon-star-orange-lg" data-reactid="48" style="width:0.28rem;height:0.28rem;">
</i>
<i class="icon icon-star-orange-lg" data-reactid="49" style="width:0.28rem;height:0.28rem;">
</i>
<i class="icon icon-star-orange-lg" data-reactid="50" style="width:0.28rem;height:0.28rem;">
</i>
<i class="icon icon-star-orange-lg" data-reactid="51" style="width:0.28rem;height:0.28rem;">
</i>
<i class="icon icon-star-orange-half-lg" data-reactid="52" style="width:0.28rem;height:0.28rem;">
</i>
<em class="star-text" data-reactid="53">
4.4
</em>
</span>
</span>
<span class="avg-price" data-reactid="54">
<!-- react-text: 55 -->
人均:¥
<!-- /react-text -->
<!-- react-text: 56 -->
63
<!-- /react-text -->
</span>
</div>
</div>
<div class="addr" data-reactid="57">
<a class="react" data-reactid="58" href="https://apis.map.qq.com/tools/poimarker?type=0&amp;marker=coord:28.209151,112.980104;title:䬺小二北京涮羊肉(湘春路店);addr:开福区湘春路125号&amp;referer=meituan&amp;key=YBDBZ-HVPKW-GQ6RD-ROY6L-4YYWF-EHB3C">
<i class="icon-sp icon-location" data-reactid="59">
</i>
<div class="poi-address" data-reactid="60">
开福区湘春路125号
</div>
</a>
<p class="phone" data-reactid="61">
<a class="poi-phone" data-reactid="62" href="javascript:;">
<i class="icon-sp icon-tel" data-reactid="63">
</i>
</a>
</p>
</div>
<div class="" data-reactid="64" style="display:none;">
<div class="meishi-msg-bg" data-reactid="65">
</div>
<div class="msg-option" data-reactid="66" id="msg">
<div class="msg-bd" data-reactid="67">
拨打电话
</div>
<div class="msg-option-btns" data-reactid="68">
</div>
<button class="btn msg-btn-cancel" data-reactid="69" type="button">
取消
</button>
</div>
</div>
</div>
</div>
<div class="block app-bar" data-reactid="70" style="display:none;">
<a class="react app-link" data-reactid="71" href="https://i.meituan.com/client/">
<div class="more" data-reactid="72">
<span data-reactid="73">
App专享
</span>
<span class="more-after" data-reactid="74">
美团客户端
</span>
</div>
</a>
<ul class="app-activity" data-reactid="75">
<!-- react-text: 76 -->
<!-- /react-text -->
<!-- react-text: 77 -->
<!-- /react-text -->
</ul>
</div>
<div data-reactid="78">
</div>
<div class="block" data-reactid="79" style="display:none;">
<p class="block-title mag-left" data-reactid="80">
推荐菜
</p>
<div class="recommond-wrapper" data-reactid="81">
</div>
</div>
<div class="block all-comment" data-reactid="82">
<a class="block-title react mag-left" data-reactid="83" href="https://i.meituan.com/poi/67667865/feedbacks">
<span class="stars" data-reactid="84">
<i class="icon icon-star-orange-lg" data-reactid="85" style="width:0.34rem;height:0.34rem;">
</i>
<i class="icon icon-star-orange-lg" data-reactid="86" style="width:0.34rem;height:0.34rem;">
</i>
<i class="icon icon-star-orange-lg" data-reactid="87" style="width:0.34rem;height:0.34rem;">
</i>
<i class="icon icon-star-orange-lg" data-reactid="88" style="width:0.34rem;height:0.34rem;">
</i>
<i class="icon icon-star-orange-half-lg" data-reactid="89" style="width:0.34rem;height:0.34rem;">
</i>
<em class="star-text" data-reactid="90">
4.4
</em>
</span>
<span class="pull-right" data-reactid="91">
<!-- react-text: 92 -->
3333
<!-- /react-text -->
<!-- react-text: 93 -->
条评价
<!-- /react-text -->
</span>
</a>
<!-- react-text: 94 -->
<!-- /react-text -->
<div class="comment-wrapper mag-left" data-reactid="95">
</div>
<div class="buy-comments" data-reactid="96">
<a class="react pad-left" data-reactid="97" href="https://i.meituan.com/poi/67667865/feedbacks">
<div class="more" data-reactid="98">
<!-- react-text: 99 -->
查看全部
<!-- /react-text -->
<!-- react-text: 100 -->
3333
<!-- /react-text -->
<!-- react-text: 101 -->
条评价
<!-- /react-text -->
</div>
</a>
</div>
</div>
<div class="block pad-left" data-reactid="102" style="display:block;">
<p class="block-title" data-reactid="103">
商家概述
</p>
<div class="row" data-reactid="104">
<span class="col-left" data-reactid="105">
WIFI
</span>
<span class="col-right" data-reactid="106">
不支持WIFI
</span>
</div>
<div class="row" data-reactid="107">
<span class="col-left" data-reactid="108">
营业时间
</span>
<span class="col-right" data-reactid="109">
周一至周日
10:00-02:00
</span>
</div>
<!-- react-text: 110 -->
<!-- /react-text -->
</div>
<div class="lazyload-placeholder" data-reactid="111">
</div>
<div class="nav-bread" data-reactid="112">
<!-- react-text: 113 -->
当前位置:
<!-- /react-text -->
<a class="" data-reactid="114" href="#">
<!-- react-text: 115 -->
长沙
<!-- /react-text -->
<!-- react-text: 116 -->
团购
<!-- /react-text -->
</a>
<span data-reactid="117">
&gt;
</span>
<a class="" data-reactid="118" href="#">
<!-- react-text: 119 -->
䬺小二北京涮羊肉(湘春路店)
<!-- /react-text -->
<!-- react-text: 120 -->
团购
<!-- /react-text -->
</a>
<!-- react-text: 121 -->
<!-- /react-text -->
</div>
</div>
</div>
<a class="top-btn" data-reactid="122" href="javascript:;">
<span class="icon icon-top" data-reactid="123">
</span>
</a>
<div class="footer-container" data-reactid="124">
<div class="footer-bar" data-reactid="125">
<div class="pull-right" data-reactid="126">
<span data-reactid="127">
城市:
</span>
<a class="btn btn-weak footer-city-btn" data-reactid="128" href="https://i.meituan.com/index/changecity/">
长沙
</a>
</div>
<div class="login-container" data-reactid="129">
<a class="btn btn-weak" data-reactid="130" href="https://i.meituan.com/account/login">
登录
</a>
<a class="btn btn-weak" data-reactid="131" href="https://i.meituan.com/account/signup">
注册
</a>
</div>
</div>
<div class="footer-nav" data-reactid="132">
<ul data-reactid="133">
<li data-reactid="134">
<a class="" data-reactid="135" href="https://i.meituan.com/">
首页
</a>
</li>
<li data-reactid="136">
<a class="" data-reactid="137" href="https://i.meituan.com/orders/all">
订单
</a>
</li>
<li data-reactid="138">
<a class="" data-reactid="139" href="https://i.meituan.com/client">
客户端
</a>
</li>
<li data-reactid="140">
<a class="" data-reactid="141" href="https://www.meituan.com?pcstyle=1">
电脑版
</a>
</li>
<li data-reactid="142">
<a class="" data-reactid="143" href="https://i.meituan.com/help/">
帮助
</a>
</li>
</ul>
</div>
<div class="footer-links" data-reactid="144">
<!-- react-text: 145 -->
友情链接:
<!-- /react-text -->
<a class="" data-reactid="146" href="https://m.maoyan.com/?channel=touch_group">
猫眼电影
</a>
</div>
<div class="footer-copy-right" data-reactid="147">
<div class="hr" data-reactid="148">
</div>
<span class="footer-copy-right-text" data-reactid="149">
<!-- react-text: 150 -->
©2016 美团网
<!-- /react-text -->
<!-- react-text: 151 -->
<!-- /react-text -->
<a class="" data-reactid="152" href="http://www.miibeian.gov.cn/">
京ICP证070791号
</a>
</span>
</div>
</div>
<div class="meishi-msg-toast" data-reactid="153" style="display:none;">
<div class="meishi-msg-toast-content" data-reactid="154">
</div>
</div>
<div class="" data-reactid="155" style="display:none;">
<div class="meishi-msg-bg" data-reactid="156">
</div>
<div class="meishi-msg-doc" data-reactid="157">
<!-- react-text: 158 -->
<!-- /react-text -->
<div class="meishi-msg-bd" data-reactid="159">
</div>
<div class="meishi-msg-ft" data-reactid="160">
<a class="meishi-msg-btn meishi-msg-btn-cancel" data-reactid="161" href="javascript:;">
取消
</a>
<a class="meishi-msg-btn meishi-msg-btn-ok" data-reactid="162" href="javascript:;">
确定
</a>
</div>
</div>
</div>
<!-- react-text: 163 -->
<!-- /react-text -->
</div>
</div>
<script>
window.domLoadedTime = new Date().valueOf();
</script>
<script crossorigin="anonymous" src="//www.dpfile.com/app/owl/static/owl_latest.js">
</script>
<script>
Owl.start({ devMode: false, project: 'com.sankuai.meishi.fe.i', onErrorPush: function(error) {
// 将一部分级别为 error 的错误降级为 warn
if (error.sec_category === "unhandledrejection" || error.sec_category === "Cannot redefine property: platform" || error.sec_category === "Cannot read property 'trigger' of undefined") {
error.level = 'warn';
}
return error;
}, pageUrl: window.location.href, page: { sample: 1, sensoryIndex: true, disableSensoryImageIndex: true, interactToStopObserver: true, getFirstScreenTime90: true, auto: true }, metric: { sample: 1, combo: false }, resource: { sampleApi: 1 }, logan: { enable: true } });
</script>
<script>
</script>
<script crossorigin="anonymous" src="//s1.meituan.net/bs/jsm?f=meis/meishi.mobile:vendor/core.min.js,vendor/fastclick.js@da8f38f">
</script>
<script crossorigin="anonymous" src="//awp-assets.meituan.net/hfe/hfe-assets/ravenjs/1.3.0/raven.min.js">
</script>
<script crossorigin="anonymous" src="//s1.meituan.net/bs/knb/v1.6.5/knb.js">
</script>
<script crossorigin="anonymous">
</script>
<script crossorigin="anonymous" src="//s1.meituan.net/bs/jsm?f=meis/meishi.mobile:page/common.js@da8f38f">
</script>
<script crossorigin="anonymous">
window.appConfig={"buildservice":{"repo":"meis/meishi.mobile","host":"s1.meituan.net/bs","hash":"da8f38f","folder":"static"},"isDebug":false,"isOnline":true}
</script>
<script crossorigin="anonymous">
window._appState = {"$meta":{"knbJS":"//s1.meituan.net/bs/knb/v1.6.5/knb.js","adunionJS":"//h5.dianping.com/app/adu-track/adunion-track.js","catJs":"//www.dpfile.com/app/owl/static/owl_latest.js","catDevMode":false,"uuid":"44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F","iuuid":"44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F","userId":"0","cityId":"70","userName":"","scene":"i","cityName":"长沙","id":"67667865","showAppDownload":false,"appDownloadBannerConfig":{"isAndroid":false,"isIPhone":false,"isWx":false,"isInApp":false,"isOpenApp":true,"isShow":true,"requestUrl":"https://i.meituan.com/poi/category/1/banner?userId=0&cityId=70&utm_source=","openAppUrl":"https://m.dianping.com/cube/evoke/meituan.html?url=imeituan%3A%2F%2Fwww.meituan.com%2Fmerchant%3Fchannel%3Dfood%26id%3D67667865&web=https%3A%2F%2Fmeishi.meituan.com%2Fi%2Fpoi%2F67667865%3Fct_poi%3D039983686503578720714434927158109738584_a67667865_c3_e6115592339103147033","appUrl":"imeituan://www.meituan.com/merchant?channel=food&id=67667865","doNotOpenApp":false},"appDownloadConfig":{"requestUrl":"https://i.meituan.com/poi/category/1/banner?userId=0&cityId=70&utm_source=","openAppUrl":"https://m.dianping.com/cube/evoke/meituan.html?url=imeituan%3A%2F%2Fwww.meituan.com%2Fmerchant%3Fchannel%3Dfood%26id%3D67667865&web=https%3A%2F%2Fmeishi.meituan.com%2Fi%2Fpoi%2F67667865%3Fct_poi%3D039983686503578720714434927158109738584_a67667865_c3_e6115592339103147033","appUrl":"imeituan://www.meituan.com/merchant?channel=food&id=67667865","doNotOpenApp":false},"pvLab":{"custom":{"source":"undefined"}},"pageId":"c_6NCia","title":"商家详情"},"pageId":"c_6NCia","title":"商家详情","poiInfo":{"poiId":67667865,"wifi":0,"name":"䬺小二北京涮羊肉(湘春路店)","frontImg":"https://img.meituan.net/msmerchant/fbe5739ab3f8ad2bde31cc54889bfb1675499.jpg","avgScore":4.4,"avgPrice":63,"addr":"开福区湘春路125号","phone":"0731-88611922","lng":112.980104,"lat":28.209151,"isWaimai":0,"isQueuing":0,"isPay":0,"parkingInfo":"","hasParking":0,"openIn
fo":"周一至周日\n10:00-02:00","cates":"1,20632,20426,17,20625,20633,20427,20639","subcate":[17,20625,20633,20427,20639],"areaId":6019,"cityId":70,"showStatus":1,"brandId":0,"mallId":0,"mallName":"","showType":"food","isNativeSm":0,"MarkNumbers":3333,"scoreSource":0,"monthEatCount":-1,"officialFrontImgs":[]},"crawlerMeta":{"uuid":"d8534775d38547fb847a.1605577694.1.0.0","version":"8.2.0","platform":3,"app":"","partner":126,"riskLevel":1,"optimusCode":10,"originUrl":"http://meishi.meituan.com/i/poi/67667865?ct_poi=039983686503578720714434927158109738584_a67667865_c3_e6115592339103147033"}};
</script>
<script crossorigin="anonymous" src="//s1.meituan.net/bs/jsm?f=meis/meishi.mobile:page/poi/detail/index.js@da8f38f">
</script>
<script crossorigin="anonymous">
if (!window._turboClasses) {
console.error("turbo class not found");
} else {
var PageClass = window._turboClasses['client/page/poi/detail1'];
if (PageClass) {
window._turboRoot = (window.ReactDOM || window.React).render(React.createElement(PageClass), document.getElementById('app'));
}
}
</script>
<script async="" charset="utf-8" crossorigin="anonymous" src="//analytics.meituan.net/analytics.js" type="text/javascript">
</script>
<script crossorigin="anonymous" src="//h5.dianping.com/app/adu-track/adunion-track.js">
</script>
</body>
</html>

@ -0,0 +1,4 @@
name:霸碗盖码饭泊富I CITY店,url:https://meishi.meituan.com/i/poi/181444337?ct_poi=039983686503578720714434927158109738584_a181444337_c0_e7949362056828733497
name:炒珍香风干腊鲢鱼(湘春路店),url:https://meishi.meituan.com/i/poi/974042912?ct_poi=039983686503578720714434927158109738584_a974042912_c1_e7949362056828733497
name:大龙燚火锅泊富ICITY店,url:https://meishi.meituan.com/i/poi/195013870?ct_poi=122158265828281293714884202732868835265_a195013870_c2_e7949362056828733497_v5316079820897191544__295
name:288黄焖鸡米饭,url:https://meishi.meituan.com/i/poi/93070532?ct_poi=039983686503578720714434927158109738584_a93070532_c3_e7949362056828733497

@ -0,0 +1,9 @@
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9
Connection: keep-alive
Cookie: td_cookie=4183583641; Hm_lvt_9bfa8deaeafc6083c5e4683d7892f23d=1605961786; Hm_lpvt_9bfa8deaeafc6083c5e4683d7892f23d=1605961926
Host: www.xiladaili.com
Referer: http://www.xiladaili.com/gaoni/2/
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 28 09:50:29 2020
@author: 坤林
"""
def stringtodict(filename):
    """Parse a text file of ``key: value`` lines into a dictionary.

    Each non-blank line is split at its FIRST colon only, so values that
    themselves contain colons (URLs, timestamps, ``host:port``) are kept
    intact.  Surrounding whitespace is removed from both key and value, and
    a value wrapped in a matching pair of double quotes has the quotes
    removed.  Blank lines and lines without a colon are skipped.  When a key
    occurs more than once the last occurrence wins.

    The resulting dictionary is printed (the original behaviour of this
    helper) and also returned so that callers can use it directly.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``dict`` mapping each key string to its value string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    result = {}
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(filename, "r") as infile:
        for raw_line in infile:
            # partition() splits on the first colon only; split(":") would
            # cut "Referer: http://host/" down to "http".
            key, sep, value = raw_line.strip().partition(":")
            if not sep:
                # Blank line or a line with no "key: value" structure.
                continue
            value = value.strip()
            if len(value) >= 2 and value[0] == "\"" and value[-1] == "\"":
                value = value[1:-1]
            result[key.strip()] = value
    print(result)
    return result
# Script entry: parse "string.txt" (resolved against the current working
# directory) and print the resulting dictionary.
stringtodict("string.txt")

@ -0,0 +1,348 @@
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head lang="en">
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
<title>免费代理IP - 大陆高匿代理IP - 开心代理IP平台|高质量代理IP|免费代理IP</title>
<meta name="Keywords" content="代理IP|开心代理IP平台|买代理IP|提取代理IP|高质量代理IP|免费代理IP|国内ip代理|IP代理|最新代理IP|今日可用代理IP">
<meta name="Description" content="开心代理IP平台是国内领先的代理IP平台,时时刻刻更新代理IP,代理IP有效率高达99%,每天流水40万代理IP,深受广大用户喜爱的代理IP平台,使用代理IP就上开心代理IP平台。">
<meta name="baidu-site-verification" content="TtodfZQ0uA" />
<link rel="shortcut icon" href="/img/product/fav.ico" type="image/x-icon">
<link rel="stylesheet" href="/css/common.css?ver=02" >
<link rel="stylesheet" href="/libs/skins/zhujiwuDialog.css">
<link rel="stylesheet" href="/css/bannerSource.css?ver=02">
<link rel="stylesheet" href="/css/home.css" >
<link rel="stylesheet" href="/css/front.css" >
<meta name="baidu-site-verification" content="cBXi6llu7Y" />
<script src="/libs/jquery.min.js"></script>
<script src="/libs/jquery.dialog.js"></script>
<script src="/libs/jquery.cookie.js"></script>
<script src="/js/front.js"></script>
<script src="/js/index.js"></script>
<script src="/js/module/home/index.js"></script>
<script src="/libs/iframeTools.js"></script>
<script src="/js/common.js"></script>
<script src="/libs/public.js"></script>
</head>
<!--header-->
<div class="banner-box">
<div class="top-nav header-main">
<div class="header default-transition-fast">
<div class="header-wrapper auto clearfix">
<div class="header-left">
<div class="header-logo">
<a href="/" tppabs="http://ip.kxdaili.com/" class="logo hide-text"></a>
</div>
<div class="header-nav">
<ul>
<li class="header-nav-li">
<a href="/" tppabs="http://ip.kxdaili.com/">首页</a>
</li>
<li class="header-nav-li">
<a href="/dailiip.html" tppabs="http://ip.kxdaili.com/dailiip.asp">免费代理</a>
</li>
<li class="header-nav-li">
<a href="/ActiveProxy.html" tppabs="http://ip.kxdaili.com/ActiveProxy.asp">动态Http代理</a>
</li>
<li class="header-nav-li">
<a href="/Socks5ActiveProxy.html" tppabs="http://ip.kxdaili.com/Socks5ActiveProxy.asp">动态Socks5代理</a>
</li>
<li class="header-nav-li">
<a href="/daili.html" tppabs="http://ip.kxdaili.com/news.asp">资讯中心</a>
</li>
<li class="header-nav-li">
<a href="/about.html" tppabs="http://ip.kxdaili.com/about.asp">联系我们</a>
</li>
</ul>
</div>
</div>
<div class="header-nav-right">
<div class="topbar-right">
<div class="welcome-container" id="userinfoContainer">
<div class="logout-container clearfix">
<a href="/login.html" class="login-button">请登录</a>
<a href="/reg.html" class="register-button">免费注册</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!--header end-->
<link rel="stylesheet" href="/css/index.css">
<body class="banner-engine">
<!--header-->
<div class="header-container">
<div class="domain-search-outer">
<div class="domain-title">
<span style="margin-left:-200px;">关注用户体验</span><br/><span style="margin-left:200px;">提升服务价值</span>
</div>
</div>
<!--header end-->
<!--免费代理-->
<div class="domain-block price-block" >
<div class="module-title">
免费HTTP代理IP
</div>
<div class="auto">
<div class="hot-product">
<div class="hot-product-title">
<ul class="clearfix" id="productTitle">
<a href="/dailiip.html"><li data-index="0" class="active">高匿代理</li></a>
<a href="/dailiip/2/1.html"><li data-index="1">普匿代理</li></a>
</ul>
<div style="float:right;margin-top:-25px;"><a href="/ActiveProxy.html">想要更多、质量更好的代理IP,请点这里购买</a></div>
</div>
<div class="hot-product-content">
<!-- 高匿 -->
<table class="active">
<thead>
<tr>
<th>IP地址</th>
<th>端口</th>
<th>匿名等级</th>
<th>代理类型</th>
<th>响应时间</th>
<th>地理位置</th>
<th>最近验证时间</th>
</tr>
</thead>
<tbody>
<tr>
<td>113.195.153.16</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.30 秒</td>
<td>江西省抚州市黎川县 ?</td>
<td>1小时30分前</td>
</tr>
<tr class="warning">
<td>115.53.33.15</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.20 秒</td>
<td>河南省濮阳市 联通</td>
<td>1小时15分前</td>
</tr>
<tr>
<td>125.123.153.222</td>
<td>3000</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.11 秒</td>
<td>浙江省嘉兴市嘉善县 电信</td>
<td>1小时34分前</td>
</tr>
<tr class="warning">
<td>123.131.201.167</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.12 秒</td>
<td>山东省临沂市 联通</td>
<td>1小时27分前</td>
</tr>
<tr>
<td>171.35.214.10</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP</td>
<td>13.50 秒</td>
<td>江西省萍乡市 联通</td>
<td>1小时29分前</td>
</tr>
<tr class="warning">
<td>125.123.157.25</td>
<td>3000</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>9.11 秒</td>
<td>浙江省嘉兴市嘉善县 电信</td>
<td>1小时35分前</td>
</tr>
<tr>
<td>220.249.149.221</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP</td>
<td>12.64 秒</td>
<td>福建省南平市 联通</td>
<td>58分38秒前</td>
</tr>
<tr class="warning">
<td>175.42.158.26</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.28 秒</td>
<td>福建省莆田市 联通</td>
<td>58分11秒前</td>
</tr>
<tr>
<td>113.194.49.197</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.69 秒</td>
<td>江西省抚州市 联通</td>
<td>1小时30分前</td>
</tr>
<tr class="warning">
<td>120.83.108.23</td>
<td>9999</td>
<td>高匿</td>
<td>HTTP,HTTPS</td>
<td>0.44 秒</td>
<td>广东省揭阳市普宁市 联通</td>
<td>1小时3分前</td>
</tr>
</tbody>
</table>
</div>
</div>
<!-- 提示语 -->
<div class="table-sub-reminder">
<div id="listnav">
<ul><li>第</li>
1&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/2.html">2</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/3.html">3</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/4.html">4</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/5.html">5</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/6.html">6</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/7.html">7</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/8.html">8</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/9.html">9</a></li>&nbsp;&nbsp;&nbsp;<li><a href="/dailiip/1/10.html">10</a></li><li>页</li></ul>
</div>
</div>
</div>
</div>
<!--footer-->
<div class="footer-banner">
<div class="footer-banner-button">
<div style="width:1200px;">
温馨提示:<br>
€ 免费代理是通过网络扫描得来,用的人多,俗称“万人骑”;<br>
€ 表中的响应速度仅供参考,因网络环境不同有差异;<br>
€ 本站对免费代理的有效性、稳定性等不负责任;<br>
€ 建议合法使用免费代理,因网络用户使用免费代理带来的法律责任,与本站无关。
</div>
</div>
</div>
<!--footer-->
<footer class="footer">
<div class="inner">
<div class="main">
<a href="#" class="logo logo_footer">
<span class="h1">开心代理</span>
<span class="h2">专业提供国内高质量IP</span>
</a>
<div class="address ">
<p class="change_address">警告:本站不搜集数据不存储数据,也不买卖数据,本站资源仅限用来计算机技术学习参考及大数据应用等合法行为,用户所有操作行为均有日志记录存档并保留¸ªæœˆï¼Œç”¨æˆ·è‹¥æ“…自利用本站资源从事任何违反本国(地区)法律法规的活动,由此引起的一切后果与本站无关。</p>
</div>
</div>
<p class="contact">
版权所有&nbsp;<span class="company-name">苏州渡云科技有限公司</span>&nbsp;&nbsp;<span class="host_hide">Powered by Tuling tech</span>&nbsp;
<a class="host_hide" href="http://beian.miit.gov.cn/" target="_blank">ICP经营许可证:苏B-20190126</a>&nbsp;
VPN经营许可证:B1-20190662
</p>
</div>
</footer>
<div class="float-consult"><ul>
<li>
<p class="cloumn cs1">1号客服</p>
<div class="consult preale" style="display: none;">
<h2>1号客服<span>服务时间:08:30AM-21:00PM</span></h2>
<div class="list">
<div class="contact-item">
<i class="icon-qq"></i>
<a href="http://wpa.qq.com/msgrd?v=3&uin=3415411572&site=qq&menu=yes;" target=_blank class="qq-line">
<i class="icon-QQ"></i>
<div class="contact-row">
·å®¢æœä¸ºæ‚¨æœåŠ¡
</div>
<div class="contact-row">
QQ号码:3415411572
</div>
</a>
</div>
<div class="contact-item">
<i class="icon-gongdan"></i>
<a href="http://wpa.qq.com/msgrd?v=3&uin=3415411572&site=qq&menu=yes;" target=_blank class="submit-gongdan">
联系在线QQ
</a>
</div>
</div>
</div>
</li>
<li>
<p class="cloumn cs2">2号客服</p>
<div class="consult preale" style="display: none;">
<h2>2号客服<span>服务时间:08:30AM-21:00PM</span></h2>
<div class="list">
<div class="contact-item">
<i class="icon-qq"></i>
<a href="http://wpa.qq.com/msgrd?v=3&uin=3415411572&site=qq&menu=yes;" target=_blank class="qq-line">
<i class="icon-QQ"></i>
<div class="contact-row">
·å®¢æœä¸ºæ‚¨æœåŠ¡
</div>
<div class="contact-row">
QQ号码:1665559929
</div>
</a>
</div>
<div class="contact-item">
<i class="icon-gongdan"></i>
<a href="http://wpa.qq.com/msgrd?v=3&uin=1665559929&site=qq&menu=yes;" target=_blank class="submit-gongdan">
联系在线QQ
</a>
</div>
</div>
</div>
</li>
<li class="back-top" style="visibility: visible;">
<p class="cloumn cs8">TOP</p>
</li>
</ul></div>
<!--footer end-->
<div style="display:none"><script type="text/javascript" src="https://js.users.51.la/17751595.js"></script></div>
</body>
</html>

File diff suppressed because one or more lines are too long

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 21 12:41:01 2020
@author: lenovo
"""
#爬取各类代理服务器网站,测试代理服务器是否正常工作
import requests
import json
import time
from lxml import etree
#headers={'Accept': 'application/json', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Connection': 'keep-alive', 'Content-Length': '409', 'Content-Type': 'application/json', 'Cookie': '__mta=210687658.1605577786961.1605758347437.1605946218758.18; uuid=d8534775d38547fb847a.1605577694.1.0.0; _lxsdk_cuid=175d3e36e17c8-01633738a10df2-930346c-144000-175d3e36e17c8; iuuid=44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F; cityname=%E9%95%BF%E6%B2%99; _lxsdk=44343588721193C3A0F12E707D0D0797385C06FC2CE3FA10A9175C2100CBCF5F; webp=1; _hc.v=8603bfe6-42d3-0a08-24bb-d4c8b9495adb.1605577787; _lx_utm=utm_source%3Dblog.csdn.net%26utm_medium%3Dreferral%26utm_content%3D%252Fxing851483876%252Farticle%252Fdetails%252F81842329; mtcdn=K; __utma=74597006.1199398655.1605577785.1605581851.1605702245.3; __utmz=74597006.1605702245.3.3.utmcsr=blog.csdn.net|utmccn=(referral)|utmcmd=referral|utmcct=/xing851483876/article/details/81842329; latlng=28.234696,113.007313,1605702248066; i_extend=C_b1Gimthomepagecategory11H__a; client-id=f25a6222-ad55-482e-a20f-2c54c8e25049; ci=70; meishi_ci=70; cityid=70; logan_session_token=0xvxlr0um1t9b83vjde2; _lxsdk_s=175e9da988d-781-e72-fb4%7C%7C2', 'Host': 'meishi.meituan.com', 'Origin': 'https://meishi.meituan.com', 'Referer': 'https://meishi.meituan.com/i/?ci=70&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1', 'Sec-Fetch-Dest': 'empty', 'Sec-Fetch-Mode': 'cors', 'Sec-Fetch-Site': 'same-origin', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36', 'x-requested-with': 'XMLHttpRequest'}
# URL fetched through each candidate proxy by test() to see whether the proxy responds.
test_url="http://httpbin.org/get"
#url="https://meishi.meituan.com/i/?ci=70&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1"
# Sample proxy addresses in "ip:port" form. The functions below do not read them
# (each one works with its own local or parameter named `proxy`).
proxy1='171.35.146.184:9999'
proxy='171.35.146.128:9999'
def crawl_kxdaili():
    """Scrape the kxdaili.com proxy listing and save the working proxies.

    Listing pages 1-10 are downloaded.  Each table row whose fourth cell is
    exactly ``'HTTP,HTTPS'`` is turned into an ``ip:port`` string, printed,
    probed with ``test()`` and, when the probe succeeds, written to
    ``proxy.txt`` (one proxy per line).  ``proxy.txt`` is truncated at the
    start of every run.

    Returns:
        bool: always ``True``.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cookie': 'td_cookie=4173532687; ASPSESSIONIDCSQRARTB=KPIMEAGABNEDNIFKBIFOOADC; __51cke__=; td_cookie=4169960367; ASPSESSIONIDASRSDSTA=MMDMFOGAOHIJLFNAIMAAHHPC; __tins__17751595=%7B%22sid%22%3A%201605951710898%2C%20%22vd%22%3A%201%2C%20%22expires%22%3A%201605953510898%7D; __51laig__=16',
        'Host': 'www.kxdaili.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    }
    row_xpath = 'body/div[@class="banner-box"]/div[@class="header-container"]/div[@class="domain-block price-block"]/div[@class="auto"]/div[@class="hot-product"]/div[@class="hot-product-content"]/table[@class="active"]/tbody/tr'
    # The context manager closes proxy.txt even when a request or a parse
    # step raises, so proxies already written are flushed to disk.
    with open('proxy.txt', 'w') as file:
        for i in range(1, 11):
            print('get kx page %d' % i)
            url = 'http://www.kxdaili.com/dailiip/1/%d.html' % i
            # Same timeout as test(): one stalled page must not hang the run.
            r = requests.get(url, headers=header, timeout=16)
            html = etree.HTML(r.text)
            for data in html.xpath(row_xpath):
                tds = data.xpath('td')
                # Rows with fewer than four cells cannot carry ip, port
                # and protocol, so they are skipped.
                if len(tds) < 4:
                    continue
                ip, port = tds[0].text, tds[1].text
                if ip is None or port is None:
                    continue
                if tds[3].text == 'HTTP,HTTPS':
                    proxy = ip + ':' + port
                    print(proxy)
                    if test(proxy):
                        file.write(proxy + '\n')
    return True
def xila():
    """Scrape the xiladaili.com high-anonymity listing and append working proxies.

    The first three listing pages are downloaded, waiting 10 seconds before
    each request.  Each table row whose second cell is exactly
    ``'HTTP,HTTPS代理'`` contributes the text of its first cell as the proxy
    address; it is printed, probed with ``test()`` and, when the probe
    succeeds, appended to ``proxy.txt``.  When a page yields no rows the raw
    response text is printed to help diagnose the failure.
    """
    url = 'http://www.xiladaili.com/gaoni/'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'td_cookie=4183583641; Hm_lvt_9bfa8deaeafc6083c5e4683d7892f23d=1605961786,1606045616; Hm_lpvt_9bfa8deaeafc6083c5e4683d7892f23d=1606045616',
        'Host': 'www.xiladaili.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    }
    row_xpath = 'body/div[@class="position-relative"]/div[@class="container mt-4"]/div[@class="mt-0 mb-2 table-responsive"]/table/tbody/tr'
    # The context manager closes proxy.txt even when a request or a parse
    # step raises part-way through the crawl.
    with open('proxy.txt', 'a') as file:
        for i in range(1, 4):
            print("get xila page %d" % i)
            # Page 1 lives at the base URL; later pages append "<n>/".
            goalurl = url + str(i) + '/' if i >= 2 else url
            time.sleep(10)
            # Same timeout as test(): one stalled page must not hang the run.
            r = requests.get(goalurl, headers=headers, timeout=16)
            html = etree.HTML(r.text)
            datas = html.xpath(row_xpath)
            if not datas:
                print(r.text)
            for data in datas:
                tds = data.xpath('td')
                # Rows with fewer than two cells cannot carry address and protocol.
                if len(tds) < 2:
                    continue
                if tds[1].text == 'HTTP,HTTPS代理':
                    proxy = tds[0].text
                    print(proxy)
                    if test(proxy):
                        file.write(proxy + '\n')
def test(proxy):
    """Probe a proxy by fetching ``test_url`` through it.

    Args:
        proxy: proxy address in ``ip:port`` form.

    Returns:
        bool: ``True`` when the request completed within 16 seconds without
        raising, ``False`` otherwise.  The HTTP status code of the response
        is not inspected.
    """
    p = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
    try:
        requests.get(test_url, proxies=p, timeout=16)
    except Exception:
        # Deliberately broad: this is a best-effort probe and any failure
        # means "unusable proxy".  Unlike a bare ``except:`` it lets
        # KeyboardInterrupt/SystemExit through, so Ctrl-C still stops the crawl.
        print('timeout')
        return False
    print('success')
    return True
def low_risk():
    """Copy ``proxy.txt`` from the working directory to ``../proxy.txt``.

    The destination is overwritten.  Both files are opened in text mode with
    the platform default encoding and copied line by line.
    """
    # One ``with`` for both handles: each is closed even if the copy raises.
    with open('proxy.txt', 'r') as file1, open('../proxy.txt', 'w') as file2:
        for line in file1:
            file2.write(line)
# html=etree.HTML(r.text)
# datas=html.xpath('body/script[@crossorigin="anonymous"]')
# for data in datas:
# if(data.text!=None):
# strs=data.text[:16]
# if (strs == 'window._appState'):
# result = data.text[19:-1]
# result=json.loads(result)
# print(result['navBarData']['areaObj'])
# file=open("first.txt",'w',encoding='utf-8')
# file.write(r.text)
# file.close()
#crawl_kxdaili()
#test('218.66.253.144:8800')
# Script body (runs on import as well as on direct execution):
# 1. crawl_kxdaili() rewrites proxy.txt from scratch,
# 2. xila() appends to the same file,
# 3. low_risk() copies the result to ../proxy.txt.
crawl_kxdaili()
xila()
low_risk()

@ -0,0 +1,10 @@
118.212.107.154:9999
175.43.58.35:9999
218.66.253.146:8800
115.209.125.144:3000
222.94.196.39:3128
106.14.214.136:3128
49.75.59.242:3128
171.35.215.2:9999
113.195.153.46:9999
113.121.39.225:9999

@ -0,0 +1,71 @@
## 美团(美食)店铺信息爬虫
&emsp; 通过接口抓取美团美食店铺信息,并做相关的数据分析。
## 项目目录
```html
Meituan
│ common.py
│ config.py
│ meituan.py
│ parse.py
│ token_.py
│ visual.py
│ requirements.txt
├─utils
│ br.json
│ cities.json
│ ua.log
│ uuid.log
└─view
FZSTK.TTF
key.png
qin.png
title.txt
```
## 环境依赖
```shell
pip3 install -r requirements.txt
```
## 解释说明
1. 接口动态参数:uuid、_token。
2. 接口参数 uuid 需要不定时从网页源码获取,否则 _token 的 uuid 就会失效。
3. 接口 _token 参数的加密方式:二进制压缩、Base64 编码;解密方式:Base64 解码、二进制解压。另外,生成 token 的 sign 参数的加密解密过程与 _token 相同。
## 运行
&emsp; 切换至 Meituan 文件夹的根目录执行:
```
# pip3 install -r requirements.txt
python common.py
python meituan.py
```
## 数据分析
- 美食店铺名称词云
![词云](https://github.com/Northxw/Meituan/blob/master/view/key.png)
- 西安美食店铺排行榜前10名( 仅限美团数据 )
![2](https://github.com/Northxw/Meituan/blob/master/view/top10.jpg)
- MySql 数据
![4](https://github.com/Northxw/Meituan/blob/master/view/db.png)
&emsp; 另外,还做了美食店铺评分占比、人均用餐价与评论数量的相关联性分析。
## 公告
&emsp; **本代码仅作学习交流,切勿用于商业用途,否则后果自负。若涉及美团侵权,请邮箱联系,会尽快处理。**

@ -0,0 +1,21 @@
from config import HEADERS,CITIES,CITYNAME,TIMEOUT
from lxml import etree
import requests
def meishi_cateId():
    """Read the food-category filter from the chs.meituan.com listing page.

    Returns:
        dict: maps each category's path segment (the part of the link after
        the listing URL, without the trailing character) to the text of the
        category link.  The mapping is also printed.
    """
    meishi_url = 'https://chs.meituan.com/meishi/'
    resq = requests.get(meishi_url, headers=HEADERS, timeout=TIMEOUT)
    html = etree.HTML(resq.text)
    datas = html.xpath('body/div[@id="app"]/section[@class="poiList-wrap clear"]/div[@class="content clear"]/div[@class="left"]/div[@class="filter"]/div[@class="condition"]/div[@class="cont clear"][@data-reactid="17"]/ul[@class="more clear"]/li')
    # Links are sliced relative to the listing URL instead of a hard-coded
    # offset, so the slice stays correct if the URL above is edited.
    prefix_len = len(meishi_url)
    cateIds = {}
    for data in datas:
        href = data.xpath('a/@href')
        li = data.xpath('a')
        # Skip list items that carry no link rather than failing on [0].
        if not href or not li:
            continue
        cateIds[href[0][prefix_len:-1]] = li[0].text
    print(cateIds)
    return cateIds
if __name__ == '__main__':
meishi_cateId()

@ -0,0 +1,81 @@
# -*- coding:utf-8 -*-
import requests
from pyquery import PyQuery as pq
import hashlib
import pymysql
from sqlalchemy import create_engine
import pandas as pd
import logging
import random
import json
from config import *
import re
def get_cities():
    """Build the city-name -> URL-segment table and store it as JSON.

    The change-city page is downloaded, every ``.cities a`` link is mapped
    from its text to the third ``/``-separated piece of its href (after
    replacing ``.`` with ``/``), the mapping is printed and then written to
    ``./utils/cities.json``.
    """
    page_source = requests.get('https://www.meituan.com/changecity/').text
    anchors = pq(page_source)('.cities a').items()
    cities = {
        anchor.text(): anchor.attr('href').replace('.', '/').split('/')[2]
        for anchor in anchors
    }
    print(cities)
    with open('./utils/cities.json', 'w', encoding='utf-8') as out:
        json.dump(cities, out, indent=2, ensure_ascii=False)
def get_uuid():
    """Extract the first ``"uuid"`` value from the bj.meituan.com listing page.

    The value is written to ``./utils/uuid.log``.  An ``IndexError`` is
    raised when the page contains no ``"uuid":"..."`` pair.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36"
    }
    page_source = requests.get('https://bj.meituan.com/meishi/', headers=browser_headers).text
    candidates = re.findall(r'"uuid":"(.*?)"', page_source, re.S)
    first_uuid = candidates[0]
    with open('./utils/uuid.log', 'w') as out:
        out.write(first_uuid)
def save(data):
    """Append one record to the configured MySQL table.

    Args:
        data: mapping of column name to scalar value; it is turned into a
            one-row DataFrame and appended to ``TABLE``.

    Errors raised while creating the engine or opening the connection
    propagate.  Errors raised while building or inserting the row are logged
    and swallowed, so one bad record does not stop the crawl.
    """
    engine = create_engine('mysql+pymysql://{}:{}@{}:{}/{}?charset=utf8'.format(USER, PASS, HOST, PORT, DB))
    try:
        # The connection is closed when the block exits instead of being
        # left open after every call.
        with engine.connect() as connect:
            try:
                df = pd.DataFrame(data, index=[0])
                df.to_sql(name=TABLE, con=connect, if_exists='append', index=False)
            except Exception as e:
                # Pass the exception as a lazy logging argument.  The former
                # '"..." % e.args' raised TypeError inside this handler
                # whenever e.args did not hold exactly one item.
                logging.error("\nError: %s, Please check the error.\n", e)
    finally:
        # A new engine is created per call, so release its pool as well.
        engine.dispose()
def get_md5(url):
    """Return the hexadecimal MD5 digest of *url*.

    A ``str`` is UTF-8 encoded first; any other value is handed to
    ``hashlib`` unchanged.
    """
    payload = url.encode('utf-8') if isinstance(url, str) else url
    return hashlib.md5(payload).hexdigest()
def xdaili_proxy():
    """Pick proxies from the list returned by the ``API`` endpoint.

    The endpoint's JSON ``RESULT`` entries (each with ``ip`` and ``port``)
    are turned into ``http://ip:port`` URLs.  One is chosen at random for
    ``http`` and one, independently, for ``https``.

    Returns:
        dict: a ``proxies`` mapping with the keys ``http`` and ``https``.
    """
    payload = requests.get(url=API).json()
    agents = []
    for entry in payload['RESULT']:
        agents.append("http://{}:{}".format(entry['ip'], entry['port']))
    http_agent = random.choice(agents)
    https_agent = random.choice(agents)
    return {"http": http_agent, "https": https_agent}
def abuyun_proxy():
    """Build a ``proxies`` mapping from the PROXY_* settings.

    Returns:
        dict: the same ``http://user:pass@host:port`` URL under both the
        ``http`` and the ``https`` key.
    """
    settings = {
        "host": PROXY_HOST,
        "port": PROXY_PORT,
        "user": PROXY_USER,
        "pass": PROXY_PASS,
    }
    proxy_url = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % settings
    return dict.fromkeys(("http", "https"), proxy_url)
if __name__ == '__main__':
get_cities()
get_uuid()

@ -0,0 +1,111 @@
# -*- coding:utf-8 -*-
import json
import os
import random

import pandas as pd
from fake_useragent import UserAgent
# City whose listings are crawled; it must be a key of utils/cities.json.
CITYNAME = '长沙'
# os.path.join keeps the path valid on every platform (the former
# hard-coded '\\' separators only worked on Windows).
cities_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'utils', 'cities.json')
with open(cities_path, encoding='utf-8') as f:
    # The file is JSON data, so it is parsed, never executed with eval().
    CITIES = json.load(f)
# Listing API endpoint for the selected city; the query string is appended to it.
BASE_URL = "https://{}.meituan.com/meishi/api/poi/getPoiList?".format(CITIES[CITYNAME])
# USER-AGENT
# os.path.join keeps the path valid on every platform.
log_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'utils', 'ua.log')
df = pd.read_csv(log_path, sep='\t')
# Pick any row of the log.  The former fixed range 0..1000 raised IndexError
# whenever the log held fewer than 1001 rows.
user_agent = df["UA"].iloc[random.randrange(len(df))]
HEADERS = {
    "Accept": "application/json",
    "Referer": "https://{}.meituan.com/".format(CITIES[CITYNAME]),
    # A fixed User-Agent is sent; the two alternatives below are kept for reference.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36",
    # "User-Agent": UserAgent().random,
    # "User-Agent": user_agent
}
# UUID
# os.path.join keeps the path valid on every platform.
uuid_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'utils', 'uuid.log')
with open(uuid_path) as f:
    # Strip surrounding whitespace: a trailing newline (e.g. after editing
    # the file by hand) would otherwise end up in the request parameters.
    UUID = f.read().strip()
# Query fields shared by the request parameters and the signature string.
DATA = {
    "cityName": CITYNAME,
    "cateId": '0',
    "areaId": "0",
    "sort": "",
    "dinnerCountAttrId": "",
    "page": "1",
    "userId": "",
    "uuid": UUID,
    "platform": "1",
    "partner": "126",
    "originUrl": "https://{}.meituan.com/meishi/".format(CITIES[CITYNAME]),
    "riskLevel": "1",
    "optimusCode": "1"
}
# GET PARAMETER
# Same keys and values as DATA, in the same order.  "_token" is not part of
# it here.
GET_PARAM = dict(DATA)
# SIGN PARAMETER
# "key=value" pairs joined with "&", keys in ascending order.
# NOTE(review): built once at import time from the defaults above
# (page "1", cateId '0') — confirm whether it should follow per-request values.
SIGN_PARAM = "&".join("{}={}".format(key, DATA[key]) for key in sorted(DATA))
# TIME OUT
# Passed as the `timeout` argument of requests.get() by the crawler modules.
TIMEOUT = 5
# MAX PAGES
MAX_PAGES = 2
# MYSQL SETTINGS
# Connection parameters used by common.save() and visual.View.
# NOTE(review): the database password is committed in plain text; consider
# loading it from the environment or an untracked file instead.
HOST = 'localhost'
USER = 'root'
PASS = 'lkl15220319'
PORT = 3306
DB = 'meituan'
TABLE = 'meishi'
# PROXY API
# URL requested by common.xdaili_proxy(); it is empty here, so that helper
# cannot work until a real endpoint is filled in.
API = ''
# PROXY SETTINGS
# Host, port and credentials interpolated into the proxy URL built by
# common.abuyun_proxy().
# NOTE(review): these credentials are committed in plain text as well.
PROXY_HOST = "http-dyn.abuyun.com"
PROXY_PORT = "9020"
PROXY_USER = "HU4C31nmfiDR57D"
PROXY_PASS = "2D4F3B8489F5FC91"
if __name__ == '__main__':
# print(os.path.dirname(os.path.realpath(__file__)))
pass

@ -0,0 +1,67 @@
# -*- coding:utf-8 -*-
from token_ import encrypt_token
from urllib.parse import urlencode
from common import save, xdaili_proxy, abuyun_proxy
from parse import parse_json
import logging
import json
import requests
import time
import random
import multiprocessing
from config import GET_PARAM, HEADERS, TIMEOUT, MAX_PAGES, BASE_URL
from cateId import meishi_cateId
#from visual import View
def main(base_url, page, cateid):
    """Fetch one result page of one category and store every shop on it.

    Args:
        base_url: API endpoint ending in ``?``; the encoded query string is
            appended to it.
        page: page number to request.
        cateid: category id sent as the ``cateId`` parameter and used as the
            key into the module-level ``cateIds`` mapping to label each row.

    Returns:
        The ``totalCounts`` value of the response, or 15 when the response
        carried no usable data.

    Network and JSON decoding errors propagate.  Errors while reading or
    storing the payload are logged and swallowed, so the crawl can continue
    with the next page.
    """
    # The shared GET_PARAM dict is updated in place; "_token" is regenerated
    # for every request.
    GET_PARAM['cateId'] = str(cateid)
    GET_PARAM["_token"] = encrypt_token()
    GET_PARAM['page'] = str(page)
    url = base_url + urlencode(GET_PARAM)
    response = json.loads(requests.get(url, headers=HEADERS, timeout=TIMEOUT).text)
    totalcount = 15
    try:
        totalcount = response['data']['totalCounts']
        infos = response['data']['poiInfos']
        for info in infos:
            data = parse_json(info)
            data['cateId'] = cateIds[cateid]
            print(data, sep='\n')
            save(data)
    except Exception as e:
        # Read the status code defensively: response['code'] used to raise a
        # second exception from inside this handler when the payload had no
        # 'code' key or was not a JSON object.
        code = response.get('code') if isinstance(response, dict) else None
        logging.warning(" Response status code: {}, Requests was found, no target data was obtained!".format(code))
        # Keep the underlying error available for debugging instead of discarding it.
        logging.debug("main() failed on page %s of category %s: %r", page, cateid, e)
    return totalcount
if __name__ == '__main__':
# Multiprocessing variant (disabled)
# pool = multiprocessing.Pool(multiprocessing.cpu_count())
# for page in range(1, MAX_PAGES + 1):
# pool.apply_async(main, (BASE_URL, page))
# pool.close()
# pool.join()
# Fetch data: crawl every category returned by meishi_cateId().
# main() reads the module-level name `cateIds`, so it has to be bound here
# before main() is called.
cateIds=meishi_cateId()
print(cateIds)
for cateid in cateIds.keys():
# Each category starts at page 1; 15 is a placeholder total so that the
# first page is always requested.
page=1
totalcount=15
# Keep paging while the items covered so far, (page-1)*15, are fewer than
# the total reported by the previous main() call.
while((page-1)*15<totalcount):
totalcount=main(BASE_URL, page, cateid)
page+=1
# Pause for 3 seconds to throttle the crawl.
time.sleep(3)
# Visualization / analysis (disabled)
# view = View()
# view.meishi_top10()
# view.avgprice_comments()
# view.avgscore_ratio()
# view.wrodcloud()

@ -0,0 +1,28 @@
# -*- coding:utf-8 -*-
from config import HOST,CITIES,CITYNAME
from common import get_md5
import re
def parse_json(info):
    """Flatten one shop entry of the listing API into a storage-ready dict.

    Args:
        info: one element of the response's ``poiInfos`` list.

    Returns:
        dict: ``id`` (MD5 of the detail URL), ``detail``, ``title``,
        ``avgprice``, ``avgscore``, ``comments``, ``frontimg`` and
        ``address``, in that order.
    """
    detail_url = 'http://{host}.meituan.com/meishi/{id}/'.format(host=CITIES[CITYNAME], id=info['poiId'])
    return {
        'id': get_md5(detail_url),
        'detail': detail_url,
        'title': info['title'],
        'avgprice': info['avgPrice'],
        'avgscore': info['avgScore'],
        'comments': info['allCommentNum'],
        'frontimg': info['frontImg'],
        'address': info['address'],
    }
def parse_detail_page(response):
    """Extract phone, opening hours and tags from a shop detail page.

    Args:
        response: object exposing the page source as ``response.text``.

    Returns:
        dict: ``phone`` and ``opentime`` hold the first ``"phone"`` /
        ``"openTime"`` value found in the page, and ``tags`` holds every
        ``"text"`` value joined with ``|``.  A field that does not occur in
        the page is the empty string.
    """
    text = response.text

    def first(pattern):
        # First captured value or '' — the same "absent" value that `tags`
        # already produces — instead of an IndexError on a page without the field.
        found = re.search(pattern, text, re.S)
        return found.group(1) if found else ''

    return {
        'phone': first(r'"phone":"(.*?)"'),
        'opentime': first(r'"openTime":"(.*?)"'),
        'tags': '|'.join(re.findall(r'"text":"(.*?)"', text, re.S)),
    }

@ -0,0 +1,8 @@
fake-useragent>=0.1.11
requests>=2.21.0
pandas>=0.24.1
PyMysql>=0.9.3
pyquery>=1.4.0
SQLAlchemy>=1.3.3
wordcloud>=1.5.0
jieba>=0.39

@ -0,0 +1,62 @@
# -*- coding:utf-8 -*-
import base64, zlib
import time
import random
import pandas as pd
import os
from config import SIGN_PARAM
def sign():
    """Build the ``sign`` field of the token.

    ``SIGN_PARAM`` is encoded to bytes, zlib-compressed and Base64-encoded.
    The result is returned as text.
    """
    compressed = zlib.compress(SIGN_PARAM.encode())
    return base64.b64encode(compressed).decode()
def encrypt_token():
    """Build the ``_token`` request parameter.

    A token dict is assembled from the current time in milliseconds, a
    randomly chosen row of ``utils/br.json`` and ``sign()``.  Its ``str()``
    form is zlib-compressed and Base64-encoded.

    Returns:
        str: the Base64 text of the compressed token.
    """
    import json  # function-scope import: this module does not import json at file level

    ts = int(time.time() * 1000)  # time.time() is in seconds; the token uses milliseconds
    # os.path.join keeps the path valid on every platform (the former
    # hard-coded '\\' separators only worked on Windows).
    json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'utils', 'br.json')
    df = pd.read_json(json_path)
    # Each row carries three "[w,h]" strings; one row is picked at random.
    brVD, brR_one, brR_two = df.iloc[random.randint(0, len(df)-1)]
    token_data = {
        "rId": 100900,
        "ver": "1.0.6",
        "ts": ts,
        # Original author's note: the observed cts - ts gap is roughly 90-130.
        "cts": ts + random.randint(100, 120),
        # The cells are JSON arrays, so they are parsed as data rather than
        # executed with eval().
        "brVD": json.loads(brVD),
        "brR": [json.loads(brR_one), json.loads(brR_two), 24, 24],
        "bI": ["https://bj.meituan.com/meishi/", ""],
        "mT": [],
        "kT": [],
        "aT": [],
        "tT": [],
        "aM": "",
        "sign": sign()
    }
    # Compress, then Base64-encode.
    binary_data = zlib.compress(str(token_data).encode())
    base64_data = base64.b64encode(binary_data)
    return base64_data.decode()
# Helper for checking the decode/decompress logic
def decrypt_token_sign(token_sign):
    """Undo the token/sign encoding.

    Args:
        token_sign: Base64 text of zlib-compressed data.

    Returns:
        bytes: the decompressed payload.
    """
    return zlib.decompress(base64.b64decode(token_sign.encode()))
if __name__ == '__main__':
# sign = 'eJxVjl1vgkAQRf/LvkrcXRAoJj5gEYRikI8q2vQBcaTIx1pAqm3637umbdImk9w7Z87DfKDG3qMxJUQjREA9NGiM6JAMFSSgruUXWVZkIioqGamqgNL/TNZGAto1KwONn6ikEEEk2vONBBx8E1W5exb+VHHE5+bYXEEvXXdqxxjvjsMK8u6c1MOUVZj39iXH/AfE1SriKs/iJ5Of7H73BX+au22e1byB81YeI+rp7zM/OA/Ct45WIty3hmO7pc4cElv6Om7Fah1pDM82edT76ahKs6Xnw53vSHuvPmHosqkJfUx9I4twBlEz993kig9LaaBtYcMC2O5fy2ka7Jj8sA6Pj23RgproIZHVfHrFou24bqppy4u00ovsdFFXLrPk1NDCxsDnbb2xStMTL1YZgO/FJlvQhdzDrHYZdTeN9Wo27wf62IBSmwOxyox6WxziJJ1Th0x7oJl/PgSVVYAE7GEeOvZVn0zQ5xf47IrC'
# print(decrypt_token_sign(sign).decode())
print(encrypt_token())

@ -0,0 +1,87 @@
[
{
"barVD": "[150,625]",
"brR_one": "[1366, 768]",
"brR_two": "[1366, 728]"
},
{
"barVD": "[886,635]",
"brR_one": "[1366,768]",
"brR_two": "[1366,738]"
},
{
"barVD": "[1560,219]",
"brR_one": "[1600,900]",
"brR_two": "[1600,860]"
},
{
"barVD": "[1366,225]",
"brR_one": "[1366,768]",
"brR_two": "[1366,768]"
},
{
"barVD": "[1366,209]",
"brR_one": "[1366,768]",
"brR_two": "[1366,768]"
},
{
"barVD": "[265,689]",
"brR_one": "[1280,800]",
"brR_two": "[1280,760]"
},
{
"barVD": "[1440,264]",
"brR_one": "[1440,900]",
"brR_two": "[1440,877]"
},
{
"barVD": "[800,150]",
"brR_one": "[800,600]",
"brR_two": "[800,560]"
},
{
"barVD": "[1024,318]",
"brR_one": "[1024,768]",
"brR_two": "[1024,728]"
},
{
"barVD": "[1280,150]",
"brR_one": "[1280,600]",
"brR_two": "[1280,560]"
},
{
"barVD": "[1280,150]",
"brR_one": "[1280,600]",
"brR_two": "[1280,600]"
},
{
"barVD": "[1280,270]",
"brR_one": "[1280,720]",
"brR_two": "[1280,680]"
},
{
"barVD": "[1280,161]",
"brR_one": "[1280,720]",
"brR_two": "[1280,720]"
},
{
"barVD": "[1280,198]",
"brR_one": "[1280,768]",
"brR_two": "[1280,728]"
},
{
"barVD": "[1280,209]",
"brR_one": "[1280,768]",
"brR_two": "[1280,768]"
},
{
"barVD": "[1360,198]",
"brR_one": "[1360,768]",
"brR_two": "[1360,728]"
},
{
"barVD": "[1360,209]",
"brR_one": "[1360,768]",
"brR_two": "[1360,768]"
}
]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1 @@
a409f599-76b4-45ce-b8c3-bf519eebfbba

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

@ -0,0 +1,750 @@
大刀涮肉
鑫海汇海鲜烤肉火锅自助
米斯特比萨
清真五味时代小郡肝串串香
喜多屋国际海鲜自助餐厅
山葵铁板烧
火红印象
鱼厂·专注香辣烤鱼
金福海自助餐厅
花田煮·臻牛
蝎王府羊蝎子
嗨捞猪肚鸡
春熙怀旧火锅
飘香盛世小阿妹米线
过锅瘾三汁焖锅
春熙怀旧火锅
辣香婆时尚自助火锅
清真·马骥家泡馍
花田煮·臻牛
胖之味果木烤鸭
飞鹿餐饮
花田煮·臻牛
鑫和炝锅鱼
百富烤霸
花田煮·臻牛
鑫海汇海鲜烤肉火锅自助
欧巴来了韩式料理
雁鸣印象
金迈源自助涮烤王
正新鸡排
花田煮·臻牛
老城味道
大益膳房
花田煮·臻牛
2068香辣虾
赵家腊汁肉
大秦彩狼球友酒吧俱乐部
老北京烤鸭
千福聚北京果木烤鸭
京御煌三汁焖锅
大城小鸡黄焖鸡排骨米饭
和福顺养生焖锅
萤初炉端烧
清真·穆巴拉克西餐厅
82°C养生焖锅
家一加火锅
老车家牛羊肉泡馍馆
聚丰苑烤鸭家常菜
丽江龙丽斑鱼庄
山葵铁板烧
上岛咖啡
花田煮·臻牛
牛一嘴·中国兰州牛肉拉面
喜多屋国际海鲜自助餐厅
萤初炉端烧
蘑菇爱上饭
啪啪派西餐厅
牵肠挂肚市井火锅
米斯特比萨
老李家黄焖鸡
麦格特西餐厅·Magnet牛排
聚丰苑烤鸭家常菜
西安辣欢天火锅美食城
花哪鲜炖燕窝
塞纳左岸咖啡
张生记重庆砂锅王
刘老四牛肉擀面庄
麦之香蛋糕
汉釜宫韩式烤肉
TheLittles亲子餐厅
清真·穆巴拉克西餐厅
老车家牛羊肉泡馍馆
金牌泰香米泰式咖喱蟹
瑞可爷爷的店
0057香辣虾
d.餐厅
黄品煌三汁焖锅
82°C养生焖锅
山河馋嘴诱惑精品川菜
啪啪派西餐厅
子云升牛肉面
重庆袍哥码头老火锅
味都西饼店
小蛮子纸包鱼
徐记海鲜
鑫和炝锅鱼
北京唐顺肥牛海鲜火锅
鱼酷活力烤鱼
飞鹿餐饮
川江滋香老火锅
椒伍佬豆花鱼
辣香婆时尚自助火锅
老李家黄焖鸡
艾神家咖啡体验店
韩味坊
和福顺养生焖锅
陈聚德牛肉馆
赵家腊汁肉
唐乐宫-法兰西百谷特面包屋
鱼酷活力烤鱼
王魁腊汁肉夹馍
臻膳轩自助涮烤城
御尚鲜纸包鱼
正新鸡排
潮尊牛丸火锅
沙湾桥头大盘鸡
云贵过桥米线
辣白菜韩式炭烧烤肉时尚馆
满贯融合烧烤
方燕烤猪蹄
韩味坊
兰花花 LANFAFA
麦之香蛋糕
臻膳轩自助涮烤城
鮨期Sushi Ichie
味都西饼店
东东包
重庆渝味晓宇火锅
金福海自助餐厅
黄品煌三汁焖锅
肆门火锅
维克蛋糕
西安君乐城堡酒店·雅庭西餐厅
鱼酷活力烤鱼
文老根米线
Vesweet cake威斯特蛋糕
我和青春有个约会·炭火烤肉
京祥铜锅涮肉
上岛咖啡
铁锅记印象老灶台
熙裕阁
兰州马子禄牛肉面
鱼鲜人家鱼火锅
三顾香牛羊肉泡馍
三原老黄家
有才叔的小馆
月红仟禧自助涮烤
毛记冒菜
思刻鲜作蛋糕工坊
清真渔坊秘制烤鱼
韩记饺子馆
五叔家串串老火锅
第一佳大鸡排
门里香葫芦头泡馍馆
遇见长安
荣辉园自助火锅
北京涮羊肉
斗牛士牛排餐厅
迈德思客
98私房牛肉面
鱼酷活力烤鱼
Giacomo's手风琴西餐厅
龙腾火锅
五叔家串串老火锅
爱悠悠时尚自助涮烤
猫咪餐厅
匠·和牛寿喜烧专门店
满香居香辣干锅
飞鹿火锅
一坐一忘丽江主题餐厅
ccake蛋糕
聚丰楼香辣虾
来波美蛙鱼火锅
挞挞家秘汁焖锅
姚家闻香铁锅炖
来波美蛙鱼火锅
老东关葫芦头
玉肠香葫芦头泡馍
大汗金刀涮肉
爱尚蛋糕
张记老蒲城羊肉
五叔家串串老火锅
薛垻子重庆老味火锅
天下第一面
李想大虾
王锦记冒菜香锅
金皇冠蛋糕
萤初炉端烧
玲祥大盘鸡
寿司和ta的朋友们
忒有范·串we will tei you
福气焖锅烤肉
安旗西点
味鲜炝锅鱼
我在七楼电影咖啡馆
曼岛咖啡
泓日料
干锅居
喵星人的猫咖啡
辣家私厨
北京烤鸭
北京烤鸭
张老坎儿麻辣烫
澄城崇斌水盆羊肉
M12铁板餐厅
江城小馆陕南妈妈菜
袁记肉夹馍
食色火锅
胡老三泥炉烤肉
阿姨奶茶专卖
火焰山自助美食汇
biangbiang面
蒂奥莎烘焙
闻道听香火锅
窄巷子陕菜馆
伊诺仕蛋糕
碰碰凉
糀·匠日式花式料理
飘香盛世小阿妹米线
土耳其卡巴布清真餐厅
糀·匠日式花式料理
御品轩
欧味轩艺术蛋糕
微糖烘焙
云南过桥米线
秦风肥牛
婷婷家的火锅
吉美诺蛋糕工坊
小伙伴铜盘火锅
重庆麻神辣将毛肚火锅
上岛咖啡
UD coffee 西餐厅
串一火锅
桥头米线涮牛肚
臻味醉淡泊铜锅涮肉
传之承铁锅炖
澄城水盆羊肉
五味小馆
食驿老北京涮肉
三姊妹香辣土豆片夹馍
ME CAKE西饼屋
意如意
莲花餐饮
维也纳咖啡
宁夏国强手抓羊肉馆·清真菜
老刘家牛羊肉泡馍
马有才牛肉拉面
串一火锅
皇冠假日酒店·海鲜自助餐厅
Venus Cake 维纳斯蛋糕工坊
鱼酷活力烤鱼
诚意和葫芦头
香客源自助火锅城
鱼酷活力烤鱼
花腰部落
克拉拉
日尝·美好包子铺
薡茶
兄弟大灶台
美阳馆
唐乐宫二十四节气调养火锅
食色火锅
长安大牌档之西游漫记
聚朋自助火锅
重庆渝味晓宇火锅
遇见长安
老单家葫芦头泡馍
关中大锅炖
不忘初心蛋糕
楼北楼牛羊肉泡馍
Enoch Cake 以诺蛋糕
小九老火锅
福临北京烤鸭
悠味烘焙坊
M2 Coffee
金花豪生国际大酒店餐厅
鲜羔楼三味火锅
重庆渝味晓宇火锅
拾堂
小田螺自助火锅
重庆渝味晓宇火锅
蜀渝香自助小火锅
老徐记葫芦头泡馍店
东北铁锅炖鱼
盛秦阁泡馍
聚福生葫芦头
怡香缘蛋糕
望江楼
老机场重庆跳水鱼老店
妙妙北京涮肉坊
赵家腊汁肉
秦巴源民间陕菜
鸿记煌三汁焖锅
思氏横山铁锅炖羊肉
京都肉饼王
妙妙北京涮肉坊
元木烘焙蛋糕坊
Cake for you啃啃面包
鸿记煌三汁焖锅
中田餐饮古城老菜馆
醉仙楼
重庆渝味晓宇火锅
黑土情酱骨庄
福膳居葫芦头
心诺蛋糕
希雅美烘焙
诚净和
心诺蛋糕
继光香香鸡
荷韵泡姜鸡特色火锅
星期五蛋糕
员苑咖啡
尚学社的咖啡馆
楚门7号油焖大虾
麦香蛋糕
石记铜盘火锅
心诺蛋糕
尚品蛋糕
台湾大鸡排
有才叔的小馆
Cake Talk 心语蛋糕房
清真尚品旋转自助火锅
鲜果时光
田园泡芙
佳美蛋糕
HappyMeet地表最强气泡水
陕南人家私房菜
老狼大盘鸡
甜咧咧烘焙店
新一天烘焙工作室
林江鲜米线店
麦斯特蛋糕
乐享食光烘焙工坊
新元海鲜粥城
鱼你搭档酸菜鱼·馋猫鱼饭
铭香楼蒲城水盆羊肉
㝇香过桥米线
朵莎蛋糕店
王府川菜馆
面道
鱼你搭档酸菜鱼·馋猫鱼饭
爱心蛋糕坊
旗王蛋糕
朵莎蛋糕店
老柳大块牛肉面
红红鱼庄
星期五蛋糕工坊
济州家韩国自助烧烤
分享时光
优·食社甜品定制
美好时光凯斯乐蛋糕店
小杨的店三番五次精品菜
麦斯特蛋糕
长寿米粉
聚珍园
麦草香蛋糕
百姓葫芦头泡馍
滚雪球小奶糕
提拉米苏蛋糕
小食光·年意式复合餐吧
传承山谷粥铺
坊上老马家泡馍·回坊烧烤
中田川菜
屋头串串香
优茶达人店
西安唐隆国际酒店百香西餐厅
麦香村蛋糕
美吉心语蛋糕
成都川菜鱼庄
佳香堡
泓沼轩热米皮
辣动时代自助小火锅
傻得冒冒菜馆
泓沼轩热米皮旗舰店
澄城张记水盆羊肉
同盛德泡馍
泓沼轩热米皮
香香大盘鸡
珍味林饺子馆
一家咖啡
岐风烙面皮
旺客基黄焖鸡米饭
一芳台湾水果茶
御品轩
澄城水盆羊肉
鱼你搭档酸菜鱼·馋猫鱼饭
香榭咖啡西餐厅
新美心蛋糕
嗨岛兔成都烤兔兔头
润雨·尚品海鲜大闸蟹
辣动时代自助小火锅
卡布奇诺蛋糕店
乐乐黄焖鸡排骨米饭
重庆大宇火锅
轻语法式蛋糕定制
小山峡巫山烤全鱼纸包鱼
晓丰快乐西饼
焦记biangbiang面
云南生态石锅鱼
鲜果爱尚粉
慕可可蛋糕
caffebene 咖啡陪你
同州大老碗手擀面
石洪腊牛肉夹馍
思味蛋糕屋
麦克士
舍外胡辣汤·轻食博物馆
胡杨林餐厅
湖南土菜馆
一期一笑
老陕老味
食色火锅
马文学腊牛羊肉
你懂的西饼屋
三鲜葫芦头泡馍
熊猫7茶
尕娃椒麻鸡
腾云铁板烧
则味咖啡馆
重庆德庄火锅
清真·聚鲜阁威海海鲜私房菜
老虎堂黑糖專売
米乐西饼店
金乐爱特蛋糕
biangbiang面
启明腊汁肉揪面片
云山餐饮陕北铁锅炖羊肉
BIU cafe&bar
东北人自助小火锅
三聚饺子馆
酒盏·甜品
四季香葫芦头
四季尚品西饼屋
安旗
三姐香辣涮冒菜世家
久沐餐厅
一芳台湾水果茶
云毅过桥米线
虎子鱻石锅鱼
焦记biangbiang面
雷超正宗水盆羊肉
火车头煲仔饭
海鲜先生爆肚面
沸腾鱼庄
竹辉石锅坊
稻本稻烫饭
卧龙烤鱼餐厅
老重庆活鱼庄
方记葫芦头泡馍馆
牛冲天麻辣香锅
周公岐山面
臻品老潼关肉夹馍
小天地川菜主题餐厅
大秦火锅
熊猫7茶
彭福记罐罐面
曲江国际饭店餐厅
长征国际酒店自助餐厅
福春缘葫芦头
口袋G排
建国饭店·蓉园川粤坊
曹记重庆老火锅
小竹签烤肉
杨家泡馍
奶语茶香
阿霞柳州螺蛳粉
香叶草蛋糕
英伦御品
重庆渝鸿生态火锅
成都小面
欧美特蛋糕
王二肉夹馍
清真老城涮烤
思彤驴蹄子面
百姓葫芦头
傻得冒
老东关饺子店
韩式纸上烧烤
七里香蒸饺
桥梓口牛羊肉泡馍
古都·金麦西饼屋
小竹签烤肉新派川菜
巴里岛咖啡
尚典咖啡
大街小巷 茶饮·小吃·冰淇淋
老白家牛羊肉泡馍
秦轩澄城水盆羊肉
鲜饮空间
阿强面馆
RF正宗重庆鸡公煲
29元尚上捞自助火锅
艾米客蛋糕
赵西安三鲜煮馍馆
帕帕亚Papaya意大利餐厅
安旗
小张鸡汤刀削面
汉釜宫韩式自助烧烤火锅
漫顿咖啡
乐美客
月盛斋
味一米线
张记饺子馆
馋喵海鲜私房馆
白菜心
会展国际酒店餐厅
吃货冒菜
洪家脊骨汤
同麦德老任家泡馍
VI cake
7273猫屁股猫咪主题西餐厅
艺术咖啡馆
沃稞面包工厂店
和运生葫芦头
大嘻咹biangbiang面
彤德莱
六婆串串香
姚家老潼关肉夹馍
恩希家农家乐
青都里·司厨
姥家大锅台
宝龙雪花陕北宝龙月饼
清欢咖啡馆
丫丫冒菜
巫山城外特色烤鱼
袁记肉夹馍
重庆石锅鱼
麦香园蛋糕
重庆鸡公煲
天朗时代大酒店餐厅
乐滋西点
欧乐米·cake
第五咖啡
紫来轩
古搜咖啡
口口私房锅
70年代土灶台
香香乐土豆粉
铭善园肥牛火锅
爱慕蛋糕烘培坊
赛尚西饼
70年代土灶台一锅炖
汉中黄辣丁私房菜馆
YJ重庆鸡公煲
老苗炸鸡
8090私房火锅
北京唐顺肥牛
云豪过桥米线
味品轩蛋糕屋
味香特蛋糕
韩式面包坊
尚品苑蛋糕工厂店
三顺紫菜包饭
皇家蛋糕房
左右奶茶
吴师傅芝麻酱烧饼
欧菲克生日蛋糕
馋莱客炸鸡汉堡
国秀新苑
老上海蛋糕
樊家肉夹馍
舒心缘精品家常菜
大家烤羊庄
火焰山牛排海鲜自助美食汇
老唐红鸭脖
蒙古王烤羊腿
朱雀国家森林公园
农耕年代
麦啦屋西饼房
螺小胖柳州螺蛳粉
嘉佳乐汉堡
老味道蛋糕坊
西安曲江寒窑遗址公园
妙妙屋西饼
柴门鱼庐餐厅
楚湘人家
老刘家葫芦头泡馍
龙泊宾馆
天佑国际酒店贵宾楼
汤峪温馨农家乐
蒂奥莎烘焙
长安雅集大厦餐厅
小厨味道
袁记肉夹馍
扯扯面馆
小样儿私房秘制焖锅
微甜生活
蜀椒鱼妹私房小馆
普素茶房
兄弟大灶台
重庆鸡公煲
京和鱼府
盛夏光年e号冰站
八喜冰淇淋蛋糕
川福小厨
西安中兴和泰酒店
米香园蛋糕
Merrykin麦里金
外婆家私房火锅
瑞云蛋糕房
大唐西市酒店
辣小兵串串火锅
玩味音乐海鲜自助火锅
清真•锦翔炝锅城
顺风肥牛
炉诱·香辣烤鱼
盛运涮烤自助餐厅
82℃养生焖锅
上野日本料理
千家粗粮王
保尔森国际美食汇
辣宴火锅
同盛祥
吉布鲁牛排海鲜自助
千家粗粮王
千家粗粮王
一尊皇牛
豪上鲜文鱼庄
厚府火锅
鼎尚鲜火锅传奇
好利来蛋糕世界
伯爵工房
德发长
渔歌·活鱼现烤
君为善·粤餐厅
巷子火锅
清真·同鑫楼炝锅鱼
西班牙玛萨塔咖啡屋
俏江南
千家粗粮王
仟禧自助涮烤
love kitty 喵星人餐厅猫咪餐厅
红透天自助涮烤
大龙燚火锅
锦里火锅
大队长主题火锅
麻辣空间
竹园村酸菜鱼火锅
王府臻品
新辣道鱼火锅
红透天自助涮烤
CoCo都可
九洲八拖二火锅
拉菲达牛排自助(赛格国际店)
仟禧自助鱼庄
吴铭火锅
白鹿原猪蹄坊
玉海港2068香辣虾
伯爵工房
李想大虾火锅花园
刘小厨(赛格国际店)
大红袍重庆老火锅
鸭掌门特色火锅
红透天自助涮烤
快颐坊
彼德西餐
奥赛奥章鱼水煎肉
傣妹
新辣道鱼火锅
伯爵工房
赛味尔蛋糕
仟禧自助涮烤
小浪花自助涮烤
大自在火锅
吉满杯
土大力烤肉名家
同盛祥
木木寿司
宽窄巷子老火锅
旺秦川自助涮烤
大憨火锅
粉汤羊血
蘭桂坊酒吧
北京老胡同烤鸭
麦多馅饼
戴琳娜法国餐厅
农家土火锅蜀中香
白鹿原猪蹄坊
汉拿山
王府臻品火锅
乡村基
壮仔龙虾
杨翔豆皮涮牛肚
重庆地瓜老火锅
大海子西府民俗村
仟禧自助涮烤
李记搅团
米糖里西餐厅
阅悦西餐厅
姥姥春饼•烤鸭
苏浙会馆
聚丰苑北京烤鸭
飞象比萨
李想大虾火锅花园
田园牧歌饭庄
川西大宅门炝锅鱼火锅
西安富力希尔顿酒店自助餐厅
鲜尚轩斑鱼庄
雅泰来中西餐
野葡萄时尚自助餐厅
宽塘·蛙哥虾妹
东关肉夹馍
重庆巴爷香辣鸡煲
江水肴陕南私房菜
光源餐厅-菜心心
赛百味
蜀江烤鱼
安旗蛋糕
奇鱼老陕菜菜
天龙宝严素食馆
万和源炝锅鱼.活鱼现炝
小阿妹米线
颐和宫大酒店
SPR COFFEE
西安高新希尔顿咖啡露台西餐厅
知味轩自助
塞纳风情咖啡
味立方煎饼屋
福聚长饺子馆
渝老道火锅
卿木缘米粉
泰熙家
王婆大虾
中国重庆老版火锅
仟禧自助涮烤
YoYo酒吧
名羊天下陕北横山铁锅炖羊肉
毅祥斋老孙家泡馍
纳家楼·新陕菜
城市运动公园茶餐厅
嘻多多
伯爵工房
藏式秘汁烤鱼
一石锅烤啦烤肉
德圣咖啡
谷麦滋
上岛咖啡
马勇牛羊肉
正宗新疆大盘鸡
鱼旨寿司
七十二行·锦筵餐饮
和新楼
MrPig借蹄发挥
陈二毛肥牛火锅
飘香盛世小阿妹米线
故院老菜馆
顶牛兰州牛肉拉面
小阿妹米线
王妈凉皮
凌云餐厅
李想大虾火锅花园
顺峰
帝都天元酒店
华夫哥私房火锅
羽尔迦休闲主题餐厅
玉涮坊老北京涮肉·羊蝎子
家阖百岁鱼
顺水鱼馆
羊瑞堂铁锅羊肉
澳堡酒店
鼎香食府

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

@ -0,0 +1,87 @@
# -*- coding:utf-8 -*-
import pandas as pd
from pylab import *
import pymysql
from config import HOST, PORT, USER, PASS, DB, TABLE
from wordcloud import WordCloud
import cv2
import jieba
import os
class View(object):
    """Charts and a word cloud built from the shop table in MySQL.

    Every output file is written to the ``view`` directory next to this
    module.
    """

    def __init__(self):
        """Open the MySQL connection and configure the plot font."""
        self.connect = pymysql.connect(host=HOST, user=USER, passwd=PASS, db=DB, port=PORT, charset='utf8')
        self.dirname = os.path.dirname(os.path.realpath(__file__))
        # Use SimHei for sans-serif text (presumably so Chinese labels render — confirm).
        mpl.rcParams['font.sans-serif'] = ['SimHei']

    def _view_path(self, filename):
        """Return the path of *filename* inside the ``view`` directory."""
        # os.path.join replaces the former hand-built '\\' paths, which
        # doubled the separators and only worked on Windows.
        return os.path.join(self.dirname, 'view', filename)

    def meishi_top10(self):
        """Save a horizontal bar chart of the 10 shops with the most comments."""
        df = pd.read_sql("select title,comments from {table}".format(table=TABLE), self.connect)
        # Ten largest comment counts, then ascending again so the largest
        # bar ends up at the top of the horizontal chart.
        df2 = df.sort_values(by='comments', ascending=False)
        df3 = df2.set_index('title')[0:10].sort_values(by='comments', ascending=True)
        fig = df3.plot(kind='barh', alpha=0.3).get_figure()
        plt.tight_layout()
        fig.savefig(self._view_path('top10.jpg'))

    def avgscore_ratio(self):
        """Save a pie chart of how often each average score occurs."""
        df = pd.read_sql('select avgscore from {table}'.format(table=TABLE), self.connect)
        fig = df['avgscore'].value_counts().plot(kind='pie').get_figure()
        fig.savefig(self._view_path('ratio.jpg'))

    def avgprice_comments(self):
        """Save a scatter plot of average price against comment count."""
        df = pd.read_sql('select avgprice, comments from {table}'.format(table=TABLE), self.connect)
        fig = df.plot(kind='scatter', x='avgprice', y='comments').get_figure()
        fig.savefig(self._view_path('pricom.jpg'))

    def wrodcloud(self):
        """Write the shop titles to ``title.txt`` and render a word cloud.

        The cloud is saved as ``key.png`` and also shown on screen.
        """
        titles = pd.read_sql("select title from {table}".format(table=TABLE), self.connect)
        text_path = self._view_path('title.txt')
        with open(text_path, 'w', encoding='utf-8') as f:
            for title in titles['title']:
                # NOTE(review): the separator here was an empty string, which
                # always raises ValueError.  Assumed intent: drop a
                # parenthesised branch suffix such as "(xx店)" — confirm.
                title = title.split('(')[0].split('(')[0]
                f.write('%s\n' % str(title))
        with open(text_path, 'r', encoding='utf-8') as f:
            text = f.read()
        # Segment the text into words and join them with spaces before
        # handing it to WordCloud.
        cut_text = " ".join(jieba.cut(text))
        color_mask = cv2.imread(self._view_path('qin.png'))
        cloud = WordCloud(
            # Original author's note: without an explicit font the output is garbled.
            font_path=self._view_path('FZSTK.TTF'),
            background_color='white',
            # The mask image determines the shape of the cloud.
            mask=color_mask,
            max_words=2000,
            max_font_size=50
        )
        wCloud = cloud.generate(cut_text)
        wCloud.to_file(self._view_path('key.png'))
        plt.imshow(wCloud, interpolation='bilinear')
        plt.axis('off')
        plt.show()
if __name__ == '__main__':
view = View()
view.meishi_top10()
# view.avgscore_ratio()
# view.avgprice_comments()
# view.wrodcloud()
Loading…
Cancel
Save