细节完善

master
Qkbrauyvi 4 years ago
parent 0a971573e2
commit 2a2daf4111

@ -284,7 +284,7 @@ from lxml import etree
import requests import requests
def begin_spider(url, web_name): def begin_spider(url, web_name, web=1):
url = "https://guilin.zbj.com/search/f/?type=new&kw=saas" url = "https://guilin.zbj.com/search/f/?type=new&kw=saas"
# 设置headers防止UA验证Host为要爬取的域名,通过浏览器F12获取User-Agent # 设置headers防止UA验证Host为要爬取的域名,通过浏览器F12获取User-Agent
headers = { headers = {
@ -314,10 +314,10 @@ def begin_spider(url, web_name):
def main(): def main():
begin_spider("https://guilin.zbj.com/search/f/?type=new&kw=saas", "猪八戒") begin_spider("https://guilin.zbj.com/search/f/?type=new&kw=saas", "猪八戒")
'''
if __name__ == '__main__': if __name__ == '__main__':
main() main()
'''
import json import json
@ -358,15 +358,15 @@ def begin_spider(page, url, web_name):
f.close() f.close()
def main(): def main_a():
for i in range(0, 100, 20): for i in range(0, 100, 20):
begin_spider(i,"https://music.163.com/","网易云") begin_spider(i,"https://music.163.com/","网易云")
time.sleep(1) time.sleep(1)
'''
if __name__ == '__main__': if __name__ == '__main__':
main() main()
'''
import re # 正则表达式进行文字匹配 import re # 正则表达式进行文字匹配
from bs4 import BeautifulSoup # 网页解析获取数据 from bs4 import BeautifulSoup # 网页解析获取数据
import urllib.error,urllib.request import urllib.error,urllib.request
@ -374,7 +374,7 @@ import xlwt # 进行excel操作
def main(): def main_b():
baseurl = "https://movie.douban.com/top250?start=" baseurl = "https://movie.douban.com/top250?start="
# 1.爬取网页 # 1.爬取网页
name= '豆瓣top250' name= '豆瓣top250'
@ -505,11 +505,11 @@ def saveData(datalist, savepath):
'''
if __name__ == "__main__": if __name__ == "__main__":
main() main()
print("爬取完毕!") print("爬取完毕!")
'''

Loading…
Cancel
Save