|
|
@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
# @Time : 2021/11/1 22:09
|
|
|
|
|
|
|
|
# @Author :wenkaic
|
|
|
|
|
|
|
|
# @File : 001python大作业
|
|
|
|
|
|
|
|
# @Project : python爬虫
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import urllib.request
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Download one web page and save its decoded text to a local file.
url = 'http://www.ddxs.com/doupocangqiong/'

# Browser-like User-Agent header sent along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36 Edg/95.0.1020.40",
}

# Build a customised request object that carries the headers above.
request = urllib.request.Request(url=url, headers=headers)

# Fetch the response. The context manager closes the HTTP connection as soon
# as the body has been read, instead of leaving it open until interpreter exit.
with urllib.request.urlopen(request) as response:
    # NOTE(review): assumes the server returns UTF-8 encoded bytes; a page in
    # another encoding would raise UnicodeDecodeError here — confirm.
    content = response.read().decode('utf-8')

# Save the downloaded text locally.
# open() falls back to a platform-dependent default encoding (the original
# author observed GBK), so utf-8 is requested explicitly to store Chinese text.
# The context manager guarantees the file is flushed and closed even if the
# write fails.
with open('003斗破苍穹.txt', 'w', encoding='utf-8') as fp:
    fp.write(content)
|