Delete '最终版本1.0.py'

master
Qw37tgf5k 3 years ago
parent f801dc3626
commit 776a067f8f

@ -1,157 +0,0 @@
# @Time : 2021/11/5 13:57
# @Author :wenkaic
# @File : 002草稿
# @Project : pythonProject4
import urllib.request
from bs4 import BeautifulSoup
from urllib import parse
from wordcloud import WordCloud
import numpy as np
from PIL import Image
from lxml import etree
# Convert Arabic digits to Chinese numerals.
def change(num):
    """Spell a string of decimal digits with Chinese numerals.

    Every non-zero digit is written as its numeral followed by the unit of
    its position (ones: nothing, then ten, hundred, thousand, ten-thousand).
    A run of zeros inside the number is written as a single zero marker and
    trailing zeros are dropped, e.g. "1005" -> "一千零五", "100" -> "一百".

    NOTE(review): in the copy under review every numeral literal in this
    function was an empty string, which made each call fail with IndexError.
    The characters below were reconstructed from the strings the chapter
    lookup compares against ("第一百章", "第两百零两章", "第一千零一十一章").
    In particular "2" is rendered as "两" in every position because the
    caller's special case for chapter 202 expects that spelling. Confirm
    against the original file.

    Args:
        num: The number as a string of 1 to 5 decimal digits.

    Returns:
        The Chinese spelling, or "" when ``num`` is empty or all zeros.

    Raises:
        ValueError: If ``num`` contains a non-digit character.
        KeyError: If ``num`` contains a non-zero digit beyond the fifth
            position from the right (no unit is defined for it).
    """
    digit_names = {
        "1": "一", "2": "两", "3": "三", "4": "四", "5": "五",
        "6": "六", "7": "七", "8": "八", "9": "九",
    }
    # Unit suffix keyed by the 1-based position counted from the right.
    unit_names = {1: "", 2: "十", 3: "百", 4: "千", 5: "万"}
    zero = "零"

    pieces = []
    position = len(num)
    for digit in num:
        if int(digit) > 0:
            pieces.append(digit_names[digit])
            pieces.append(unit_names[position])
        elif not pieces or pieces[-1] != zero:
            # Collapse consecutive zeros into one marker. The original only
            # patched this for 1001-1009; collapsing gives the same result
            # there and also covers larger numbers.
            pieces.append(zero)
        position -= 1

    # Trailing zeros are not spoken ("100" is "一百"). The emptiness guard
    # keeps all-zero input from indexing into an empty list.
    while pieces and pieces[-1] == zero:
        pieces.pop()
    return "".join(pieces)
# Building the word cloud requires the text that save() has already written.
def Word_cloud():
    """Render a word cloud image from the saved novel text.

    Reads '003斗破苍穹.txt' as UTF-8, uses '006.png' as the mask image and
    '方正粗黑宋简体.ttf' as the font, and writes the result to '004dou.png'.
    All paths are relative to the current working directory.
    """
    mask = np.array(Image.open('006.png'))
    # The context manager closes the text file even if reading fails.
    with open('003斗破苍穹.txt', 'r', encoding='utf-8') as fp:
        text = fp.read()
    cloud = WordCloud(
        font_path='方正粗黑宋简体.ttf',
        width=650,
        height=700,
        contour_color='yellow',
        contour_width=2,
        mask=mask,
        max_words=500,
    )
    cloud.generate(text)
    cloud.to_file('004dou.png')
# Save the scraped text to a file, or print it directly.
def save(n, soup):
    """Write or print the text of every 'th' / 'tr' element of the page.

    Args:
        n: Command string. '1' appends the stripped text of each selected
            element to '003斗破苍穹.txt' (UTF-8, no separators); '2' prints
            each stripped text on its own line. Any other value only prints
            a diagnostic.
        soup: Parsed page object providing ``select``; each selected element
            must provide ``get_text``.
    """
    cells = soup.select('th,tr')
    if n == '1':
        print('执行指令1')
        # Open the file only for the command that writes to it, so printing
        # or an invalid command never creates an empty file, and the handle
        # is closed even if a write fails.
        with open('003斗破苍穹.txt', 'a', encoding='utf-8') as fp:
            fp.writelines(cell.get_text().strip() for cell in cells)
    elif n == '2':
        print('执行指令2')
        for cell in cells:
            print(cell.get_text().strip())
    else:
        print('出现问题', n)
        print('没有执行')
url = 'http://www.ddxs.com/doupocangqiong/'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36 Edg/95.0.1020.40",
}
# Build the request with the custom headers.
request = urllib.request.Request(url=url, headers=headers)
# Fetch the raw page; the context manager closes the connection once the
# body has been read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')
soup = BeautifulSoup(content, 'lxml')

# Help text shown for command 5 and after an unknown command. The original
# was missing the newline between the last two entries.
HELP_TEXT = ('输入1保存在文件夹中\n'
             '输入2直接输出\n'
             '输入3词云\n'
             '输入4查询章节\n'
             '输入5显示提示信息\n'
             '输入6退出')

# Start-up menu; option 6 is listed here as well since the loop supports it.
print('输入1保存在文件夹中\n'
      '输入2直接输出\n'
      '输入3词云(文件会下载到本地)\n'
      '输入4查询章节\n'
      '输入5显示提示信息\n'
      '输入6退出')

while True:
    print('请输入指令:')
    n = input()
    if n in ('1', '2'):
        # Save to file ('1') or print ('2').
        save(n, soup)
    elif n == '3':
        # The word cloud is built from the saved text, so save first.
        print('指令3执行')
        save('1', soup)
        Word_cloud()
    elif n == '4':
        # Chapter lookup works on the raw, unprocessed page content.
        print('指令4执行')
        tree = etree.HTML(content)
        link_texts = tree.xpath('//body//a/text()')
        num = input("\n请输入数字1-1623")
        # Non-numeric input is reported like an out-of-range number instead
        # of crashing the loop with ValueError.
        try:
            chapter_number = int(num)
        except ValueError:
            chapter_number = 0
        if chapter_number < 1 or chapter_number > 1623:
            print("输入范围错误!\n")
            continue
        # Convert the Arabic number to a Chinese chapter title.
        # NOTE(review): the two wrapper literals were empty in the copy
        # under review; '第' and '章' are inferred from the comparisons
        # below. Confirm against the original file.
        chapter_title = '第' + change(num) + '章'
        # Hand-written corrections for titles whose spelling differs from
        # what change() produces.
        if chapter_title == '第一百章':
            chapter_title = '一百零章'
        elif chapter_title == '第两百零两章':
            chapter_title = '第两百零二章'
        elif chapter_title == '第一千零一十一章':
            chapter_title = '第一千零十一章'
        # Diagnostic output kept from the original script.
        print('n', chapter_title)
        print(link_texts)
        # Print the first link text that contains the chapter title.
        for link_text in link_texts:
            if chapter_title in link_text:
                print(link_text)
                break
    elif n == '5':
        print(HELP_TEXT)
    elif n == '6':
        print('退出程序')
        break
    else:
        print('!!输入了错误指令!!')
        print('==================')
        print(HELP_TEXT)
        print('==================')
# url1 = 'http://www.ddxs.com/doupocangqiong/'
# url2 = '/doupocangqiong/1656.html'
# new = urllib.parse.urljoin(url1,url2)
# print(new)
# from urllib import parse
#
# page_url = 'http://www.ddxs.com/doupocangqiong/'
# new_url = '/doupocangqiong/1656.html'
#
# new_full_url = parse.urljoin(page_url, new_url)
# print(new_full_url)
Loading…
Cancel
Save