From 776a067f8f5070f5cd65952b0d0bf5e5ae4532d0 Mon Sep 17 00:00:00 2001 From: Qw37tgf5k <1879847326@qq.com> Date: Fri, 5 Nov 2021 21:30:51 +0800 Subject: [PATCH] =?UTF-8?q?Delete=20'=E6=9C=80=E7=BB=88=E7=89=88=E6=9C=AC1?= =?UTF-8?q?.0.py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 最终版本1.0.py | 157 --------------------------------------------- 1 file changed, 157 deletions(-) delete mode 100644 最终版本1.0.py diff --git a/最终版本1.0.py b/最终版本1.0.py deleted file mode 100644 index b8b6d52..0000000 --- a/最终版本1.0.py +++ /dev/null @@ -1,157 +0,0 @@ -# @Time : 2021/11/5 13:57 -# @Author :wenkaic -# @File : 002草稿 -# @Project : pythonProject4 -import urllib.request -from bs4 import BeautifulSoup -from urllib import parse -from wordcloud import WordCloud -import numpy as np -from PIL import Image -from lxml import etree - -#阿拉伯数字转为中文数字 -def change(num): - dic_num = {"1": "一", "2": "两", "3": "三", "4": "四", "5": "五", "6": "六", "7": "七", "8": "八", "9": "九", } - dic_unit = {1: "", 2: "十", 3: "百", 4: "千", 5: "万"} - fs = [] - daxie = "" - lennum = len(num) - if lennum >= 1: - for item in num: - if int(item) > 0: - fs.append(dic_num[item]) - fs.append(dic_unit[lennum]) - elif int(item) == 0: - fs.append("零") - lennum -= 1 - while fs[-1] == "零": - fs.pop() - daxie = "".join(fs) - if int(num)>1000 and int(num)<=1009: - daxie=daxie.replace('零','',1) - return daxie - -# 词云的制作,需要处理后的content -def Word_cloud(): - - mask = np.array(Image.open('006.png')) - fp = open('003斗破苍穹.txt', 'r', encoding='utf-8') - text = fp.read() - w = WordCloud(font_path='方正粗黑宋简体.ttf', width=650, height=700 - , contour_color='yellow', contour_width=2, mask=mask - , max_words=500) - w.generate(text) - w.to_file('004dou.png') - fp.close() - -# 保存爬取文件或者直接输出爬取的文件 -def save(n,soup): - bbs = soup.select('th,tr') - fp = open('003斗破苍穹.txt', 'a', encoding='utf-8') - if(n=='1'): - print('执行指令1') - for i in range(0, len(bbs)): - obj = bbs[i] - fp.write(obj.get_text().strip()) - elif(n=='2'): - print('执行指令2') - for i in range(0, len(bbs)): - obj = bbs[i] - print(obj.get_text().strip()) - else: - print('出现问题',n) - print('没有执行') - fp.close() -url = 'http://www.ddxs.com/doupocangqiong/' - -headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36 Edg/95.0.1020.40", -} - -# 请求对象定制 -request = urllib.request.Request(url=url, headers=headers) - -# 获取处理前的相应数据 -response = urllib.request.urlopen(request) -content = response.read().decode('utf-8') - -soup = BeautifulSoup(content,'lxml') -print('输入1:保存在文件夹中\n' \ - '输入2:直接输出\n' \ - '输入3:词云(文件会下载到本地)\n' \ - '输入4:查询章节\n'\ - '输入5:显示提示信息') -while 1: - print('请输入指令:') - n = input() - - #创造词云 - if n == '1': - save(n, soup) - elif n == '2': - save(n, soup) - elif n == '3': - print('指令3执行') - save('1',soup) - Word_cloud() - - #查询章节,需要未处理的content - elif n == '4': - print('指令4执行') - tree = etree.HTML(content) - list = tree.xpath('//body//a/text()') - - # 把阿拉伯数据装变为中文数字 - num = input("\n请输入数字(1-1623):") - if int(num) < 1 or int(num) > 1623: - print("输入范围错误!\n") - continue - n = change(num) - n = '第' + n + '章' - if (n == '第一百章'): - n = '一百零章' - elif (n =='第两百零两章'): - n='第两百零二章' - elif(n=='第一千零一十一章'): - n='第一千零十一章' - print('n',n) - print(list) - for i in range(0, len(list)): - if n in list[i]: - print(list[i]) - break - - elif n=='5': - print('输入1:保存在文件夹中\n' \ - '输入2:直接输出\n' \ - '输入3:词云\n' \ - '输入4:查询章节\n' \ - '输入5:显示提示信息'\ - '输入6:退出') - elif n=='6': - print('退出程序') - break - else: - print('!!输入了错误指令!!') - print('==================') - print('输入1:保存在文件夹中\n' \ - '输入2:直接输出\n' \ - '输入3:词云\n' \ - '输入4:查询章节\n' \ - '输入5:显示提示信息' \ - '输入6:退出') - print('==================') - - -# url1 = 'http://www.ddxs.com/doupocangqiong/' -# url2 = '/doupocangqiong/1656.html' -# new = urllib.parse.urljoin(url1,url2) -# print(new) -# from urllib import parse -# -# page_url = 'http://www.ddxs.com/doupocangqiong/' -# new_url = '/doupocangqiong/1656.html' -# -# new_full_url = parse.urljoin(page_url, new_url) -# print(new_full_url)