From f48ff6c3626576d99fc00b7989033cd433624767 Mon Sep 17 00:00:00 2001 From: jinzhudada <2897439727@qq.com> Date: Tue, 28 May 2024 18:14:11 +0800 Subject: [PATCH] 5/28 --- .../__pycache__/main.cpython-312.pyc | Bin 0 -> 4231 bytes pyton_kese.py/main.py | 58 ++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 pyton_kese.py/__pycache__/main.cpython-312.pyc create mode 100644 pyton_kese.py/main.py diff --git a/pyton_kese.py/__pycache__/main.cpython-312.pyc b/pyton_kese.py/__pycache__/main.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc1f851a78f5415583c8003ec1bbb3439f39532f GIT binary patch literal 4231 zcmb_feQXrR6`$SPz1utA`TU7(NZ~Lfc#>&kns{0R#1P^TM}&|sH4;9; zn3FyejAV%HqI?vQ+M|6mXW}S<@R>RK9P(Lu2)FqWX4dE?bf#I1g!nUjC~k|&ytJMT z@L?t1e@KiAp%Ck>@@#OeX%WJl7?WL%&8~_Ko((%(u&{a44p;Z)O>Woj2z=kdpFSWc z-l~mTJR3K=)*NVPK6G%MDg(Vq-;x3l?iT*s!;v!k^aN7-;BC@O!$6KfgVChx}D&Nb4&syDWJ%6 z%tcW897+%hU{WoZF<@o^#v5t+BGpF)G2cavqi&?XC(t}j=0z}=_E8+IqEYh=d@^VW zQYC|Mg^L?!8D;(lo>rBZ$J3;mzLuwLK2OjjXR*{dO$pS4+&PU44C(-QA^e`6fy4il z7@DUl2gbaSHe4O*M}9H@zmp6oh&YpqevUZms%enCWTU{?2^7GJ6Ln~SIJpimCM9hS z@M-GBL#ZYuYsyhNBPT(rR1hRAZV6h+Di*Xa-bIKrtF&Uo58eyrzKF?SS-vwT z%m0lN^DUg1?|de!YK6P9rSGZ=?!m@cgZVGoW1hr<7k4?EYRlY7#$|)WFXA#m1YWQO zydWoGQ%W)@)vQ`nM$NgAzWq9M@_Pui!YpA=I1;%DCxl5^20b^bcGaQgs!lHF%DfP~ zw1NW9y5yyhvMht;?3{zk9d%~zwhATkR1)N1|I1hNf*@~kJ_I6BUx7-_p?V-r2>2yXEJN91XcekJG7*EF zu8z3;Ay$@OE0@Kn23zZh0zF$}Qs3j`AUJ?9pt8s9T(yi!3EKWz2_VruwgTsgYA}53cU*HgbaP7jQUkq<|NjPq!xLfDC z|M8WFz2{*KM|*nh!Ii7`|L{v#!*QQpn|=HI?5}UZ8qQzO37gxg(-IpF@H!*$Z$)`o zk#$pmS0pS*rxYGeLQ1@DX@~cW4TW@b-Qnil2M_90P!z&CEk{Cv0)rw65#1Dr&df$4 ze3;WI5eA)(NrJ+=3w1)!si+i^b&H{*U(ii3#Zu*n7?wc-HY2>Fy4kQ108>VibtY{k zSW1~m)`^Io6Xs)W>A7JELv=$}Lphyl7r@jMi0{(#c+Yx&YorHmFpLK=+}6p}1L5WJC6w`fW-$avda>T--mm;%>-DW8hi^7buG%@dY}fR%`iW)rca}A1OMS@$(A}@B8f(89xZSNawN6%^ ze1ZsvKuqL$2Izq^!?6+bOkT-Q?qKdn@y%DpTeMT{6DtCD@;aWFFzj(5*7w;`lv^aaZ0GUcdxj7Vz(d9!GybOq+sKn_qdc|}9ELEG@LYuw0|>s{B{#`fP-Za3Wekyg_< z9@LJX)P5M4INqTh6Es2Al1@zs+8wCn`&r5*344)t8dDSg0E|1$OIE8FYWn*X%2 zvfgX@vzM;lYVl`lEWl)o30$2mEGGK0sXRfbL@-+#z*p(U5pxMt$<|}RY>|+`A*5!J z-&kppoN1JrOVBD#*z{5aPs0ven!v-8EhpyqD%mpS8%hilXGuQ_jB4VnY1#&KPMWp@ z?MTzP@NlHdCrI|MQE={{b52@@$~>V)b>?i0V#sX&`wu@Hdidc_?tgOq!PyU+b?UUJ zbht^~%y&mvoRuIcgdP(Ck7*(R62F4 z8RtwY!umUPvjCY*QgqYa#yXwB*&!r~5jU+lja#Fx8L4p*NR`6V!n@~IxrX* zZhu!y=FBjK(@g0EQ+kaW*{m_8Q%vPQi%N$Jhsy_k0HI}llT6ml*h{AE%O~v1uSoBm zxqN1%bZqrl>^JMjR@}CYZ_-{r_LmxMzfbdV8ZT%;d4g9oE~-U;3{kY3+>3*$o@~T{ zwwpYRXS>MP@$fp?jE6NxaiBTgBp;iww8v&-b@n^^oOhVAr=SJ$<1W*%gBbNUlq~R{ z-MoJ^(yo&M;&Fki5RKpubXSl3R!A?D6;@K(j7eKW68*d^{|ZUF;2Xo{dd@sR{N=?K zo=fHPVECG3TuAq#`;23nSvkS1yu++Am@ahEe$mY(JJDT@I z6?mSct*|OV{9?ogOz*`od+PZ)ZD4eJ$_A;Kv=gHYMQ|FyKDcYwoD&*5SfZ>#_f;=? zMi$fM&pcLo>622DUj2+g_T2uezN&Y&^w!N#%!U1D_iIkq6t(hxVe!zm!EM9MBe}P2 zQ-ue556)0|(^T;URXkj&QN>f#@+?GKv*Y%OslukO!C;<;&K^2{*f1)DAoc`l33R$e z=@g)jkPK6#LURw^#Py7EZmAAayKsl2>N!*0C34XaUn4cb6lZ1mRp`J63F0nt-bL2C hsOTT)rMt)u9!cjv2EXILMadkp*Yec7fhaPV{TG;K>UsbG literal 0 HcmV?d00001 diff --git a/pyton_kese.py/main.py b/pyton_kese.py/main.py new file mode 100644 index 0000000..7f32d32 --- /dev/null +++ b/pyton_kese.py/main.py @@ -0,0 +1,58 @@ +import requests +import re +import tkinter as tk +from tkinter import Toplevel +import mysql + +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/11" +} +def show_data_window(): + def fetch_data(): + urls = [f"https://www.gushiwen.cn/default_{i}.aspx" for i in range(1, 3)] + gushici = [] + for url in urls: + response = requests.get(url, headers=headers) + content = response.text + titles = re.findall('(.*?)', content, re.DOTALL) + authors = re.findall('

.*?(.*?)', content, re.DOTALL) + dynastys = re.findall('

.*?(.*?)', content, re.DOTALL) + poetics = re.findall('

(.*?)
', content, re.DOTALL) + new_poetics = [''.join(re.split('<.*?>|<.*? />', p)).strip() for p in poetics] + + for title, author, dynasty, poetic in zip(titles, authors, dynastys, new_poetics): + gushici.append({"title": title, "author": author, "dynasty": dynasty, "poetic": poetic}) + + # 保存到TXT文件 + with open("gushi.txt", "w", encoding="utf-8") as file: + for item in gushici: + file.write(f"标题: {item['title']}, 作者: {item['author']}, 朝代: {item['dynasty']}, 内容: {item['poetic']}\n") + + # 在新窗口显示数据 + show_data_window(gushici) + + def show_data_window(data): + window = Toplevel(root) + window.title("古诗词信息") + text_widget = tk.Text(window) + text_widget.pack(expand=True, fill='both') + + for item in data[:20]: # 仅显示前10条数据作为示例 + text_widget.insert(tk.END, f"标题: {item['title']}\n作者: {item['author']}\n朝代: {item['dynasty']}\n内容: {item['poetic']}\n\n") + + window.mainloop() + + + def start_mysql(): + + mysql.start_mysql_process() + + root = tk.Tk() + root.title("古诗词爬虫") + + fetch_button = tk.Button(root, text="开始爬取", command=fetch_data) + fetch_button.pack(pady=20) + + mysql_button = tk.Button(root, text="跳转到mysql.py", command=start_mysql) + mysql_button.pack(pady=10) # 在"开始爬取"按钮下面添加新按钮 + root.mainloop() \ No newline at end of file