From d993acfad0c54b2751136dfd26185bdd9bf573c1 Mon Sep 17 00:00:00 2001
From: pbqs4lvtu <1779073549@qq.com>
Date: Tue, 7 May 2024 12:43:27 +0800
Subject: [PATCH] Update README.md
---
README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 64 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 7d203ec..be65fb3 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,65 @@
-# import
+import requests
+import re
+headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'}  # browser-like User-Agent passed to every requests.get call below
+urls = ["https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html"]  # the first list page has no numeric suffix
+for i in range(2, 21):  # list pages 2 through 20
+ url = f"https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen{-i}.html"  # {-i} formats the negated index (e.g. "-2"), which supplies the hyphen before the page number
+ urls.append(url)
+
+for url in urls:
+ response = requests.get(url, headers=headers)
+ response.raise_for_status() # Raise if the request did not succeed
+ # Print the response encoding and status code
+ print(f'URL: {url}: Encoding - {response.encoding}, Status Code - {response.status_code}')
+ # Print the page source: only for page 1, and only its first 800 characters
+ if url == "https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html" and response.status_code == 200:
+ print("Page 1 Source Code:")
+ print(response.text[:800])
+ print() # Blank line to separate the output of different pages
+
+ pattern = r'
(.*?)(.*?)'
+ # Find every match with re.findall. NOTE(review): the pattern literal above is split across two lines and has only two capture groups, while the loop below unpacks four values per match - the pattern text looks truncated; restore it before running
+ matches = re.findall(pattern, response.text, re.DOTALL)
+ # Print the results. NOTE(review): the loop body below sits at the same one-space indent as its `for` line, so the nesting in this hunk has been lost - confirm the intended structure
+ for match in matches:
+ link, title, _, date = match
+ print(f"链接: {link}")
+ print(f"标题: {title}")
+ print(f"发布时间: {date}")
+ print()
+
+# Match titles containing '产教融合', together with link and publish time; every page in urls is fetched again here
+for url in urls:
+ response = requests.get(url, headers=headers)
+ response.raise_for_status() # Raise if the request did not succeed
+ pattern = re.compile(r'(.+?产教融合.+?)([^<]+)')
+ matches = pattern.findall(response.text)
+ # Print the results. NOTE(review): the pattern above has two capture groups, while the loop below unpacks four values (link, title, content, time) - the pattern text looks incomplete; confirm against the intended page markup
+ for match in matches:
+ link, title, content, time = match
+ print(f"标题: {title.strip()}")
+ print(f"内容: {content.strip()}")
+ print(f"时间: {time.strip()}")
+ print(f"链接: {link}")
+ print()
+
+# Count the articles published in March 2024 and collect their titles
+march_2024_count = 0
+march_2024_titles = []
+for url in urls:
+ response = requests.get(url, headers=headers)
+ response.raise_for_status() # Raise if the request did not succeed
+ pattern = r'.*?(2024-03-\d\d)'
+ # Find titles and dates with re.findall. NOTE(review): the pattern has a single capture group, so findall returns plain date strings, while the loop below unpacks (title, date) pairs - the title part of the pattern looks missing; confirm
+ matches = re.findall(pattern, response.text)
+ # Walk the matches; the startswith check repeats what the pattern already restricts to (2024-03-)
+ for title, date in matches:
+ if date.startswith('2024-03-'):
+ march_2024_count += 1
+ march_2024_titles.append(title)
+# Print the article count followed by each collected title
+print(f"2024年3月份发布的文章数量: {march_2024_count}")
+print(f"所有文章标题:")
+for title in march_2024_titles:
+ print(title)
\ No newline at end of file