From fbdfa8382617293b58ed590a2bed768ae4ba3f6f Mon Sep 17 00:00:00 2001
From: pt5ix8vk3 <2686237691@qq.com>
Date: Thu, 27 Jun 2024 15:27:25 +0800
Subject: [PATCH] ADD file via upload

---
 getNet.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 getNet.py

diff --git a/getNet.py b/getNet.py
new file mode 100644
index 0000000..8b40bea
--- /dev/null
+++ b/getNet.py
@@ -0,0 +1,65 @@
+from bs4 import BeautifulSoup
+import requests
+from datetime import datetime
+import json
+
+# Request headers: send a mobile Safari (iPhone) User-Agent string with the request
+headers = {
+    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1"
+}
+
+# Fetch the page HTML over HTTP (runs at module top level; there is no main guard)
+url = "http://www.xinhuanet.com/politicspro/"
+response = requests.get(url, headers=headers)  # NOTE(review): no timeout and no status check; consider timeout=... and raise_for_status()
+html = response.text  # body as decoded by requests; NOTE(review): confirm the inferred encoding is right for this page
+
+# Parse the HTML with BeautifulSoup using the 'html.parser' backend
+soup = BeautifulSoup(html, 'html.parser')
+
+# Collect titles: the text of the first 'a' tag inside each element with class 'tit'
+titles = []
+divs_with_titles = soup.select('.tit')  # select every element with class 'tit'
+for div in divs_with_titles:
+    a_tag = div.find('a')  # first 'a' tag inside this element, or None if there is none
+    if a_tag:  # skip elements that contain no link
+        title = a_tag.get_text(strip=True)  # tag text with whitespace stripped
+        titles.append(title)
+
+# Collect other content: the text of every 'li' under each element with class 'xhwp_list'
+other_content = []
+divs_with_other = soup.select('.xhwp_list')  # select every element with class 'xhwp_list'
+for ul in divs_with_other:
+    lis = ul.find_all('li')  # all 'li' elements found under this element
+    for li in lis:
+        content = li.get_text(strip=True)  # element text with whitespace stripped
+        other_content.append(content)
+
+# Build the timestamped output file names
+timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")  # current time, formatted like 2024-06-27_15-27-25
+output_file_txt = f"xinhuanet_data_{timestamp}.txt"  # text output file name, includes the timestamp
+output_file_json = f"xinhuanet_data_{timestamp}.json"  # JSON output file name, includes the timestamp
+
+# Write the text report (UTF-8): numbered titles first, then numbered other content
+with open(output_file_txt, 'w', encoding='utf-8') as file:
+    file.write("=== 标题 ===\n\n")
+    for i, title in enumerate(titles, start=1):
+        file.write(f"{i}. {title}\n")
+
+    file.write("\n=== 其他内容 ===\n\n")
+    for i, content in enumerate(other_content, start=1):
+        file.write(f"{i}. {content}\n")
+
+print(f"数据已写入文本文件 {output_file_txt}")
+
+# Assemble the payload for the JSON file
+data = {
+    "timestamp": timestamp,
+    "titles": titles,
+    "other_content": other_content
+}
+
+# Write the JSON file; ensure_ascii=False keeps non-ASCII characters unescaped
+with open(output_file_json, 'w', encoding='utf-8') as json_file:
+    json.dump(data, json_file, ensure_ascii=False, indent=4)
+
+print(f"数据已写入JSON文件 {output_file_json}")