From d239ccf98ac244a2aadc4217700d5cf02dfc57da Mon Sep 17 00:00:00 2001
From: p27cbh5ik <lonia@aliyun.com>
Date: Wed, 12 Jun 2024 12:07:58 +0800
Subject: [PATCH] ADD file via upload

---
 main.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 main.py
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..56b3149
--- /dev/null
+++ b/main.py
@@ -0,0 +1,149 @@
+import re  # 导入正则表达式模块
+import os  # 导入操作系统相关模块
+import joblib  # 导入用于序列化和反序列化Python对象的模块
+import asyncio  # 导入异步I/O模块
+import aiohttp  # 异步HTTP客户端/服务器框架
+import requests as rq  # 导入用于发送HTTP请求的模块，起别名rq
+import pandas as pd  # 导入用于数据分析的库，起别名pd
+
+
+class getTopSecCom:
+    """Scrape a stock board's company list and market values, then save them."""
+
+    CACHE_PATH = "./useful_sec_com_list"  # joblib cache of the company list
+    TIMEOUT = 10  # seconds before a blocking requests call is abandoned
+    # Captures the figure inside the quote page's total-market-value cell.
+    MARKET_VALUE_RE = re.compile("<td>总市值：<span>(.*?)亿</span>")
+
+    def __init__(self, top=None):
+        """Store request settings, then load the cached company list or fetch it.
+
+        `top` is only stored on the instance; no method of this class reads it.
+        """
+        # Headers sent with every request (Referer plus a browser User-Agent).
+        self.headers = {"Referer": "http://quote.eastmoney.com/",
+                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"}
+        # JSONP endpoint listing the board's stock codes (f12) and names (f14).
+        self.bk_url = "http://71.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124034348162124675374_1612595298605&pn=1&pz=85&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f62&fs=b:BK0655&fields=f12,f14&_=1612595298611"
+        # Quote page prefix; the prefixed stock code is appended to it.
+        self.shares_api = "https://xueqiu.com/S/"
+        self.top = top
+        # Hit the network only when no cache file exists yet.
+        if not os.path.exists(self.CACHE_PATH):
+            self.useful_sec_com_list = self.get_sec_com_code()
+        else:
+            with open(self.CACHE_PATH, "rb") as fp:
+                self.useful_sec_com_list = joblib.load(fp)
+
+    def get_sec_com_code(self):
+        """Download the board list, cache it and return `[code, name]` pairs.
+
+        Names containing "ST" are skipped and every code gets an "sh"/"sz"
+        prefix. Raises ValueError when the response holds no `[...]` array.
+        """
+        import json  # local import: json is not in the file's import block
+
+        html = rq.get(self.bk_url, headers=self.headers,
+                      timeout=self.TIMEOUT).content.decode("utf-8")
+        found = re.search(r"\[.*?\]", html)  # first array in the JSONP reply
+        if found is None:
+            raise ValueError("no stock list found in the board response")
+        # json.loads instead of eval(): remote text must never be executed.
+        sec_com_list = json.loads(found.group(0))
+        # Codes starting with "6" get the "sh" prefix, all others "sz".
+        useful_sec_com_list = [
+            [("sh" if i["f12"].startswith("6") else "sz") + i["f12"], i["f14"]]
+            for i in sec_com_list if "ST" not in i["f14"]
+        ]
+        with open(self.CACHE_PATH, "wb") as fp:
+            joblib.dump(useful_sec_com_list, fp)
+        return useful_sec_com_list
+
+    async def async_get_shares_details(self, sec_com, url):
+        """Fetch `url`; return `[*sec_com, market_value]`, or None if not found."""
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=self.headers) as response:
+                html = await response.text()
+        market_value = self.MARKET_VALUE_RE.search(html)
+        return [*sec_com, market_value.group(1)] if market_value else None
+
+    async def async_get_all_shares(self):
+        """Fetch all listed stocks concurrently, in list order, minus misses."""
+        # gather() keeps submission order and, unlike asyncio.wait(), does
+        # not raise ValueError when the company list is empty.
+        results = await asyncio.gather(*(
+            self.async_get_shares_details(sec_com, self.shares_api + sec_com[0])
+            for sec_com in self.useful_sec_com_list))
+        return [share for share in results if share]
+
+    def get_shares_details(self):
+        """Blocking counterpart of `async_get_all_shares`, using requests."""
+        all_shares = []
+        for sec_com in self.useful_sec_com_list:
+            page = rq.get(self.shares_api + sec_com[0], headers=self.headers,
+                          timeout=self.TIMEOUT).content.decode("utf-8")
+            market_value = self.MARKET_VALUE_RE.search(page)
+            if market_value:
+                all_shares.append([*sec_com, market_value.group(1)])
+        return all_shares
+
+    def save_txt(self, save_path):
+        """Fetch all shares and write one "code, name, value" line per share."""
+        all_shares = asyncio.run(self.async_get_all_shares())
+        with open(save_path, 'w', encoding='utf-8') as f:
+            for share in all_shares:
+                f.write(f"{share[0]}, {share[1]}, {share[2]}\n")
+
+    def save_csv(self, save_path):
+        """Fetch all shares and write a CSV sorted by market value, descending."""
+        all_shares = asyncio.run(self.async_get_all_shares())
+        df = pd.DataFrame(all_shares, columns=["股票代码", "公司", "市值(亿)"])
+        df["市值(亿)"] = df["市值(亿)"].astype(float)
+        df.sort_values(by="市值(亿)", ascending=False, inplace=True)
+        df.to_csv(save_path, index=False, encoding='utf-8-sig')
+
+    def save_sql(self, save_path):
+        """Fetch all shares and write one SQL INSERT statement per share."""
+        all_shares = asyncio.run(self.async_get_all_shares())
+        with open(save_path, 'w', encoding='utf-8') as f:
+            for share in all_shares:
+                # Double single quotes so a quote in the scraped text cannot
+                # terminate the SQL string literal early.
+                code, name = (v.replace("'", "''") for v in share[:2])
+                f.write(
+                    f"INSERT INTO shares (股票代码, 公司, 市值亿) VALUES ('{code}', '{name}', {share[2]});\n")
+
+    def yield_storage(self, save_path, storage_type="all"):
+        """Save to `save_path` + extension for "txt", "csv", "sql" or "all" of them."""
+        # Each saver receives save_path with its own extension appended; any
+        # other storage_type matches nothing, so no file is written.
+        savers = {"txt": self.save_txt, "csv": self.save_csv, "sql": self.save_sql}
+        for ext, saver in savers.items():
+            if storage_type in (ext, "all"):
+                saver(f"{save_path}.{ext}")
+
+if __name__ == "__main__":
+    # Build the scraper first; constructing it loads the cached company
+    # list or downloads it when no cache file exists.
+    m = getTopSecCom()
+    # Base name only: yield_storage() appends ".txt" / ".csv" / ".sql".
+    save_path = "rank"
+
+    # Menu number typed by the user -> storage type for yield_storage().
+    storage_types = {"1": "txt", "2": "csv", "3": "sql", "4": "all"}
+
+    # Ask which output format(s) to produce; whitespace around the answer
+    # is ignored.
+    storage_option = input("请选择存储方式：\n1. txt\n2. csv\n3. sql\n4. all\n请输入数字：")
+    storage_type = storage_types.get(storage_option.strip())
+
+    if storage_type is None:
+        # Unknown menu entry: print the hint asking for a valid number and
+        # write nothing.
+        print("请输入正确的数字选项。")
+    else:
+        # Every option passes the bare base name. Options 1-3 used to pass
+        # e.g. "rank.txt"; yield_storage() appends the extension itself,
+        # so the output file ended up named "rank.txt.txt".
+        m.yield_storage(save_path, storage_type)
+