# Scrape the Kugou TOP500 chart with requests + BeautifulSoup

import time

import requests
from bs4 import BeautifulSoup

# A desktop User-Agent so the site serves the normal HTML page
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
}


def get_info(url):
    wb_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(wb_data.text, 'html.parser')
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    durations = soup.select('span.pc_temp_tips_r > span')
    # `duration` rather than `time`, so the loop variable does not shadow the time module
    for rank, title, duration in zip(ranks, titles, durations):
        # Each link title reads "singer - song"; split once and guard
        # against titles that contain no dash
        parts = title.get_text().split('-')
        data = {
            'rank': rank.get_text().strip(),
            'singer': parts[0].strip(),
            'song': parts[1].strip() if len(parts) > 1 else '',
            'time': duration.get_text().strip()
        }
        print(data)


if __name__ == '__main__':
    # The chart spans 23 pages: 1-8888.html through 23-8888.html
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i) for i in range(1, 24)]
    for url in urls:
        get_info(url)
        time.sleep(1)  # pause between requests to stay polite
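
To keep the rows instead of just printing them, one option is to collect the data dicts and dump them to CSV. A minimal sketch with the standard csv module; save_rows and the output filename are illustrative, not part of the original script:

import csv

def save_rows(rows, path='kugou_top500.csv'):
    # rows: a list of dicts shaped like the `data` dict above
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['rank', 'singer', 'song', 'time'])
        writer.writeheader()
        writer.writerows(rows)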

The same kind of scraping can also be done as a Scrapy project. Here the project is named movie and the spider meiju; it collects the new-drama chart from meijutt.tv. Two pieces of setup after generating the project:

In items.py, delete the # in front of the name field so the item actually defines it.
In settings.py, uncomment lines 65-67 (the commented-out ITEM_PIPELINES block) so the pipeline below is registered.
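
For reference, after those edits the two files look roughly like this (a sketch of the default Scrapy scaffold; only the name field and the pipeline registration matter here):

# items.py
import scrapy

class MovieItem(scrapy.Item):
    name = scrapy.Field()

# settings.py (the uncommented block)
ITEM_PIPELINES = {
    'movie.pipelines.MoviePipeline': 300,
}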

meiju.py

import scrapy
from movie.items import MovieItem


class MeijuSpider(scrapy.Spider):
    name = "meiju"
    # Must match the domain in start_urls, otherwise the offsite
    # middleware drops follow-up requests (the original said meijutt.com)
    allowed_domains = ["meijutt.tv"]
    start_urls = ['https://www.meijutt.tv/topiclist/2024xinjutop.html']

    def parse(self, response):
        # One <ul> per chart entry inside the topic_box container
        movies = response.xpath('//div[@class="topic_box"]/ul')
        for each_movie in movies:
            item = MovieItem()
            # extract_first() returns None instead of raising when the node is missing
            item['name'] = each_movie.xpath('./li[@class="font_14"]/a/@title').extract_first()
            yield item
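
To sanity-check those XPath expressions before a full crawl, scrapy shell is handy. Start it against the page from start_urls, then evaluate the combined expression inside the shell:

scrapy shell 'https://www.meijutt.tv/topiclist/2024xinjutop.html'
# inside the shell, this should print the first title:
response.xpath('//div[@class="topic_box"]/ul/li[@class="font_14"]/a/@title').extract_first()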

pipelines.py

import json


class MoviePipeline(object):

    def open_spider(self, spider):
        self.file = open('log.txt', 'w', encoding='utf-8')

    def close_spider(self, spider):
        self.file.close()

    def process_item(self, item, spider):
        # One JSON object per line; ensure_ascii=False keeps Chinese titles readable
        # (the original wrote str(item), leaving the json import unused)
        self.file.write(json.dumps(dict(item), ensure_ascii=False) + '\n')
        # A pipeline must return the item so later pipeline stages still see it
        return item
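
With the pipeline registered in settings.py, the crawl is started from the project root in the usual Scrapy way; each title then lands as one JSON line in log.txt:

scrapy crawl meiju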

A separate snippet: reading an Excel file with pandas.

import pandas as pd

# Path to the Excel file
excel_file_path = 'path_to_your_excel_file.xlsx'

# Read the workbook; this assumes it has a single sheet
# (with several sheets, pick one via the sheet_name parameter)
df = pd.read_excel(excel_file_path)

# Print the DataFrame
print(df)
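
When the workbook does have several sheets, sheet_name selects one by name or index, and sheet_name=None loads all of them as a dict of DataFrames. A short sketch; the sheet name 'Sheet1' is illustrative:

# Load a single named sheet
df_one = pd.read_excel(excel_file_path, sheet_name='Sheet1')

# Load every sheet: returns {sheet name: DataFrame}
all_sheets = pd.read_excel(excel_file_path, sheet_name=None)
for name, sheet_df in all_sheets.items():
    print(name, sheet_df.shape)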