Compare commits

4 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 57f47d46f0 | 4 years ago |
|  | 6456a76a07 | 4 years ago |
|  | 5ba6c20afa | 4 years ago |
|  | 96b9f36017 | 4 years ago |
@@ -1,10 +1,6 @@

# 久久小说网 novel scraper

This program downloads all of the novels on [久久小说网](https://txt909.com/), organised by category.

Features:

- Uses **requests** and **BeautifulSoup** to fetch and parse pages
- The category to download can be changed via the variable `category`
- Downloaded novels are stored in a directory named after their category
- The page range to download is set via the variables `start_page` and `end_page` (see the configuration sketch after the project list below)
- Already-downloaded novels are detected automatically, so nothing is downloaded twice
- A message is printed to the terminal for each novel downloaded
- Further features are left for you to explore

# Spider

This repository collects crawlers for various websites and is updated continuously ~~(little free time, slow updates)~~.

Project list ~~(including plans that are still just plans)~~:

- [x] [久久小说网 novel scraper](%E4%B9%85%E4%B9%85%E5%B0%8F%E8%AF%B4%E7%BD%91)
- [ ] Bing daily wallpaper scraper
- [ ] Bilibili video scraper
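The feature bullets above refer to the `category`, `start_page`, and `end_page` variables. A minimal sketch of how they are meant to be set, reusing names and category URLs from the script below; the error handling and final print here are illustrative only:

```python
# Sketch of the configuration knobs described in the README above.
# `categories`, `category`, `start_page`, and `end_page` mirror the script
# below; only two categories are listed here for brevity.
categories = {
    '科幻小说': 'https://www.txt909.com/html/kehuan/',
    '武侠仙侠': 'https://www.txt909.com/html/wuxia/',
}

category = '科幻小说'   # which category to download; must be a key of `categories`
start_page = 1          # first listing page to fetch
end_page = 200          # last listing page to fetch

if category not in categories:
    raise SystemExit(f'unknown category: {category}')
print(f'downloading pages {start_page}-{end_page} from {categories[category]}')
```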
@@ -1,58 +0,0 @@

import requests
from bs4 import BeautifulSoup
import os


def get_download_link(url, page):
    '''Return a dict mapping novel titles on one listing page to their article ids.'''
    resp = requests.get(f'{url}{page}.html', headers=headers)
    html = resp.content.decode('utf-8')
    soup = BeautifulSoup(html, 'lxml')
    div = soup.find_all('div', attrs={'class': 'listbg'})
    # The slice strips the path prefix and the '.html' suffix from each
    # detail-page href, leaving only the article id.
    novel_dic = {
        a.find_all('a')[0].attrs['title']: a.find_all('a')[0].attrs['href'][5:-5]
        for a in div
    }
    return novel_dic


def download(name, href, page, x):
    '''Download one novel as a .txt file, skipping it if it already exists.'''
    name = name.replace('?', '')  # drop characters that are invalid in file names
    if f'{name}.txt' in os.listdir(category):
        print(f'{name} 已存在')  # already downloaded
        return
    link = f'http://www.vbiquge.co/api/txt_down.php?articleid={href}&articlename={name}'
    resp = requests.get(link, headers=headers).text
    with open(f'{category}/{name}.txt', 'w', encoding='utf-8') as f:
        f.write(resp)
    print(f'正在下载第{page}页第{x}篇:{name}')  # downloading novel x on page `page`


headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
}
categories = {
    '穿越小说': 'https://www.txt909.com/html/chuanyue/',
    '言情小说': 'https://www.txt909.com/html/yanqing/',
    '现代都市': 'https://www.txt909.com/html/dushi/',
    '耽美百合': 'https://www.txt909.com/html/baihe/',
    '历史架空': 'https://www.txt909.com/html/lishi/',
    '美文同人': 'https://www.txt909.com/html/tongren/',
    '武侠仙侠': 'https://www.txt909.com/html/wuxia/',
    '玄幻小说': 'https://www.txt909.com/html/xuanhuan/',
    '惊悚悬疑': 'https://www.txt909.com/html/jingsong/',
    '科幻小说': 'https://www.txt909.com/html/kehuan/',
    '网游竞技': 'https://www.txt909.com/html/wangyou/',
}
category = '科幻小说'
start_page = 1
end_page = 200

if category in categories:
    if not os.path.exists('./' + category):
        os.mkdir('./' + category)

    # Note: range() stops before end_page itself.
    for page in range(start_page, end_page):
        novel_dic = get_download_link(categories[category], page)
        x = 1
        for name, href in novel_dic.items():
            download(name, href, page, x)
            x += 1
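As a rough illustration of what `get_download_link` above is expected to return, a hypothetical call is sketched below; it assumes the script above has already been run (or imported) and that the site's listing markup is unchanged, and the titles and ids shown are invented:

```python
# Hypothetical usage of get_download_link from the script above.
# The URL is one of the entries in `categories`; the titles and article ids
# in the comment are made up for illustration.
novel_dic = get_download_link('https://www.txt909.com/html/kehuan/', 1)
# novel_dic might look like: {'某科幻小说': '12345', '另一本小说': '67890'}
for name, article_id in novel_dic.items():
    print(name, article_id)
```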
@@ -1,31 +0,0 @@

import requests
from bs4 import BeautifulSoup
import os

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
}

category = '武侠'
url = 'https://www.txt909.com/html/wuxia/'
os.makedirs(category, exist_ok=True)  # make sure the output directory exists

for i in range(1, 208):
    # Fetch and parse one listing page of the category.
    resp = requests.get(f'{url}{i}.html', headers=headers)
    html = resp.content.decode('utf-8')
    soup = BeautifulSoup(html, 'lxml')
    div = soup.find_all('div', attrs={'class': 'listbg'})
    # Map each novel title to its article id (strip the href prefix and '.html').
    novel_pages = {
        a.find_all('a')[0].attrs['title']: a.find_all('a')[0].attrs['href'][5:-5]
        for a in div
    }
    x = 1
    for name, href in novel_pages.items():
        name = name.replace('?', '')  # drop characters that are invalid in file names
        if f'{name}.txt' in os.listdir(category):
            print(f'{name} 已存在')  # already downloaded
            continue
        link = f'http://www.vbiquge.co/api/txt_down.php?articleid={href}&articlename={name}'
        resp = requests.get(link, headers=headers).text
        with open(f'{category}/{name}.txt', 'w', encoding='utf-8') as f:
            f.write(resp)
        print(f'正在下载第{i}页第{x}篇:{name}')  # downloading novel x on page i
        x += 1

print('下载完毕')  # all pages processed