|
|
|
|
@ -1,60 +0,0 @@
|
|
|
|
|
# python
|
|
|
|
|
#http://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"} ->所有章节内容(名称,cid),%22 -> "
|
|
|
|
|
#章节内部内容
|
|
|
|
|
#http://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|11348571","need_bookinfo":1}
|
|
|
|
|
|
|
|
|
|
#通过URL1得到小说全部章节,并且得到cid
|
|
|
|
|
#通过循环拿到每一个cid,cid拼接URL2,从而得到小说内容
|
|
|
|
|
|
|
|
|
|
# URL1 (getCatalog) is requested only once, so a plain synchronous call is enough; its result lists every chapter to download.
|
|
|
|
|
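# URL2 (getChapterContent) is requested once per chapter; those many requests run at the same time, so they are made asynchronously.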
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
import asyncio
|
|
|
|
|
import aiohttp
|
|
|
|
|
import aiofiles
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
步骤:
|
|
|
|
|
1.同步操作:访问getCatalog 拿到所有章节的cid和名称
|
|
|
|
|
2.异步操作:访问getChapterContent 下载所有的文章内容
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
async def aiodownload(cid, b_id, title):
    """Download one chapter and save its text to a file named after its title.

    Args:
        cid: Chapter id taken from the catalog response.
        b_id: Id of the book the chapter belongs to.
        title: Chapter title. It is used as the output file name (relative to
            the current working directory) after characters that are unsafe
            in file names have been replaced with ``_``.
    """
    # The endpoint takes a single ``data`` query parameter holding a JSON
    # document; the chapter is addressed as "<book_id>|<cid>".
    data = {
        "book_id": b_id,
        "cid": f"{b_id}|{cid}",
        "need_bookinfo": 1,
    }
    data = json.dumps(data)
    url = f"http://dushu.baidu.com/api/pc/getChapterContent?data={data}"

    # The title is remote data: a path separator would point the write at
    # another directory, and characters such as ':' or '?' are rejected by
    # some file systems. Map them all to '_' so every chapter can be saved.
    unsafe_chars = str.maketrans(dict.fromkeys('\\/:*?"<>|', "_"))
    file_name = title.translate(unsafe_chars)

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            dic = await resp.json()

            async with aiofiles.open(file_name, mode="w", encoding="utf-8") as f:
                # Write the chapter text to disk.
                await f.write(dic['data']['novel']['content'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def getCatalog(url):
    """Fetch the chapter catalog and download all chapters concurrently.

    The catalog is fetched with one blocking ``requests.get`` call; each
    entry under ``data.novel.items`` then becomes one ``aiodownload`` task.

    Args:
        url: Complete getCatalog request URL for the book.

    Note:
        The book id passed to ``aiodownload`` is read from the module-level
        name ``b_id``, which must be defined before this coroutine runs.
    """
    resp = requests.get(url)  # send the catalog request
    dic = resp.json()

    tasks = []
    for item in dic['data']['novel']['items']:  # one item per chapter (title + cid)
        title = item['title']
        cid = item['cid']
        # asyncio.wait() no longer accepts bare coroutine objects (deprecated
        # in Python 3.8, TypeError since 3.11), so wrap each one in a Task.
        tasks.append(asyncio.create_task(aiodownload(cid, b_id, title)))

    # asyncio.wait() raises ValueError on an empty collection; an empty
    # catalog simply means there is nothing to download.
    if tasks:
        await asyncio.wait(tasks)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Keep ``b_id`` as a module-level name: getCatalog() reads it as a global
    # when it builds the per-chapter download tasks.
    b_id = "4306063500"

    # The catalog endpoint takes the book id inside a JSON ``data`` parameter.
    url = f'http://dushu.baidu.com/api/pc/getCatalog?data={{"book_id":"{b_id}"}}'

    # getCatalog is a coroutine function, so it has to be driven by an event loop.
    asyncio.run(getCatalog(url))
|