# python
# Recovered from a git diff of README.md (6376748..620b4c1) that had been
# collapsed onto a single line.
"""Download every chapter of a Baidu Dushu novel to local text files.

Steps:
1. Synchronous: request getCatalog once to get every chapter's cid and title.
2. Asynchronous: request getChapterContent for each chapter and save its text.
"""

# URL1 (catalog) -> every chapter's title and cid; "%22" is a URL-encoded '"':
#   http://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"}
# URL2 (content of a single chapter):
#   http://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|11348571","need_bookinfo":1}
#
# URL1 yields all chapters of the novel together with their cids.
# Looping over the cids and splicing each one into URL2 yields the chapter text.
#
# URL1 is requested only once and the chapter downloads start after it has
# returned, so a plain synchronous request is enough there.
# URL2 is requested once per chapter with many requests in flight at the same
# time, so it is done asynchronously.

import asyncio
import json
import logging
from urllib.parse import parse_qs, urlsplit

import aiofiles
import aiohttp
import requests

logger = logging.getLogger(__name__)


def _book_id_from_url(url):
    """Return the ``book_id`` embedded in the ``data`` query parameter of *url*.

    Raises:
        ValueError: if *url* has no ``data`` parameter, the parameter is not
            valid JSON, or the JSON has no ``book_id`` key.
    """
    query = parse_qs(urlsplit(url).query)
    try:
        return json.loads(query["data"][0])["book_id"]
    except (KeyError, IndexError, TypeError, ValueError) as err:
        raise ValueError(
            f"cannot determine book_id from catalog URL: {url!r}"
        ) from err


async def _fetch_json(session, url):
    """GET *url* through *session* and return the decoded JSON body."""
    async with session.get(url) as resp:
        return await resp.json()


async def aiodownload(cid, b_id, title, session=None):
    """Download one chapter and write its text to a file named *title*.

    Args:
        cid: Chapter id as listed by the catalog endpoint.
        b_id: Book id; the request's ``cid`` field is sent as ``"<b_id>|<cid>"``.
        title: Chapter title, used unchanged as the output file path.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            session is opened for this single request and closed afterwards.
    """
    data = json.dumps({
        "book_id": b_id,
        "cid": f"{b_id}|{cid}",
        "need_bookinfo": 1,
    })
    url = f"http://dushu.baidu.com/api/pc/getChapterContent?data={data}"

    if session is None:
        async with aiohttp.ClientSession() as own_session:
            dic = await _fetch_json(own_session, url)
    else:
        dic = await _fetch_json(session, url)

    async with aiofiles.open(title, mode="w", encoding="utf-8") as f:
        # Write the chapter text out.
        await f.write(dic['data']['novel']['content'])


async def getCatalog(url, b_id=None):
    """Fetch the chapter catalog from *url* and download all chapters.

    Args:
        url: Catalog (getCatalog) URL of the book.
        b_id: Book id passed on to every chapter request. When omitted it is
            read from the ``data`` query parameter of *url*, so the function
            does not depend on a module-level ``b_id`` being defined.

    A chapter that fails to download does not stop the others; each failure
    is logged as a warning once all downloads have finished.
    """
    if b_id is None:
        b_id = _book_id_from_url(url)

    # One blocking request; nothing else is running on the loop yet.
    resp = requests.get(url)
    # Each item carries one chapter's title and cid.
    items = resp.json()['data']['novel']['items']

    # A single session is shared so all chapter requests use one connection pool.
    async with aiohttp.ClientSession() as session:
        tasks = [
            aiodownload(item['cid'], b_id, item['title'], session)
            for item in items
        ]
        # gather() accepts bare coroutines and an empty list, unlike
        # asyncio.wait(), which rejects coroutines on Python 3.11+.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    for item, result in zip(items, results):
        if isinstance(result, BaseException):
            logger.warning(
                "failed to download chapter %r: %r", item['title'], result
            )


if __name__ == '__main__':
    b_id = "4306063500"
    url = 'http://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"' + b_id + '"}'
    asyncio.run(getCatalog(url, b_id))