You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
74 lines
2.1 KiB
74 lines
2.1 KiB
# coding: utf-8
|
|
|
|
from pyquery import PyQuery as pq
|
|
import getvalue as gv
|
|
import requests
|
|
import getBooks
|
|
import os
|
|
|
|
# Base directory of the scrape: main() opens the listing file named after each
# page inside it and creates that page's output folder next to the listing.
path = './Books'
|
|
|
|
|
|
def del_file(path):
    """Delete every file beneath ``path`` while keeping the directory tree.

    The directory is walked recursively and each non-directory entry is
    removed. ``path`` itself and its sub-directories are left in place.

    Symbolic links are unlinked instead of being followed, so a link that
    points at a directory outside ``path`` can never cause files outside the
    tree to be deleted.

    Args:
        path: Directory whose files should be removed.

    Raises:
        OSError: If ``path`` cannot be listed or an entry cannot be removed.
    """
    for name in os.listdir(path):
        c_path = os.path.join(path, name)
        # os.path.isdir follows symlinks, so links must be excluded explicitly
        # before recursing.
        if os.path.isdir(c_path) and not os.path.islink(c_path):
            del_file(c_path)
        else:
            os.remove(c_path)
|
|
|
|
|
|
def mkdir(path):
    """Make sure ``path`` is a directory ready to receive fresh output.

    If ``path`` is already a directory its contents are cleared with
    ``del_file``; otherwise the directory is created, together with any
    missing parent directories.

    Args:
        path: Directory to create or clear.

    Raises:
        OSError: If ``path`` exists but is not a directory
            (``FileExistsError``), or if it cannot be created or cleared.
    """
    if os.path.isdir(path):
        del_file(path)
    else:
        # makedirs instead of os.mkdir: a missing parent directory should not
        # abort the run.
        os.makedirs(path)
|
|
|
|
|
|
def storebook(spath, BookId, BookName, Price, Author, introduce, Type, Imgae):
    """Write one book's metadata, introduction and cover image under ``spath``.

    Three files named after the book are produced inside ``spath``:

    * ``<BookName>``     -- id, name, price, author and type, one per line
    * ``<BookName>.txt`` -- the introduction text
    * ``<BookName>.jpg`` -- the bytes downloaded from ``Imgae``

    The two text files are opened in append mode, so a second book with the
    same name adds to the existing files instead of replacing them.

    Args:
        spath: Existing directory that receives the files.
        BookId: Book identifier, as a string.
        BookName: Book title; also used as the base file name.
        Price: Book price; converted with ``str`` before writing.
        Author: Author name, as a string.
        introduce: Introduction text, as a string.
        Type: Book type, as a string.
        Imgae: URL of the cover image (spelling kept for compatibility).

    Raises:
        OSError: If one of the files cannot be written.
        requests.RequestException: If the image request fails or times out.
    """
    info_path = os.path.join(spath, BookName)
    fields = (BookId, BookName, str(Price), Author, Type)
    with open(info_path, 'a', encoding='utf-8') as file:
        file.writelines(field + '\n' for field in fields)

    intro_path = os.path.join(spath, BookName + ".txt")
    with open(intro_path, 'a', encoding='utf-8') as file:
        file.write(introduce)

    image_path = os.path.join(spath, BookName + '.jpg')
    # Without a timeout a stalled server would block the whole run forever.
    response = requests.get(Imgae, timeout=30)
    if not response.ok:
        # An HTTP error body is not an image; skip it rather than saving a
        # corrupt .jpg, and keep going so one bad cover does not stop the run.
        print('cover image not saved:', Imgae, response.status_code)
        return
    with open(image_path, 'wb') as file:
        file.write(response.content)
|
|
|
|
def main():
    """Fetch every listed book page and store the extracted data on disk.

    ``getBooks.getBookinfo()`` is called first, then each key of
    ``getBooks.dict`` is treated as a page name. For a page, the listing
    file ``<path>/<page>`` is read line by line, and the first
    whitespace-separated token of each line is used as the book URL. The
    fetched page is parsed with the ``gv`` helpers and the result is passed
    to ``storebook``, which writes into ``<path>/<page>小说``.

    NOTE(review): presumably ``getBooks.getBookinfo()`` is what fills
    ``getBooks.dict`` and writes the listing files -- confirm in getBooks.

    Books are numbered consecutively across all pages, starting at 1.

    Raises:
        OSError: If a listing file cannot be read or output cannot be written.
        requests.RequestException: If a page request fails or times out.
    """
    total = 1
    getBooks.getBookinfo()
    for page in getBooks.dict:
        print(page)
        store = os.path.join(path, page)
        spath = store + '小说'
        mkdir(spath)
        with open(store, encoding='utf-8') as file:
            for line in file:
                parts = line.split()
                if not parts:
                    # A blank line carries no URL; skip it instead of failing
                    # with IndexError.
                    continue
                url = parts[0]
                # Without a timeout a stalled server would block the run.
                response = requests.get(url, timeout=30)
                html = str(pq(response.text))
                BookId = str(total)
                BookName = gv.getBookName(html)
                Price = 111  # fixed value; no price is read from the page
                Author = gv.getAuthor(html)
                introduce = gv.getText(html)
                Type = page
                Imgae = gv.getImage(html)
                storebook(spath, BookId, BookName, Price, Author, introduce, Type, Imgae)
                print(total, BookName)
                total = total + 1


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|