"""Scrape pet-care question/answer pages from chongshe.cn into text files.

The script fetches the site's front page, collects the dog and cat section
identifiers, follows each section to its article links, and appends every
question/answer pair it finds to a per-category text file.

NOTE(review): ``parse_url_cat``, ``parse_url`` and ``parse_result`` are called
below but are not defined in the portion of the file visible here; they must
be provided elsewhere in this module for the script to run.
"""
import os
import re

import requests

# Browser-like User-Agent sent with every request made by get_html().
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',}


def get_html(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Absolute URL to request.

    Returns:
        The page text, or ``None`` (after printing a message) when the
        request fails.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        print('请求网址出错')
        return None
    # The original assigned 'GBK' and then immediately overwrote it with
    # 'utf-8'; only the final assignment ever had an effect.
    response.encoding = 'utf-8'
    return response.text


def parse_url_dog(html):
    """Return every match of the dog-section pattern found in ``html``.

    Args:
        html: Page text as returned by get_html(); may be ``None`` or empty.

    Returns:
        A list of the captured strings; empty when ``html`` is falsy.

    NOTE(review): the pattern below is only what survives in the reviewed
    copy of this file. It looks like the HTML markup it originally matched
    was stripped during extraction -- restore the real pattern before
    relying on this function.
    """
    if not html:
        return []
    pattern = re.compile('\n(.*?)\n', re.S)
    return pattern.findall(html)


def write_item_to_file(item, name):
    """Append one question/answer pair to the text file for ``name``.

    Args:
        item: Indexable whose element 0 is the question text and element 1
            is the answer text.
        name: Category name; used as the prefix of the output file name.

    The output directory is created on first use. The 'ANSI' codec name is
    an alias that Python only provides on Windows, so this function raises
    LookupError on other platforms.
    """
    os.makedirs('问答', exist_ok=True)
    path = os.path.join('问答', str(name) + '知识.txt')
    with open(path, 'a', encoding='ANSI') as f:
        f.write('问:' + item[0] + '?\n')
        f.write('答:' + item[1] + '\n')


url_main = 'https://www.chongshe.cn/'
html_main = get_html(url_main)
if html_main is None:
    # get_html() has already reported the failure; nothing can be crawled
    # without the front page.
    raise SystemExit(1)
dog_urls = parse_url_dog(html_main)  # e.g. ['vdys', 'vdjk', 'vdxl', 'vdsh', 'vdfy']
cat_urls = parse_url_cat(html_main)

# Collect the parse_url() results for every section, dogs first, then cats.
urls = []
for section in (*dog_urls, *cat_urls):
    section_html = get_html('https://www.chongshe.cn/vod/' + str(section))
    if section_html is None:
        continue  # Skip sections that could not be downloaded.
    urls.extend(parse_url(section_html))

# ``urls`` alternates between two kinds of entries: each even index holds the
# category names and the following odd index holds that category's links.
for names, links in zip(urls[0::2], urls[1::2]):
    for link in links:
        page = get_html('https://www.chongshe.cn/vod' + str(link))
        if page is None:
            continue  # Skip articles that could not be downloaded.
        for item in parse_result(page):
            # SECURITY NOTE(review): eval() is applied to text scraped from
            # the network. If parse_result() yields tuples, eval(str(item))
            # merely round-trips the tuple and can be dropped; confirm and
            # remove it, or switch to ast.literal_eval.
            write_item_to_file(eval(str(item)), names[0])

# Add the categories to the catalogue file. Currently disabled: the code is
# kept inside an unused string literal and never executes.
'''
with open('catalogue.txt','r', encoding='ANSI') as f:
    line_data = ''
    for line in f:
        line_data += line
        if line == '狗狗\n':
            line_data += '\t常见问题\n'
            for i in range(0,10,2):
                line_data += ('\t\t' + urls[i][0] + '\n')
        if line == '猫猫\n':
            line_data += '\t常见问题\n'
            for i in range(10,len(urls),2):
                line_data += ('\t\t' + urls[i][0] + '\n')
    f.close()

with open('catalogue.txt','w', encoding='ANSI') as f:
    f.write(line_data)
    f.close()'''