"""Scrape pet-care Q&A pages from www.chongshe.cn into per-topic text files.

The script fetches the site's main page, extracts the dog and cat category
identifiers, collects the article links of every category page, and appends
each question/answer pair found on an article page to a text file named
after the category title.
"""
import re

import requests

# Request headers sent with every HTTP call made by get_html().
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',}
url_main = 'https://www.chongshe.cn/'

# NOTE(review): the pattern texts below are reproduced exactly as they
# appear in the reviewed source (line breaks written as "\n").  They look
# like they lost their HTML tags somewhere in extraction -- confirm them
# against the live page markup before relying on this script.
_DOG_PATTERN = re.compile('\n  • 狗狗..\n  • ', re.S)
_CAT_PATTERN = re.compile('\n  • 猫猫..\n  • ', re.S)
_LINK_PATTERN = re.compile('\n  • .*?', re.S)
_TITLE_PATTERN = re.compile('(.*?)-我的宠舍', re.S)
_QA_PATTERN = re.compile('(.*?)-我的宠舍.*?\n\n    (.*?)\n\n    ', re.S)


def get_html(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot hang the script forever.

    Returns:
        The page text, or ``None`` (after printing a message) when the
        request fails.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only request failures are treated as "page unavailable";
        # KeyboardInterrupt and programming errors still propagate.
        print('请求网址出错')
        return None
    response.encoding = 'utf-8'
    return response.text


def _category_ids(pattern, html):
    """Return characters [-5:-1] of every *pattern* match in *html*."""
    # ``html or ''`` keeps a failed download (None) from raising TypeError.
    return [match[-5:-1] for match in pattern.findall(html or '')]


def parse_url_dog(html):
    """Return the dog category identifiers found on the main page.

    The original author's note gives
    ``['vdys', 'vdjk', 'vdxl', 'vdsh', 'vdfy']`` as an example result.
    """
    return _category_ids(_DOG_PATTERN, html)


def parse_url_cat(html):
    """Return the cat category identifiers found on the main page."""
    return _category_ids(_CAT_PATTERN, html)


def parse_url(html):
    """Return ``[titles, links]`` extracted from a category page.

    ``titles`` holds the text captured in front of "-我的宠舍" and ``links``
    holds every match of the link pattern.
    """
    html = html or ''
    links = _LINK_PATTERN.findall(html)
    titles = _TITLE_PATTERN.findall(html)
    return [titles, links]


def parse_result(html):
    """Return the ``(question, answer)`` tuples found on an article page."""
    return _QA_PATTERN.findall(html or '')


def write_item_to_file(item, name):
    """Append one question/answer pair to the text file for topic *name*.

    Args:
        item: Sequence whose first element is the question and whose second
            element is the answer.
        name: Topic title used to build the output file name.
    """
    # NOTE(review): 'ANSI' is presumably the Windows-only alias of the mbcs
    # codec and the path uses a backslash separator -- confirm the script is
    # only meant to run on Windows.
    with open('问答\\' + str(name) + '知识.txt', 'a', encoding='ANSI') as f:
        f.write('问:' + item[0] + '?\n')
        f.write('答:' + item[1] + '\n')


def _update_catalogue(urls):
    """Insert the scraped topic titles into ``catalogue.txt``.

    Not called anywhere: the original script kept this step disabled inside
    a string literal.  The statement nesting was reconstructed from
    flattened text -- TODO confirm before enabling.  The fixed bound of 10
    presumably assumes that the first five ``[titles, links]`` pairs in
    *urls* belong to dogs and the rest to cats.
    """
    parts = []
    with open('catalogue.txt', 'r', encoding='ANSI') as f:
        for line in f:
            parts.append(line)
            if line == '狗狗\n':
                parts.append('\t常见问题\n')
                for i in range(0, 10, 2):
                    parts.append('\t\t' + urls[i][0] + '\n')
            if line == '猫猫\n':
                parts.append('\t常见问题\n')
                for i in range(10, len(urls), 2):
                    parts.append('\t\t' + urls[i][0] + '\n')

    with open('catalogue.txt', 'w', encoding='ANSI') as f:
        f.write(''.join(parts))


html_main = get_html(url_main)
dog_urls = parse_url_dog(html_main)
cat_urls = parse_url_cat(html_main)

# ``urls`` is a flat list that alternates [titles], [links] for every
# category page, dogs first and cats second.
urls = []
for category in dog_urls + cat_urls:
    category_html = get_html('https://www.chongshe.cn/vod/' + str(category))
    urls += parse_url(category_html)

for titles, links in zip(urls[0::2], urls[1::2]):
    for link in links:
        article_html = get_html('https://www.chongshe.cn/vod' + str(link))
        # parse_result already yields tuples of strings, so they are written
        # directly instead of being round-tripped through eval(str(...)).
        for qa_pair in parse_result(article_html):
            write_item_to_file(qa_pair, titles[0])