From 7752b3b4fecb5f676adebb613b8f7240932e8915 Mon Sep 17 00:00:00 2001 From: p6sitlwaz <284768853@qq.com> Date: Tue, 18 Jun 2024 17:47:05 +0800 Subject: [PATCH] =?UTF-8?q?Delete=20'=E6=9C=9F=E6=9C=ABzy.py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 期末zy.py | 104 ---------------------------------------------------- 1 file changed, 104 deletions(-) delete mode 100644 期末zy.py diff --git a/期末zy.py b/期末zy.py deleted file mode 100644 index 6ff9691..0000000 --- a/期末zy.py +++ /dev/null @@ -1,104 +0,0 @@ -from bs4 import BeautifulSoup -import xlwt -import requests -def ask_url(url): - head = { - "User-Agent": -"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0" - } - try: - r = requests.get(url, headers=head, timeout=30) - r.raise_for_status() - r.encoding = 'utf-8' - return r.text - except: - return "" - -def get_data(base_url): - data_list = [] - - for i in range(0, 10): - url = base_url + str(i + 1) - html = ask_url(url) - if html == "": - continue - soup = BeautifulSoup(html, 'html.parser') - for item in soup.find_all('div', class_="result_item"): - data = {} - - if item.div.p.span.string == "疾病": - data['diseaseName'] = item.div.p.a.string - - symptoms = [] - p = item.find('p', class_='result_item_content_label') - for symptom in p.find_all('a'): - symptoms.append(symptom.string) - - sub_url = item.div.p.a.attrs["href"] - sub_html = ask_url(sub_url) - if sub_html == "": - continue - sub_soup = BeautifulSoup(sub_html, 'html.parser') - - information_ul = sub_soup.find('ul', class_="information_ul") - for detail in information_ul.find_all('li'): - if detail.i.string == '别名:': - data['diseaseAlias'] = detail.span.string - elif detail.i.string == '发病部位:': - data['siteOfOnset'] = [] - for site in detail.span.find_all('a'): - data['siteOfOnset'].append(site.string) - elif detail.i.string == '传染性:': - data['infectivity'] = detail.span.string - elif detail.i.string == '多发人群:': - data['multiplePopulation'] = detail.span.string - elif detail.i.string == '并发症:': - data['complication'] = [] - for complication in detail.span.find_all('a'): - data['complication'].append(complication.string) - elif detail.i.string == '挂号科室:': - data['registrationDepartment'] = [] - for department in detail.span.find_all('a'): - data['registrationDepartment'].append(department.string) - elif detail.i.string == '临床检查:': - data['clinicalExamination'] = [] - for examination in detail.span.find_all('a'): - data['clinicalExamination'].append(examination.string) - elif detail.i.string == '典型症状:': - for symptom in detail.span.find_all('a'): - symptoms.append(symptom.string) - data['commonDrugs'] = symptoms - - information_ul1 = sub_soup.find('ul', class_="information_ul information_ul_bottom") - for detail in information_ul1.find_all('li'): - if detail.i.string == '常用药品:': - data['commonDrugs'] = [] - for drug in detail.span.find_all('a'): - data['commonDrugs'].append(drug.string) - - data_list.append(data) - - return data_list - -def save_data(data_list, save_path): - book = xlwt.Workbook(encoding='utf-8', style_compression=0) - sheet = book.add_sheet("疾病", cell_overwrite_ok=True) - col = ("diseaseName", "diseaseAlias", "siteOfOnset", "infectivity", "multiplePopulation", "complication", "registrationDepartment", "clinicalExamination", "commonDrugs") - length = len(data_list) - for i in range(0, 9): - sheet.write(0, i, col[i]) - for i in range(0, length): - # print("\r当前进度:{:.2f}%".format((i + 1) * 100 / length), end="") - data = data_list[i] - for j in range(0, 9): - if col[j] in data: - sheet.write(i + 1, j, data[col[j]]) - book.save(save_path) - return "" - -if __name__ == "__main__": - base_url = "https://jbk.39.net/bw/jizhenke_p" - save_path = "D:\数据采集\PycharmProjects\pythonProject\数据采集\文件\疾病.xls" - - data_list = get_data(base_url) - save_data(data_list, save_path)