"""Download graduate enrollment plans from the kaoyan.cn API into a CSV file."""
import csv
import os
import time
from pprint import pprint  # noqa: F401 -- kept for ad-hoc inspection of API responses

import requests

# Browser-like headers sent with every API request.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.kaoyan.cn",
    "priority": "u=1, i",
    "referer": "https://www.kaoyan.cn/",
    "sec-ch-ua": "\"Google Chrome\";v=\"129\", \"Not=A?Brand\";v=\"8\", \"Chromium\";v=\"129\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
}

# Seconds to wait for the server before a request is treated as failed.
REQUEST_TIMEOUT = 30
# Seconds to pause between attempts so a failing endpoint is not hammered.
RETRY_DELAY = 1.0

# Keys read from a plan-detail record, in the order they appear in a CSV row.
_PLAN_DETAIL_KEYS = (
    'degree_type_name',   # degree type
    'year',               # enrollment year
    'level1_code',        # discipline category code
    'level1_name',        # discipline category
    'special_code',       # major code
    'special_name',       # major
    'depart_name',        # department / school
    'recruit_type_name',  # study mode
    'level2_code',        # discipline code
    'level2_name',        # discipline
    'recruit_number',     # planned enrollment
    'research_area',      # research direction
    'exam_subject',       # preliminary exam subjects
    'exam_book',          # reference books
    'note',               # remarks
)


def _post_until_ok(url, payload, *log_prefix):
    """POST ``payload`` as JSON to ``url`` and retry until the server answers 200.

    Args:
        url: Endpoint to call.
        payload: JSON-serializable request body.
        *log_prefix: Values printed in front of the response object for each
            attempt that received a response.

    Returns:
        The first ``requests.Response`` whose status code is 200.

    Only ``requests.RequestException`` is treated as retryable, so Ctrl-C and
    programming errors still propagate instead of being swallowed by the loop.
    """
    while True:
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException:
            print('请求出错,重试中。。。')
        else:
            print(*log_prefix, response)
            if response.status_code == 200:
                return response
        # Pause before the next attempt rather than spinning in a tight loop.
        time.sleep(RETRY_DELAY)


def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first when needed.

    Args:
        file_path: Path of the CSV file; it is created if it does not exist.
        data_title: Header row, written only when the file is missing or empty.
        file_data: Data row to append.
    """
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # The size cannot be read (typically the file does not exist yet).
        needs_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')


def get_html(school_id, year):
    """Fetch the enrollment-plan list of one school for one year.

    Args:
        school_id: School identifier sent to the API.
        year: Enrollment year sent to the API.

    Returns:
        The value stored under ``['data']['data']`` in the JSON response.
    """
    url = "https://api.kaoyan.cn/pc/school/planList"
    data = {
        "school_id": school_id,
        "page": 1,
        "limit": 8000,
        "recruit_type": "",
        "year": year,
        "keyword": "081200",  # presumably a major code filter -- confirm
        "is_apply": 2,
    }
    response = _post_until_ok(url, data, school_id, year)
    return response.json()['data']['data']


def get_degree_type_list():
    """Fetch the list of schools returned by the ``schoolListV2`` endpoint.

    Returns:
        The value stored under ``['data']['data']`` in the JSON response.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    data = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    response = _post_until_ok(url, data)
    return response.json()['data']['data']


def get_success(plan_id):
    """Fetch the detail record of one enrollment plan.

    Args:
        plan_id: Plan identifier sent to the API.

    Returns:
        A list with the values of ``_PLAN_DETAIL_KEYS`` in that order.

    Raises:
        KeyError: If the response lacks ``'data'`` or one of the expected keys.
    """
    url = "https://api.kaoyan.cn/pc/school/planDetail"
    data = {
        "plan_id": plan_id,
        "is_apply": 2,
    }
    response = _post_until_ok(url, data, plan_id)
    data_dict = response.json()['data']
    return [data_dict[key] for key in _PLAN_DETAIL_KEYS]


def star():
    """Crawl every listed school for each year and append the rows to the CSV."""
    data_title = [
        '学校名称', '学校类型', '省份', '学位类别', '招生年份', '所属门类代码',
        '所属门类', '专业代码', '专业', '所属学院', '学习方式', '所属学科代码',
        '所属学科', '拟招生人数', '研究方向', '初试科目', '参考书目', '备注',
    ]
    year_list = ['2022', '2023', '2024']

    for school in get_degree_type_list():
        school_name = school['school_name']
        school_type_name = school['school_type_name']
        school_id = school['school_id']  # id used to query the plan list
        province_name = school['province_name']

        for year in year_list:
            for success in get_html(school_id, year):
                plan_id = success['plan_id']  # id used to query the plan detail
                data = get_success(plan_id)
                file_data = [school_name, school_type_name, province_name] + data
                file_write('招生专业.csv', data_title, file_data)


if __name__ == '__main__':
    star()