"""Download postgraduate re-examination score lines from kaoyan.cn into a CSV file."""
import csv
import os
import time
from pprint import pprint  # noqa: F401 - kept for ad-hoc debugging of API responses

import requests

# Browser-like headers sent with every request.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.kaoyan.cn",
    "priority": "u=1, i",
    "referer": "https://www.kaoyan.cn/",
    "sec-ch-ua": "\"Google Chrome\";v=\"129\", \"Not=A?Brand\";v=\"8\", \"Chromium\";v=\"129\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
}

# Header row of the output CSV (year, school name, school type, province,
# degree type, specialty code, specialty name, total, politics, English,
# subject one, subject two, note).
DATA_TITLE = ['年份', '学校名称', '学校类型', '省份', '硕士类型', '专业代码', '专业名称',
              '总分', '政治', '英语', '专业课一', '专业课二', '备注']

# Only rows whose specialty code is one of these are written to the CSV.
TARGET_SPECIAL_CODES = ('08', '1812', '081200')

# Number of items assumed per result page when deriving the page count.
PAGE_SIZE = 10

# Network tuning: per-request timeout (seconds), attempts per page, and the
# base delay (seconds) between attempts; the delay grows with each attempt.
REQUEST_TIMEOUT = 10
MAX_RETRIES = 5
RETRY_DELAY = 2


def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written when the file does not exist yet or is empty;
    otherwise the row is simply appended.

    Args:
        file_path: Path of the CSV file to append to.
        data_title: Sequence of column titles (the header row).
        file_data: Sequence of values forming the data row.
    """
    try:
        need_header = os.path.getsize(file_path) == 0
    except OSError:
        # The file is missing (or cannot be stat'ed): treat it as empty.
        need_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if need_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')


def _fetch_score_page(url, school_name, page):
    """GET *url*, retrying on errors; return the 200 response or None.

    None is returned when the server answers 404 or when every one of the
    MAX_RETRIES attempts failed (network error or another status code).
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors propagate so the script stays interruptible.
            print('请求出错,重试中,请稍等。。。')
        else:
            print(url, school_name, f'正在查询第{page}页', response)
            if response.status_code == 200:
                return response
            if response.status_code == 404:
                return None
        if attempt < MAX_RETRIES:
            # Back off a little longer after each failed attempt.
            time.sleep(RETRY_DELAY * attempt)

    print(f'连续{MAX_RETRIES}次请求失败,已跳过:{url}')
    return None


def get_html(year, school, page):
    """Fetch one page of score lines for a school and year and save matches.

    Rows whose specialty code is in TARGET_SPECIAL_CODES are appended to
    '复试分数线.csv' via file_write.

    Args:
        year: Year used in the request URL (the caller passes strings such
            as '2024').
        school: Mapping describing the school; the keys 'school_name',
            'school_type_name', 'school_id' and 'province_name' are read.
        page: Page number used in the request URL (the caller starts at 1).

    Returns:
        The total number of pages, derived from the reported item count at
        PAGE_SIZE items per page, or 0 when the page does not exist
        (HTTP 404) or could not be fetched after MAX_RETRIES attempts.
    """
    school_name = school['school_name']
    school_type_name = school['school_type_name']
    school_id = school['school_id']  # id used to build the query URL
    province_name = school['province_name']

    url = f"https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json"
    response = _fetch_score_page(url, school_name, page)
    if response is None:
        return 0

    res_data = response.json()['data']
    # Ceiling division: an exact multiple of PAGE_SIZE must not add a page.
    page_max = (res_data['count'] + PAGE_SIZE - 1) // PAGE_SIZE

    for item in res_data['item']:
        special_code = item['special_code']
        if special_code not in TARGET_SPECIAL_CODES:
            continue
        # Column order must match DATA_TITLE.
        file_data = [
            year,
            school_name,
            school_type_name,
            province_name,
            item['degree_type_name'],
            special_code,
            item['special_name'],
            item['total'],
            item['politics'],
            item['english'],
            item['special_one'],
            item['special_two'],
            item['note'],
        ]
        file_write('复试分数线.csv', DATA_TITLE, file_data)

    return page_max


def get_degree_type_list():
    """Request the school list from the kaoyan.cn API.

    Returns:
        The list found under ['data']['data'] of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On network failure or timeout.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    data = {
        "page": 1,
        "limit": 500,  # NOTE(review): only the first page of up to 500 entries is requested
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    response = requests.post(url, headers=headers, json=data, timeout=REQUEST_TIMEOUT)
    print(response)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError below.
    response.raise_for_status()
    return response.json()['data']['data']


def star():
    """Script entry point: walk every school, year and result page."""
    year_list = ['2022', '2023', '2024']
    for school in get_degree_type_list():
        for year in year_list:
            page = 1
            page_max = 1
            # get_html returns 0 when there is nothing (more) to fetch,
            # which ends the loop for this school/year.
            while page <= page_max:
                page_max = get_html(year, school, page)
                page += 1


if __name__ == '__main__':
    star()