"""Scrape score-line records from kaoyan.cn and append matching rows to a CSV.

Reconstructed from the patch that adds ``复试分数线.py`` as a new file; the same
patch also adds the binary files ``复试分数线.xlsx`` and ``招生专业.xlsx``.

Flow: fetch the school list, then for every school and year walk the paged
score JSON and write the rows whose major code is in ``TARGET_SPECIAL_CODES``.
"""
import csv
import os
import time
from pprint import pprint

import requests


headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.kaoyan.cn",
    "priority": "u=1, i",
    "referer": "https://www.kaoyan.cn/",
    "sec-ch-ua": "\"Google Chrome\";v=\"129\", \"Not=A?Brand\";v=\"8\", \"Chromium\";v=\"129\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}

# Seconds to wait for a response before treating the attempt as failed.
REQUEST_TIMEOUT = 15
# Seconds to pause between attempts so a failing endpoint is not hammered.
RETRY_DELAY = 2
# Page size the paging arithmetic is based on (the original code divided the
# record count by 10).
PAGE_SIZE = 10
# Years to query for every school.
YEARS = ('2022', '2023', '2024')
# Major codes whose rows are written to the output file.
TARGET_SPECIAL_CODES = ('08', '1812', '081200')
# Output CSV path.
SCORE_CSV_PATH = '复试分数线.csv'
# CSV header row; column order matches the row built in get_html().
DATA_TITLE = ['年份', '学校名称', '学校类型', '省份', '硕士类型', '专业代码', '专业名称', '总分', '政治', '英语',
              '专业课一', '专业课二', '备注']


def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header first if needed.

    Args:
        file_path: Path of the CSV file; created when it does not exist.
        data_title: Header row, written only when the file is missing or empty.
        file_data: The data row to append.
    """
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # The file does not exist (or cannot be stat'ed): start it with a header.
        needs_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')


def _request_until_ok(method, url, log_prefix=(), *, give_up_on_404=False, **kwargs):
    """Send a request repeatedly until the server answers with HTTP 200.

    Args:
        method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
        url: Target URL.
        log_prefix: Values printed in front of the response object on each attempt.
        give_up_on_404: When true, a 404 ends the retries and ``None`` is returned.
        **kwargs: Extra arguments forwarded to ``requests.request``.

    Returns:
        The successful response, or ``None`` when a 404 was received and
        ``give_up_on_404`` is set.
    """
    while True:
        try:
            response = requests.request(
                method, url, headers=headers, timeout=REQUEST_TIMEOUT, **kwargs
            )
        except requests.RequestException:
            # Only network-level failures are retried; Ctrl-C still interrupts.
            print('请求出错,重试中,请稍等。。。')
        else:
            print(*log_prefix, response)
            if response.status_code == 200:
                return response
            if give_up_on_404 and response.status_code == 404:
                return None
        time.sleep(RETRY_DELAY)


def get_html(year, school, page):
    """Fetch one page of score records for a school/year and save matching rows.

    Args:
        year: Year string used in the URL and written to every row.
        school: School record from get_degree_type_list(); the keys read are
            ``school_name``, ``school_type_name``, ``school_id`` and
            ``province_name``.
        page: 1-based page number to fetch.

    Returns:
        The number of pages available for this school/year, or ``0`` when the
        requested page does not exist (HTTP 404).
    """
    school_name = school['school_name']
    school_type_name = school['school_type_name']
    school_id = school['school_id']  # id used in the query URL
    province_name = school['province_name']

    url = f"https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json"
    response = _request_until_ok(
        'GET', url, (url, school_name, f'正在查询第{page}页'), give_up_on_404=True
    )
    if response is None:
        return 0

    res_data = response.json()['data']
    # Ceiling division: an exact multiple of PAGE_SIZE must not add a page.
    page_max = (res_data['count'] + PAGE_SIZE - 1) // PAGE_SIZE

    for item in res_data['item']:
        special_code = item['special_code']  # major code
        if special_code not in TARGET_SPECIAL_CODES:
            continue
        file_data = [
            year,
            school_name,
            school_type_name,
            province_name,
            item['degree_type_name'],  # master's degree type
            special_code,
            item['special_name'],      # major name
            item['total'],             # total score
            item['politics'],          # politics
            item['english'],           # English
            item['special_one'],       # specialised course 1
            item['special_two'],       # specialised course 2
            item['note'],              # remarks
        ]
        file_write(SCORE_CSV_PATH, DATA_TITLE, file_data)
    return page_max


def get_degree_type_list():
    """Request the school list and return it.

    Returns:
        The value stored under ``data.data`` in the JSON response; star()
        iterates it and passes each element to get_html() as a school record.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    data = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": ""
    }
    response = _request_until_ok('POST', url, json=data)
    return response.json()['data']['data']


def star():
    """Entry point: walk every school, year and page and save matching rows."""
    for school in get_degree_type_list():
        for year in YEARS:
            page = 1
            page_max = 1
            # get_html() returns 0 for a missing page, which also ends the loop.
            while page <= page_max:
                page_max = get_html(year, school, page)
                page += 1


if __name__ == '__main__':
    star()
"""Scrape enrollment-plan details from kaoyan.cn and append them to a CSV.

Reconstructed from the patch that adds ``招生信息.py`` as a new file.

Flow: fetch the school list, then for every school and year fetch the plan
list (keyword ``081200``), fetch each plan's detail record and append one row
per plan to ``招生专业.csv``.
"""
import csv
import os
import time
from pprint import pprint

import requests


headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.kaoyan.cn",
    "priority": "u=1, i",
    "referer": "https://www.kaoyan.cn/",
    "sec-ch-ua": "\"Google Chrome\";v=\"129\", \"Not=A?Brand\";v=\"8\", \"Chromium\";v=\"129\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}

# Seconds to wait for a response before treating the attempt as failed.
REQUEST_TIMEOUT = 15
# Seconds to pause between attempts so a failing endpoint is not hammered.
RETRY_DELAY = 2
# Years to query for every school.
YEARS = ('2022', '2023', '2024')
# Output CSV path.
PLAN_CSV_PATH = '招生专业.csv'
# CSV header row: three school columns followed by the plan-detail columns,
# in the same order as PLAN_DETAIL_FIELDS.
DATA_TITLE = ['学校名称', '学校类型', '省份', '学位类别', '招生年份', '所属门类代码', '所属门类', '专业代码',
              '专业', '所属学院',
              '学习方式', '所属学科代码', '所属学科', '拟招生人数', '研究方向', '初试科目', '参考书目', '备注']
# Keys read from a plan-detail record, in output column order.
PLAN_DETAIL_FIELDS = (
    'degree_type_name',   # degree category
    'year',               # enrollment year
    'level1_code',        # discipline category code
    'level1_name',        # discipline category
    'special_code',       # major code
    'special_name',       # major
    'depart_name',        # department / college
    'recruit_type_name',  # study mode
    'level2_code',        # discipline code
    'level2_name',        # discipline
    'recruit_number',     # planned enrollment
    'research_area',      # research area
    'exam_subject',       # preliminary exam subjects
    'exam_book',          # reference books
    'note',               # remarks
)


def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header first if needed.

    Args:
        file_path: Path of the CSV file; created when it does not exist.
        data_title: Header row, written only when the file is missing or empty.
        file_data: The data row to append.
    """
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # The file does not exist (or cannot be stat'ed): start it with a header.
        needs_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')


def _post_until_ok(url, payload, log_prefix=()):
    """POST a JSON payload repeatedly until the server answers with HTTP 200.

    Args:
        url: Target URL.
        payload: Object sent as the JSON request body.
        log_prefix: Values printed in front of the response object on each attempt.

    Returns:
        The successful response.
    """
    while True:
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException:
            # Only network-level failures are retried; Ctrl-C still interrupts.
            print('请求出错,重试中。。。')
        else:
            print(*log_prefix, response)
            if response.status_code == 200:
                return response
        time.sleep(RETRY_DELAY)


def get_html(school_id, year):
    """Fetch the plan list of one school for one year.

    Args:
        school_id: School id sent as ``school_id`` in the request body.
        year: Year sent as ``year`` in the request body.

    Returns:
        The value stored under ``data.data`` in the JSON response; star()
        iterates it and reads ``plan_id`` from each element.
    """
    url = "https://api.kaoyan.cn/pc/school/planList"
    data = {
        "school_id": school_id,
        "page": 1,
        "limit": 8000,
        "recruit_type": "",
        "year": year,
        "keyword": "081200",
        "is_apply": 2
    }
    response = _post_until_ok(url, data, (school_id, year))
    return response.json()['data']['data']


def get_degree_type_list():
    """Request the school list and return it.

    Returns:
        The value stored under ``data.data`` in the JSON response; star()
        iterates it and reads the school fields from each element.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    data = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": ""
    }
    response = _post_until_ok(url, data)
    return response.json()['data']['data']


def get_success(plan_id):
    """Fetch one plan's detail record and return its fields as a row fragment.

    Args:
        plan_id: Plan id sent as ``plan_id`` in the request body.

    Returns:
        A list with the values of PLAN_DETAIL_FIELDS, in that order.

    Raises:
        KeyError: If the detail record lacks one of the expected keys.
    """
    url = "https://api.kaoyan.cn/pc/school/planDetail"
    data = {
        "plan_id": plan_id,
        "is_apply": 2
    }
    response = _post_until_ok(url, data, (plan_id,))
    data_dict = response.json()['data']
    return [data_dict[key] for key in PLAN_DETAIL_FIELDS]


def star():
    """Entry point: write one CSV row per plan for every school and year."""
    for school in get_degree_type_list():
        school_columns = [
            school['school_name'],       # school name
            school['school_type_name'],  # school type
            school['province_name'],     # province
        ]
        school_id = school['school_id']  # id used in the plan queries
        for year in YEARS:
            for success in get_html(school_id, year):
                plan_id = success['plan_id']  # id used in the detail query
                file_data = school_columns + get_success(plan_id)
                file_write(PLAN_CSV_PATH, DATA_TITLE, file_data)


if __name__ == '__main__':
    star()