You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
test2/招生信息.py

150 lines
5.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import csv, os
import time
from pprint import pprint

import requests
# HTTP request headers sent with every POST to api.kaoyan.cn; they mimic a
# desktop Chrome 129 browser navigating from https://www.kaoyan.cn/.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.kaoyan.cn',
    'priority': 'u=1, i',
    'referer': 'https://www.kaoyan.cn/',
    # Single-quoted so the embedded double quotes need no escaping.
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/129.0.0.0 Safari/537.36'
    ),
}
def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written only when the file is empty or its size cannot be
    read (typically because it does not exist yet); otherwise the row is
    appended after the existing content.

    Args:
        file_path: Path of the CSV file to append to; created if missing.
        data_title: Sequence of column names used as the header row.
        file_data: Sequence of values forming the data row.
    """
    try:
        # An existing but empty file still needs its header.
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # Usually the file does not exist yet; open(..., 'a') creates it.
        # Only OSError is caught so Ctrl-C and programming errors propagate.
        needs_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')
def get_html(school_id, year, *, timeout=30, retry_delay=1.0):
    """Fetch the plan list of one school for one year from the planList API.

    The request is repeated until the server answers with HTTP 200. Network
    errors and non-200 answers are retried after a short pause.

    Args:
        school_id: School identifier sent as ``school_id``.
        year: Year sent as ``year``.
        timeout: Seconds ``requests`` waits on the server per attempt, so a
            stalled connection cannot hang the script forever.
        retry_delay: Seconds to sleep between attempts, so failures do not
            turn into a tight request loop against the API.

    Returns:
        The value stored under ``['data']['data']`` in the JSON response;
        the caller iterates it and reads ``plan_id`` from each entry.
    """
    url = "https://api.kaoyan.cn/pc/school/planList"
    payload = {
        "school_id": school_id,
        "page": 1,
        "limit": 8000,  # large page size so a single request is made
        "recruit_type": "",
        "year": year,
        "keyword": "081200",  # fixed search keyword (presumably a major code; confirm)
        "is_apply": 2,
    }
    while True:
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException:
            # Only request failures are retried; KeyboardInterrupt and
            # programming errors propagate instead of looping forever.
            print('请求出错,重试中。。。')
        else:
            print(school_id, year, response)
            if response.status_code == 200:
                break
        time.sleep(retry_delay)

    return response.json()['data']['data']
def get_degree_type_list(timeout=30):
    """Fetch the school list from the schoolListV2 API.

    A single POST is made (page 1, up to 500 entries, ``spe_id`` "20364",
    all other filters empty).

    Args:
        timeout: Seconds ``requests`` waits on the server, so a stalled
            connection cannot hang the script forever.

    Returns:
        The value stored under ``['data']['data']`` in the JSON response;
        the caller iterates it and reads ``school_name``,
        ``school_type_name``, ``school_id`` and ``province_name`` from each
        entry.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    payload = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    print(response)
    # Surface HTTP errors directly instead of failing later with an obscure
    # KeyError / JSON decoding error on an error page.
    response.raise_for_status()
    return response.json()['data']['data']
def get_success(plan_id, *, timeout=30, retry_delay=1.0):
    """Fetch one plan's details from the planDetail API as a flat row.

    The request is repeated until the server answers with HTTP 200. Network
    errors and non-200 answers are retried after a short pause.

    Args:
        plan_id: Plan identifier sent as ``plan_id``.
        timeout: Seconds ``requests`` waits on the server per attempt, so a
            stalled connection cannot hang the script forever.
        retry_delay: Seconds to sleep between attempts, so failures do not
            turn into a tight request loop against the API.

    Returns:
        A list of 15 values taken from the response's ``data`` object, in
        the order given by ``field_names`` below.

    Raises:
        KeyError: If the response's ``data`` object lacks an expected field.
    """
    url = "https://api.kaoyan.cn/pc/school/planDetail"
    payload = {
        "plan_id": plan_id,
        "is_apply": 2,
    }
    while True:
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException:
            # Only request failures are retried; KeyboardInterrupt and
            # programming errors propagate instead of looping forever.
            print('请求出错,重试中。。。')
        else:
            print(plan_id, response)
            if response.status_code == 200:
                break
        time.sleep(retry_delay)

    data_dict = response.json()['data']
    # Output column order; must stay in sync with the CSV header row.
    field_names = (
        'degree_type_name',   # degree type
        'year',               # enrollment year
        'level1_code',        # category code
        'level1_name',        # category
        'special_code',       # major code
        'special_name',       # major
        'depart_name',        # department / college
        'recruit_type_name',  # study mode
        'level2_code',        # discipline code
        'level2_name',        # discipline
        'recruit_number',     # planned enrollment count
        'research_area',      # research area
        'exam_subject',       # preliminary exam subjects
        'exam_book',          # reference books
        'note',               # remarks
    )
    return [data_dict[name] for name in field_names]
def star():
    """Crawl every school, year and plan, appending one CSV row per plan.

    For each school returned by ``get_degree_type_list`` and each of the
    years 2022-2024, the plan list is fetched with ``get_html``; every plan
    is then expanded with ``get_success`` and written to ``招生专业.csv``
    through ``file_write``.
    """
    # Header row: school name, school type, province, then the 15 plan
    # detail columns in the order produced by get_success.
    header_row = [
        '学校名称', '学校类型', '省份', '学位类别', '招生年份', '所属门类代码',
        '所属门类', '专业代码', '专业', '所属学院', '学习方式', '所属学科代码',
        '所属学科', '拟招生人数', '研究方向', '初试科目', '参考书目', '备注',
    ]
    years = ['2022', '2023', '2024']

    schools = get_degree_type_list()
    for entry in schools:
        name = entry['school_name']        # school name
        kind = entry['school_type_name']   # school type
        sid = entry['school_id']           # id used for the plan queries
        province = entry['province_name']  # province
        for yr in years:
            plans = get_html(sid, yr)
            for plan in plans:
                details = get_success(plan['plan_id'])
                row = [name, kind, province] + details
                file_write('招生专业.csv', header_row, row)
# Run the full crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    star()