|
|
|
@ -0,0 +1,149 @@
|
|
|
|
|
import csv
import os
import time
from pprint import pprint

import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Request headers sent with every POST issued by this script.
headers = {
    # Content negotiation
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    # Origin of the request
    'origin': 'https://www.kaoyan.cn',
    'priority': 'u=1, i',
    'referer': 'https://www.kaoyan.cn/',
    # Client hints
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    # Fetch metadata
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/129.0.0.0 Safari/537.36'
    ),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written only when the file has a size of zero bytes or
    its size cannot be read (for example because it does not exist yet);
    otherwise the row is appended after the existing content.

    Args:
        file_path: Path of the CSV file to append to.
        data_title: Sequence of column titles used as the header row.
        file_data: Sequence of cell values forming the row to append.
    """
    # A file whose size cannot be read is treated like an empty one. Only
    # OSError is caught so that KeyboardInterrupt and programming errors
    # still surface instead of being silently swallowed.
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        needs_header = True

    # newline='' leaves line-ending handling to the csv module.
    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_html(school_id, year):
    """Fetch the enrollment plan list of one school for one year.

    Posts to the ``planList`` endpoint and keeps retrying, with a short
    pause between attempts, until the server answers with HTTP 200.

    Args:
        school_id: School identifier sent as ``school_id``.
        year: Enrollment year sent as ``year``.

    Returns:
        The value stored under ``['data']['data']`` of the JSON response.

    Raises:
        KeyError: If the JSON response lacks the expected ``data`` keys.
    """
    url = "https://api.kaoyan.cn/pc/school/planList"
    payload = {
        "school_id": school_id,
        "page": 1,
        "limit": 8000,
        "recruit_type": "",
        "year": year,
        # Search keyword; presumably a major code -- TODO confirm.
        "keyword": "081200",
        "is_apply": 2,
    }
    while True:
        try:
            # A timeout keeps a stalled connection from hanging the script.
            response = requests.post(
                url, headers=headers, json=payload, timeout=30
            )
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt
            # and programming errors are allowed to propagate.
            print('请求出错,重试中。。。')
        else:
            print(school_id, year, response)
            if response.status_code == 200:
                break
        # Pause before the next attempt instead of retrying in a tight loop.
        time.sleep(1)

    return response.json()['data']['data']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_degree_type_list():
    """Fetch the school list from the ``schoolListV2`` endpoint.

    A single request is made (page 1, limit 500) with no retry.

    Returns:
        The value stored under ``['data']['data']`` of the JSON response.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        KeyError: If the JSON response lacks the expected ``data`` keys.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    payload = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    # A timeout keeps a stalled connection from hanging the script.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    print(response)
    # Fail with a clear HTTP error rather than an opaque JSON/KeyError
    # further down when the server returns an error page.
    response.raise_for_status()
    return response.json()['data']['data']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_success(plan_id):
    """Fetch the detail record of one enrollment plan as a flat row.

    Posts to the ``planDetail`` endpoint and keeps retrying, with a short
    pause between attempts, until the server answers with HTTP 200.

    Args:
        plan_id: Plan identifier sent as ``plan_id``.

    Returns:
        A list of fifteen values read from the response's ``data`` object,
        in the order given by ``field_keys`` below.

    Raises:
        KeyError: If the response lacks ``data`` or any of the listed keys.
    """
    url = "https://api.kaoyan.cn/pc/school/planDetail"
    payload = {
        "plan_id": plan_id,
        "is_apply": 2,
    }
    while True:
        try:
            # A timeout keeps a stalled connection from hanging the script.
            response = requests.post(
                url, headers=headers, json=payload, timeout=30
            )
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt
            # and programming errors are allowed to propagate.
            print('请求出错,重试中。。。')
        else:
            print(plan_id, response)
            if response.status_code == 200:
                break
        # Pause before the next attempt instead of retrying in a tight loop.
        time.sleep(1)

    detail = response.json()['data']
    # Output columns, in the order they appear in the returned row.
    field_keys = (
        'degree_type_name',   # degree category
        'year',               # enrollment year
        'level1_code',        # discipline category code
        'level1_name',        # discipline category
        'special_code',       # major code
        'special_name',       # major
        'depart_name',        # department / college
        'recruit_type_name',  # study mode
        'level2_code',        # discipline code
        'level2_name',        # discipline
        'recruit_number',     # planned enrollment count
        'research_area',      # research direction
        'exam_subject',       # preliminary exam subjects
        'exam_book',          # reference books
        'note',               # remarks
    )
    return [detail[key] for key in field_keys]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def star(output_path='招生专业.csv', years=('2022', '2023', '2024')):
    """Download every plan of every listed school and append it to a CSV.

    For each school returned by ``get_degree_type_list`` and each year in
    ``years``, the plan list is fetched with ``get_html``; every plan's
    detail row is fetched with ``get_success``, prefixed with the school
    name, school type and province, and appended via ``file_write``.

    Args:
        output_path: CSV file the rows are appended to.
        years: Enrollment years to query for each school.
    """
    data_title = [
        '学校名称', '学校类型', '省份', '学位类别', '招生年份', '所属门类代码',
        '所属门类', '专业代码', '专业', '所属学院', '学习方式', '所属学科代码',
        '所属学科', '拟招生人数', '研究方向', '初试科目', '参考书目', '备注',
    ]
    for school in get_degree_type_list():
        school_name = school['school_name']            # school name
        school_type_name = school['school_type_name']  # school type
        school_id = school['school_id']                # id used for queries
        province_name = school['province_name']        # province
        school_columns = [school_name, school_type_name, province_name]

        for year in years:
            for plan in get_html(school_id, year):
                # plan_id is the id used to query the plan's detail record.
                detail_row = get_success(plan['plan_id'])
                file_write(output_path, data_title, school_columns + detail_row)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Start the download only when this file is executed as a script.
if __name__ == "__main__":
    star()
|