You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
test2/招生信息.py

150 lines
5.3 KiB

3 weeks ago
import csv
import os
import time
from pprint import pprint

import requests
# HTTP headers sent with every API request in this file. The values imitate a
# desktop Chrome 129 browser on Windows (see the user-agent and sec-ch-ua
# entries) calling the API from the www.kaoyan.cn site (origin / referer).
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.kaoyan.cn',
    'priority': 'u=1, i',
    'referer': 'https://www.kaoyan.cn/',
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
    ),
}
def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written only when the file does not exist yet or is empty,
    so repeated calls keep extending the same file with a single header.

    Args:
        file_path: Path of the CSV file to append to.
        data_title: Sequence of column titles (the header row).
        file_data: Sequence of cell values for the row to append.
    """
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # getsize() raises OSError when the file does not exist yet; treat
        # that like an empty file. Catching only OSError (instead of a bare
        # except) lets KeyboardInterrupt and real bugs propagate.
        needs_header = True

    # newline='' leaves line-ending handling to the csv module.
    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')
def get_html(school_id, year, timeout=30, retry_delay=1.0):
    """Fetch the admission-plan list of one school for one year.

    POSTs to the ``planList`` endpoint with the hard-coded keyword "081200"
    and retries indefinitely until the server answers with HTTP 200.

    Args:
        school_id: School identifier sent as ``school_id``.
        year: Admission year sent as ``year``.
        timeout: Seconds to wait for the server before the attempt is
            abandoned and retried.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The value stored under ``data -> data`` in the JSON response; the
        caller iterates it and reads ``plan_id`` from each item.

    Raises:
        KeyError: If the JSON response lacks the ``data`` keys.
    """
    url = "https://api.kaoyan.cn/pc/school/planList"
    payload = {
        "school_id": school_id,
        "page": 1,
        "limit": 8000,
        "recruit_type": "",
        "year": year,
        "keyword": "081200",
        "is_apply": 2,
    }
    while True:
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors are no longer swallowed by a bare except.
            print('请求出错,重试中。。。')
        else:
            print(school_id, year, response)
            if response.status_code == 200:
                break
        # Pause before the next attempt so a failing server is not hammered.
        time.sleep(retry_delay)
    return response.json()['data']['data']
def get_degree_type_list(timeout=30):
    """Fetch the school list for the hard-coded specialty id (spe_id 20364).

    POSTs once to the ``schoolListV2`` endpoint, asking for the first page
    with up to 500 entries and no other filters.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``data -> data`` in the JSON response; the
        caller iterates it and reads ``school_name``, ``school_type_name``,
        ``school_id`` and ``province_name`` from each item.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        KeyError: If the JSON response lacks the ``data`` keys.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    payload = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    print(response)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError when
    # the server replies with an error status.
    response.raise_for_status()
    return response.json()['data']['data']
# Keys read from the planDetail payload, in the order they are returned.
_PLAN_DETAIL_FIELDS = (
    'degree_type_name',   # degree category
    'year',               # admission year
    'level1_code',        # discipline category code
    'level1_name',        # discipline category
    'special_code',       # major code
    'special_name',       # major
    'depart_name',        # college / department
    'recruit_type_name',  # mode of study
    'level2_code',        # discipline code
    'level2_name',        # discipline
    'recruit_number',     # planned number of admissions
    'research_area',      # research directions
    'exam_subject',       # preliminary exam subjects
    'exam_book',          # reference books
    'note',               # remarks
)


def get_success(plan_id, timeout=30, retry_delay=1.0):
    """Fetch the detail record of one admission plan as a flat list.

    POSTs to the ``planDetail`` endpoint and retries indefinitely until the
    server answers with HTTP 200.

    Args:
        plan_id: Plan identifier sent as ``plan_id``.
        timeout: Seconds to wait for the server before the attempt is
            abandoned and retried.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        A list holding the values of ``_PLAN_DETAIL_FIELDS`` taken from the
        response's ``data`` object, in that order (15 items).

    Raises:
        KeyError: If the response lacks ``data`` or any expected field.
    """
    url = "https://api.kaoyan.cn/pc/school/planDetail"
    payload = {
        "plan_id": plan_id,
        "is_apply": 2,
    }
    while True:
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors are no longer swallowed by a bare except.
            print('请求出错,重试中。。。')
        else:
            print(plan_id, response)
            if response.status_code == 200:
                break
        # Pause before the next attempt so a failing server is not hammered.
        time.sleep(retry_delay)
    detail = response.json()['data']
    return [detail[field] for field in _PLAN_DETAIL_FIELDS]
def star():
    """Scrape every school's plan details and append them to the CSV file.

    For each school returned by get_degree_type_list() and each of the years
    2022-2024, fetches the plan list, then each plan's detail record, and
    appends one row (school columns followed by the detail columns) to
    '招生专业.csv' via file_write().
    """
    output_path = '招生专业.csv'
    years = ('2022', '2023', '2024')
    school_keys = ('school_name', 'school_type_name', 'school_id', 'province_name')
    # Header row: three school columns, then the fifteen plan-detail columns.
    data_title = [
        '学校名称', '学校类型', '省份',
        '学位类别', '招生年份', '所属门类代码', '所属门类', '专业代码',
        '专业', '所属学院', '学习方式', '所属学科代码', '所属学科',
        '拟招生人数', '研究方向', '初试科目', '参考书目', '备注',
    ]
    for school in get_degree_type_list():
        # school_id is only the lookup key; it is not written to the CSV.
        name, kind, school_id, province = (school[key] for key in school_keys)
        for year in years:
            for plan in get_html(school_id, year):
                detail_row = get_success(plan['plan_id'])
                row = [name, kind, province] + detail_row
                file_write(output_path, data_title, row)
# Start the scrape only when this file is executed as a script.
if __name__ == "__main__":
    star()