|
|
import csv
import os
import time
from pprint import pprint

import requests
|
|
|
|
|
|
|
|
|
# Browser-like request headers sent with every HTTP call made by this script.
headers = {
    # Content negotiation
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    # Request context
    'origin': 'https://www.kaoyan.cn',
    'priority': 'u=1, i',
    'referer': 'https://www.kaoyan.cn/',
    # Client hints
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    # Fetch metadata
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
    ),
}
|
|
|
|
|
|
|
|
|
def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written when the file is empty or its size cannot be read
    (for example because it does not exist yet); otherwise only the data row
    is appended.

    Args:
        file_path: Path of the CSV file to append to.
        data_title: Sequence of column titles used as the header row.
        file_data: Sequence of values written as one data row.
    """
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # getsize raises OSError when the file is missing or inaccessible;
        # treat that as "no content yet". Anything else (KeyboardInterrupt,
        # programming errors) is deliberately not swallowed.
        needs_header = True

    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')
|
|
|
|
|
|
|
|
|
def get_html(year, school, page):
    """Download one page of score-line data for a school and save matching rows.

    Requests
    ``https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json``
    and keeps retrying until the server answers 200 or 404. Every item whose
    ``special_code`` is one of the target codes is appended to
    ``复试分数线.csv`` through ``file_write``.

    Args:
        year: Year placed in the URL and written to the first CSV column.
        school: Mapping providing ``school_name``, ``school_type_name``,
            ``school_id`` and ``province_name``.
        page: Page number placed in the URL.

    Returns:
        0 when the server answers 404 for this page; otherwise the number of
        pages derived from the response's ``count`` field, assuming 10 items
        per page.
    """
    request_timeout = 30  # seconds; without a timeout a stalled connection hangs forever
    retry_delay = 2       # seconds to wait between attempts instead of busy-looping
    page_size = 10
    target_codes = ('08', '1812', '081200')

    data_title = ['年份', '学校名称', '学校类型', '省份', '硕士类型', '专业代码', '专业名称', '总分', '政治', '英语',
                  '专业课一', '专业课二', '备注']
    school_name = school['school_name']
    school_type_name = school['school_type_name']
    school_id = school['school_id']  # id used to query the score endpoint
    province_name = school['province_name']
    url = f"https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json"

    while True:
        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors propagate so the loop can be interrupted.
            print('请求出错,重试中,请稍等。。。')
            time.sleep(retry_delay)
            continue

        print(url, school_name, f'正在查询第{page}页', response)
        if response.status_code == 200:
            break
        if response.status_code == 404:
            return 0
        # Any other status is retried after a short pause.
        time.sleep(retry_delay)

    res_data = response.json()['data']
    count = res_data['count']
    # Ceiling division: an exact multiple of the page size must not add an
    # extra, non-existent page.
    page_max = (count + page_size - 1) // page_size

    for item in res_data['item']:
        special_code = item['special_code']
        if special_code not in target_codes:
            continue
        file_data = [
            year,
            school_name,
            school_type_name,
            province_name,
            item['degree_type_name'],
            special_code,
            item['special_name'],
            item['total'],
            item['politics'],
            item['english'],
            item['special_one'],
            item['special_two'],
            item['note'],
        ]
        file_write('复试分数线.csv', data_title, file_data)

    return page_max
|
|
|
|
|
|
|
|
|
def get_degree_type_list():
    """Request the school list and return its entries.

    Despite the function name, this POSTs to the ``schoolListV2`` endpoint
    with a fixed query (``spe_id`` "20364", first page, limit 500) and
    returns ``response.json()['data']['data']``.

    Returns:
        The value stored under ``data.data`` in the JSON response; ``star``
        iterates it and passes each element to ``get_html`` as ``school``.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    data = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    print(response)
    return response.json()['data']['data']
|
|
|
|
|
|
|
|
|
def star():
    """Crawl every page of score lines for each listed school and each year.

    For every school returned by ``get_degree_type_list`` and every year in
    2022-2024, pages are fetched with ``get_html`` starting at page 1 until
    the page number exceeds the page count reported by the latest call.
    """
    years = ['2022', '2023', '2024']
    for school in get_degree_type_list():
        for year in years:
            current_page = 1
            while True:
                # get_html returns the page count (0 on a 404), which also
                # ends the loop below.
                last_page = get_html(year, school, current_page)
                current_page += 1
                if current_page > last_page:
                    break
|
|
|
|
|
|
|
|
|
# Script entry point: start the crawl only when run directly, not on import.
if __name__ == '__main__':
    star()
|