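# NOTE: the lines below are repository web-UI residue captured together with
# the file; they are kept as comments so the module can be parsed.
# You cannot select more than 25 topics. Topics must start with a letter or
# number, can include dashes ('-'), and can be up to 35 characters long.
# test2/复试分数线.py
#
# 124 lines
# 4.4 KiB
#
# 4 weeks ago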
import requests
import csv, os
from pprint import pprint
# HTTP request headers sent with every call in this module. The values mimic
# a desktop Chrome 129 browser visiting https://www.kaoyan.cn.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.kaoyan.cn',
    'priority': 'u=1, i',
    'referer': 'https://www.kaoyan.cn/',
    # Client-hint values contain literal double quotes, hence single-quoted.
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/129.0.0.0 Safari/537.36'
    ),
}
def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written only when the file does not exist yet (or its size
    cannot be read) or when it exists but is empty; otherwise the row is
    simply appended.

    Args:
        file_path: Path of the CSV file. It is created if missing.
        data_title: Sequence of column names used as the header row.
        file_data: Sequence of cell values for the row to append.
    """
    try:
        # An existing file of zero bytes still needs its header.
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # getsize raises OSError (e.g. FileNotFoundError) when the file is
        # missing or inaccessible; treat that as "no header written yet".
        # Catching only OSError keeps KeyboardInterrupt and real bugs visible.
        needs_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')
def get_html(year, school, page):
    """Fetch one page of score-line data for a school and save matching rows.

    Downloads ``https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json``,
    retrying until the server answers 200 or 404. Every item whose
    ``special_code`` is one of the hard-coded codes of interest is appended
    to ``复试分数线.csv`` through ``file_write``.

    Args:
        year: Year to query; interpolated into the URL and written as the
            first CSV column.
        school: Mapping with the keys ``school_name``, ``school_type_name``,
            ``school_id`` and ``province_name``.
        page: 1-based page number to request.

    Returns:
        0 when the server answers 404 for the requested page; otherwise the
        number of pages derived from the response's ``count`` field,
        assuming 10 items per page.
    """
    import time  # local import: only the retry back-off below needs it

    data_title = ['年份', '学校名称', '学校类型', '省份', '硕士类型', '专业代码', '专业名称', '总分', '政治', '英语',
                  '专业课一', '专业课二', '备注']
    school_name = school['school_name']  # school name
    school_type_name = school['school_type_name']  # school type
    school_id = school['school_id']  # id used in the query URL
    province_name = school['province_name']  # province
    url = f"https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json"

    request_timeout = 10  # seconds; without it a stalled connection hangs forever
    retry_delay = 1  # seconds to wait between attempts so retries do not hammer the server

    while True:
        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors propagate instead of being swallowed.
            print('请求出错,重试中,请稍等。。。')
            time.sleep(retry_delay)
            continue
        print(url, school_name, f'正在查询第{page}', response)
        if response.status_code == 200:
            break
        if response.status_code == 404:
            return 0
        # Any other status is retried, but after a pause rather than in a tight loop.
        time.sleep(retry_delay)

    res_data = response.json()['data']
    count = res_data['count']
    # Ceiling division: the former `count // 10 + 1` reported one page too
    # many whenever count was an exact multiple of 10 (including 0), which
    # made the caller request a page that does not exist.
    page_max = (count + 9) // 10

    # Item fields in the order they appear in the CSV row after the four
    # school-level columns: degree type, major code, major name, total,
    # politics, English, subject one, subject two, note.
    item_keys = ('degree_type_name', 'special_code', 'special_name', 'total', 'politics',
                 'english', 'special_one', 'special_two', 'note')
    wanted_codes = ('08', '1812', '081200')

    for item in res_data['item']:
        file_data = [year, school_name, school_type_name, province_name,
                     *(item[key] for key in item_keys)]
        # Only rows whose major code is one of the wanted codes are saved.
        if item['special_code'] in wanted_codes:
            file_write('复试分数线.csv', data_title, file_data)
    return page_max  # number of data pages
def get_degree_type_list():
    """Request the school list from the kaoyan.cn API and return its entries.

    A single POST is sent for page 1 with a limit of 500 and the fixed
    ``spe_id`` "20364"; every other filter field is sent as an empty string.
    The response object is printed before it is decoded.

    Returns:
        The value stored under ``['data']['data']`` in the JSON response.
    """
    endpoint = "https://api.kaoyan.cn/pc/special/schoolListV2"
    # Keyword order is kept identical to the request body the API is sent.
    payload = dict(
        page=1,
        limit=500,
        province_id="",
        type="",
        feature="",
        spe_id="20364",
        recruit_type="",
        sort_type="",
        english_subject="",
        math_subject="",
    )
    reply = requests.post(endpoint, headers=headers, json=payload)
    print(reply)
    return reply.json()['data']['data']
def star():
    """Walk every school, year and page, delegating each page to ``get_html``.

    For each school returned by ``get_degree_type_list`` and each of the
    years 2022-2024, page 1 is always requested; further pages are requested
    while the page number does not exceed the page count reported by the
    most recent ``get_html`` call.
    """
    years = ('2022', '2023', '2024')  # years to query
    for school in get_degree_type_list():
        for year in years:
            next_page = 1
            while True:
                last_page = get_html(year, school, next_page)
                next_page += 1
                # get_html returns 0 on a 404, which also ends the loop here.
                if next_page > last_page:
                    break
# Start the full crawl only when this file is executed as a script.
if __name__ == "__main__":
    star()