You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
test2/复试分数线.py

124 lines
4.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
import csv, os
from pprint import pprint
# HTTP request headers sent with every request in this script; they mirror
# what a desktop Chrome 129 browser sends when it is on www.kaoyan.cn.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.kaoyan.cn',
    'priority': 'u=1, i',
    'referer': 'https://www.kaoyan.cn/',
    # Client-hint headers: single-quoted so the embedded double quotes
    # need no escaping.
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
    ),
}
def file_write(file_path, data_title, file_data):
    """Append one data row to a CSV file, writing the header row first if needed.

    The header is written only when the file does not exist yet (or cannot be
    stat'ed) or exists with a size of zero bytes; otherwise the row is simply
    appended after the existing content.

    Args:
        file_path: Path of the CSV file to append to.
        data_title: Sequence of column titles used as the header row.
        file_data: Sequence of values forming the row to append.
    """
    try:
        # An existing but empty file still needs its header row.
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # getsize raises OSError when the file is missing or inaccessible;
        # treat that as "new file". Anything else (e.g. KeyboardInterrupt)
        # is no longer swallowed.
        needs_header = True

    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功,本次下载已完成。')
def get_html(year, school, page, max_retries=5, timeout=10, retry_delay=1.0):
    """Fetch one page of score-line data for a school/year and save matching rows.

    Requests ``{page}.json`` from the static score endpoint, then appends every
    item whose ``special_code`` is one of the wanted codes to
    ``复试分数线.csv`` via ``file_write``.

    Args:
        year: Year segment of the URL (the caller passes strings such as '2023').
        school: Mapping with the keys ``school_name``, ``school_type_name``,
            ``school_id`` and ``province_name``.
        page: 1-based page number to request.
        max_retries: Maximum number of request attempts before giving up.
        timeout: Per-request timeout in seconds passed to ``requests.get``.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The total number of pages computed from the response's ``count``
        field, or 0 when the page does not exist (HTTP 404) or the request
        still fails after ``max_retries`` attempts. The caller stops paging
        once the page number exceeds this value.
    """
    import time  # local import: only needed for the delay between retries

    data_title = ['年份', '学校名称', '学校类型', '省份', '硕士类型', '专业代码', '专业名称', '总分', '政治', '英语',
                  '专业课一', '专业课二', '备注']
    # The page-count arithmetic below assumes 10 items per page, as the
    # original `count // 10 + 1` did — TODO confirm against the API.
    page_size = 10
    wanted_codes = {'08', '1812', '081200'}

    school_name = school['school_name']            # school name
    school_type_name = school['school_type_name']  # school type
    school_id = school['school_id']                # id used in the query URL
    province_name = school['province_name']        # province
    url = f"https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json"

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            print('请求出错,重试中,请稍等。。。')
        else:
            print(url, school_name, f'正在查询第{page}', response)
            if response.status_code == 200:
                break
            if response.status_code == 404:
                return 0
        # Pause before the next attempt so a failing server is not hammered
        # in a tight loop.
        if attempt < max_retries:
            time.sleep(retry_delay)
    else:
        # No attempt produced a 200: give up on this page instead of
        # spinning forever on e.g. a persistent 403/500.
        print(f'多次重试仍失败,已跳过: {url}')
        return 0

    res_data = response.json()['data']
    count = res_data['count']
    # Ceiling division: an exact multiple of page_size must not yield an
    # extra, non-existent page.
    page_max = (count + page_size - 1) // page_size

    for item in res_data['item']:
        special_code = item['special_code']  # major code
        # Only rows for the wanted major codes are written to the file.
        if special_code not in wanted_codes:
            continue
        file_data = [
            year,
            school_name,
            school_type_name,
            province_name,
            item['degree_type_name'],  # master's degree type
            special_code,
            item['special_name'],      # major name
            item['total'],             # total score
            item['politics'],          # politics
            item['english'],           # English
            item['special_one'],       # specialised course 1
            item['special_two'],       # specialised course 2
            item['note'],              # remarks
        ]
        file_write('复试分数线.csv', data_title, file_data)
    return page_max  # number of data pages
def get_degree_type_list(timeout=10):
    """Request the school list from the kaoyan.cn API.

    Sends a single POST for page 1 with a limit of 500 entries and the
    ``spe_id`` filter "20364"; all other filters are left empty.

    Args:
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the script indefinitely.

    Returns:
        The value found at ``['data']['data']`` in the JSON response; the
        caller iterates it and reads ``school_name``, ``school_type_name``,
        ``school_id`` and ``province_name`` from each entry.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    # NOTE(review): only the first 500 entries are requested; if the API
    # can return more schools than that, the list is truncated — confirm.
    payload = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",  # presumably the id of the major of interest — verify
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": "",
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    print(response)
    # Fail with a clear HTTP error rather than an opaque JSON/KeyError below.
    response.raise_for_status()
    return response.json()['data']['data']
def star():
    """Crawl every school returned by get_degree_type_list for each year.

    For each school/year pair, pages are requested starting at 1; after each
    request the upper bound is replaced by the value get_html returns, so the
    loop ends when the next page number exceeds it (get_html returns 0 for a
    missing page, which ends the loop immediately).
    """
    years = ('2022', '2023', '2024')  # years to query
    for school in get_degree_type_list():
        for year in years:
            current_page = 1
            last_page = 1
            while current_page <= last_page:
                last_page = get_html(year, school, current_page)
                current_page += 1
# Run the crawl only when the file is executed as a script, not on import.
if __name__ == "__main__":
    star()