# test2/复试分数线.py

import requests
import csv, os
from pprint import pprint


# HTTP request headers passed to the `requests` calls in this file.
# The values imitate a desktop Chrome 129 browser on Windows issuing a
# cross-site request that originates from https://www.kaoyan.cn.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.kaoyan.cn",
    "priority": "u=1, i",
    "referer": "https://www.kaoyan.cn/",
    "sec-ch-ua": "\"Google Chrome\";v=\"129\", \"Not=A?Brand\";v=\"8\", \"Chromium\";v=\"129\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}


def file_write(file_path, data_title, file_data):
    """Append one row to a CSV file, writing the header row first if needed.

    The header is written only when the file does not exist yet or is empty,
    so repeated calls build up a single table with one header line.

    Args:
        file_path: Path of the CSV file to append to.
        data_title: Sequence of column titles used as the header row.
        file_data: Sequence of cell values for the row to append.

    Raises:
        OSError: If the file cannot be opened for appending.
    """
    try:
        needs_header = os.path.getsize(file_path) == 0
    except OSError:
        # getsize() fails when the file is missing or inaccessible; treat
        # that as "empty" so a freshly created file starts with the header.
        # Only OSError is caught so Ctrl-C and programming errors propagate.
        needs_header = True

    with open(file_path, 'a', encoding='UTF-8', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(data_title)
        writer.writerow(file_data)
    print('写入成功，本次下载已完成。')


def get_html(year, school, page, max_retries=5, timeout=10, retry_delay=1.0):
    """Fetch one page of score lines for a school and save the matching rows.

    Downloads the static JSON page for the given year, school and page
    number, then appends every entry whose major code is one of the codes
    of interest to '复试分数线.csv' via ``file_write``.

    Args:
        year: Year string used in the URL and written to the first column.
        school: Dict describing the school; the keys 'school_name',
            'school_type_name', 'school_id' and 'province_name' are read.
        page: 1-based page number to request.
        max_retries: Maximum number of request attempts before giving up.
        timeout: Per-request timeout in seconds passed to ``requests.get``.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The total number of pages for this school/year (assuming 10 entries
        per page, as the original page arithmetic did), or 0 when the page
        does not exist (HTTP 404) or every attempt failed. Callers treat 0
        as "stop paging".
    """
    import time  # local import: only needed for the retry back-off

    data_title = ['年份', '学校名称', '学校类型', '省份', '硕士类型', '专业代码', '专业名称', '总分', '政治', '英语',
                  '专业课一', '专业课二', '备注']
    # Major codes whose rows are kept.
    wanted_codes = ('08', '1812', '081200')
    page_size = 10

    school_name = school['school_name']  # school name
    school_type_name = school['school_type_name']  # school type
    school_id = school['school_id']  # id used in the query URL
    province_name = school['province_name']  # province
    url = f"https://static.kaoyan.cn/json/score/{year}/{school_id}/0/{page}.json"

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # Network-level failure: report it and retry. Other exceptions
            # (including KeyboardInterrupt) are allowed to propagate.
            print('请求出错，重试中，请稍等。。。')
        else:
            print(url, school_name, f'正在查询第{page}页', response)
            if response.status_code == 200:
                break
            if response.status_code == 404:
                # No such page: tell the caller to stop paging.
                return 0
        if attempt < max_retries:
            # Pause so a failing server is not hammered in a tight loop.
            time.sleep(retry_delay)
    else:
        # Every attempt failed or returned an unexpected status.
        print('重试次数已用尽，跳过该页。')
        return 0

    res_data = response.json()['data']
    count = res_data['count']
    # Ceiling division: an exact multiple of the page size must not report
    # an extra, non-existent page.
    page_max = (count + page_size - 1) // page_size

    for item in res_data['item']:
        special_code = item['special_code']  # major code
        if special_code not in wanted_codes:
            continue
        file_data = [
            year,
            school_name,
            school_type_name,
            province_name,
            item['degree_type_name'],  # master's degree type
            special_code,
            item['special_name'],  # major name
            item['total'],  # total score
            item['politics'],  # politics
            item['english'],  # English
            item['special_one'],  # specialised course 1
            item['special_two'],  # specialised course 2
            item['note'],  # remarks
        ]
        file_write('复试分数线.csv', data_title, file_data)
    return page_max  # number of pages available


def get_degree_type_list(timeout=10):
    """Request the school list from the kaoyan.cn API.

    Sends a fixed POST query (page 1, up to 500 entries, spe_id "20364",
    all other filters empty) and returns the list found under
    ``['data']['data']`` in the JSON response.

    Args:
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the script indefinitely.

    Returns:
        The value of ``response.json()['data']['data']``; the caller
        iterates over it as a sequence of school dicts.

    Raises:
        requests.RequestException: On network failure, timeout or a
            non-success HTTP status.
    """
    url = "https://api.kaoyan.cn/pc/special/schoolListV2"
    data = {
        "page": 1,
        "limit": 500,
        "province_id": "",
        "type": "",
        "feature": "",
        "spe_id": "20364",  # NOTE(review): presumably the major/specialty id being queried - confirm
        "recruit_type": "",
        "sort_type": "",
        "english_subject": "",
        "math_subject": ""
    }
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    print(response)
    # Fail with a clear HTTP error instead of an opaque KeyError/JSON error
    # further down when the API rejects the request.
    response.raise_for_status()
    res_data_list = response.json()['data']['data']
    return res_data_list


def star():
    """Walk every school and year, fetching score pages until none remain.

    For each school returned by ``get_degree_type_list`` and each year in
    2022-2024, pages are requested starting at 1. ``get_html`` reports the
    total page count after every call (0 when the page is missing), and
    paging stops once the current page number exceeds that count.
    """
    years = ['2022', '2023', '2024']  # years to query
    for school in get_degree_type_list():
        for year in years:
            current_page = 1
            last_page = 1
            while current_page <= last_page:
                last_page = get_html(year, school, current_page)
                current_page += 1


# Start the scrape only when this file is executed directly as a script.
if __name__ == '__main__':
    star()