# git-02/01src/75%25%.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
使用 75% 训练 / 25% 测试 的方式评估 SVM（输出 ACC & AUC）
"""

import pickle
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score


# ---------- 1. Data path ----------
# Absolute location of the pickled feature dataset that the script loads.
# NOTE(review): hard-coded Windows path — adjust before running on another machine.
PKL_PATH = Path(r"D:\Python\空心检测\pythonProject\feature_dataset.pkl")

# ---------- 2. Load features ----------
def load_pkl_matrix(path: Path):
    """Load the feature matrix and labels from a pickle file.

    Args:
        path: Location of a pickle file whose top-level object can be
            indexed with the keys ``"matrix"`` and ``"label"``.

    Returns:
        A ``(matrix, label)`` tuple with the objects stored under those
        two keys, returned unchanged.

    Raises:
        KeyError: If the unpickled dict lacks ``"matrix"`` or ``"label"``.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(path, "rb") as fh:
        payload = pickle.load(fh)

    features = payload["matrix"]
    labels = payload["label"]
    return features, labels

X, y = load_pkl_matrix(PKL_PATH)
# Coerce to an ndarray before flattening: labels pickled as a plain list,
# an (N, 1) column or a pandas Series all become a 1-D array of shape (N,),
# instead of failing on (or warning about) a missing/deprecated .ravel().
y = np.asarray(y).ravel()

# ---------- 3. 75% / 25% split ----------
# Shuffled hold-out split, stratified on the labels; the fixed seed makes
# the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    stratify=y,
    random_state=42,
)

# ---------- 4. Standardisation + SVM ----------
# The scaler is fitted on the training split only and then reused for the
# test split, so no test-set statistics reach the model.
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

# RBF-kernel SVM. probability=True enables predict_proba, which the
# evaluation step uses to compute AUC.
svm = SVC(
    C=10,
    kernel="rbf",
    gamma="scale",
    class_weight="balanced",
    probability=True,
    random_state=42,
).fit(X_train_std, y_train)

# ---------- 5. Evaluation ----------
y_pred = svm.predict(X_test_std)

# predict_proba columns are ordered like svm.classes_, so look up the
# column that belongs to class label 1 instead of assuming its position.
proba = svm.predict_proba(X_test_std)
pos_col = list(svm.classes_).index(1)
y_proba_pos = proba[:, pos_col]

acc = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba_pos)

# Report sample counts followed by accuracy and ROC AUC on the test split.
print("\n========== 评估结果 ==========")
print(f"样本总数: {len(y)}  |  训练: {len(y_train)}  测试: {len(y_test)}")
print(f"ACC  = {acc:.4f}")
print(f"AUC  = {auc:.4f}")