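# Kernel methods from scratch: a kernel perceptron demonstrated on XOR, and a
# kernel logistic regression demonstrated on the watermelon data set in
# 'data3.0.csv', each with a plot of the learned decision surface.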
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection

class Kernel(object):
    """Collection of kernel functions sharing one set of hyper-parameters."""

    def __init__(self, gamma=1.0, coef0=1, degree=3):
        self.gamma, self.coef0, self.degree = gamma, coef0, degree

    def linear(self, X1, X2):
        return np.dot(X1, X2.T)

    def poly(self, X1, X2):
        return (self.gamma * np.dot(X1, X2.T) + self.coef0) ** self.degree

    def gaussian(self, X1, X2):
        # Pairwise squared distances via ||x||^2 - 2<x, z> + ||z||^2,
        # clipped at zero to absorb negative round-off.
        X1_norm_sq = np.sum(X1 ** 2, axis=1)
        X2_norm_sq = np.sum(X2 ** 2, axis=1)
        dot_product = np.dot(X1, X2.T)
        dist_sq = X1_norm_sq[:, np.newaxis] - 2 * dot_product + X2_norm_sq
        dist_sq = np.maximum(dist_sq, 0)
        return np.exp(-self.gamma * dist_sq)

    def laplace(self, X1, X2):
        # Plain double loop over pairwise Euclidean distances.
        mat = np.zeros([len(X1), len(X2)])
        for i in range(len(X1)):
            for j in range(len(X2)):
                mat[i, j] = np.exp(-self.gamma * np.linalg.norm(X1[i] - X2[j]))
        return mat
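
# The kernels above implement, for gamma > 0:
#   linear:    k(x, z) = <x, z>
#   poly:      k(x, z) = (gamma * <x, z> + coef0) ** degree
#   gaussian:  k(x, z) = exp(-gamma * ||x - z||^2), with gamma = 1 / (2 * sigma^2)
#   laplace:   k(x, z) = exp(-gamma * ||x - z||),   with gamma = 1 / sigma
# Quick illustrative check (not part of the original script): both exponential
# kernels satisfy k(x, x) = 1, so a Gram matrix of X against itself has a unit
# diagonal, e.g. np.allclose(np.diag(Kernel(gamma=2.0).gaussian(X, X)), 1.0).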

class KPerceptron(object):
    def __init__(self, ker='poly', gamma=1, coef0=1, degree=2, eta0=1.0, max_iter=100):
        kernel_obj = Kernel(gamma=gamma, coef0=coef0, degree=degree)
        # 'rbf' is accepted as an alias for the Gaussian kernel.
        self.kernel_func = getattr(kernel_obj, 'gaussian' if ker == 'rbf' else ker)
        self.eta0, self.max_iter = eta0, max_iter

    def decision_function(self, Z):
        Z = np.atleast_2d(Z)
        if not hasattr(self, 'sv_index') or not self.sv_index.any():
            return np.zeros(len(Z))  # untrained, or no mistakes made yet
        k_values = self.kernel_func(Z, self.sv[self.sv_index])
        return np.dot(k_values, self.alpha[self.sv_index])

    def fit(self, X, y):
        m = X.shape[0]
        self.alpha = np.zeros(m)
        self.sv_index = np.zeros(m, dtype=bool)
        self.sv = X
        for _ in range(self.max_iter):
            indexes = np.random.permutation(m)
            stop = True
            for i in indexes:
                xi, yi = X[i:i + 1], y[i]
                if yi * self.decision_function(xi)[0] <= 0:
                    self.alpha[i] += yi * self.eta0
                    self.sv_index[i] = (self.alpha[i] != 0)
                    stop = False
            if stop:  # a full pass without mistakes: training has converged
                return

    def predict(self, Z):
        return np.sign(self.decision_function(Z))
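
# KPerceptron above is the dual (kernelized) perceptron: it keeps one
# coefficient alpha_i per training point and scores with
#   f(z) = sum_i alpha_i * k(x_i, z).
# On every mistake (y_i * f(x_i) <= 0) it applies alpha_i += eta0 * y_i, the
# dual counterpart of the primal update w += eta0 * y_i * x_i, and training
# stops early once a full random pass over the data makes no mistakes.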

class LogisticRegression(object):
    def __init__(self, kernel_name='linear', gamma=1.0, degree=3, coef0=1):
        kernel_obj = Kernel(gamma=gamma, degree=degree, coef0=coef0)
        self.kernel_func = getattr(kernel_obj, kernel_name)

    @staticmethod
    def _sigmoid(z):
        z = np.clip(z, -250, 250)  # guard against overflow in exp
        return 1 / (1 + np.exp(-z))

    def _cost(self, K, y, a):
        # Negative log-likelihood, with scores clipped for numerical safety.
        pred = np.dot(a, K)
        pred = np.clip(pred, -50, 50)
        return -np.dot(y, pred) + np.sum(np.log(1 + np.exp(pred)))

    def _gradient(self, K, y, a):
        return -np.dot(K, y - self._sigmoid(np.dot(a, K)))

    def fit(self, X, y, max_rate=100, min_rate=0.001, gd_step=10, epsilon=1e-4):
        self.X_train = X
        m = len(X)
        K = self.kernel_func(X, X)
        self.a = np.zeros(m)
        prev_cost, next_cost = 0, self._cost(K, y, self.a)
        for it in range(1000):
            if it > 0 and np.abs(prev_cost - next_cost) <= epsilon:
                break
            neg_grad = -self._gradient(K, y, self.a)
            # Crude line search: try step sizes on a geometric grid and keep
            # the one with the lowest cost (0 if nothing improves).
            best_rate, min_cost_for_step = 0, self._cost(K, y, self.a)
            rate = max_rate
            while rate >= min_rate:
                cost = self._cost(K, y, self.a + neg_grad * rate)
                if cost < min_cost_for_step:
                    min_cost_for_step, best_rate = cost, rate
                rate /= gd_step
            self.a += neg_grad * best_rate
            prev_cost, next_cost = next_cost, min_cost_for_step

    def predict(self, X):
        X = np.atleast_2d(X)
        K_pred = self.kernel_func(X, self.X_train)
        return self._sigmoid(np.dot(K_pred, self.a))
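
# LogisticRegression above is a kernelized logistic regression: it models
# P(y = 1 | x) = sigmoid(sum_j a_j * k(x_j, x)) and fits the dual weights a by
# gradient descent on the negative log-likelihood
#   L(a) = -y . (K a) + sum_i log(1 + exp((K a)_i)),
# whose gradient is -K (y - sigmoid(K a)). Each step picks the best learning
# rate from the geometric grid (max_rate, max_rate / gd_step, ..., min_rate).
# The sketch below is an added sanity check on assumed synthetic data, not one
# of the original experiments; call it manually if wanted.
def _demo_klr_sanity():
    rng = np.random.RandomState(0)
    # Two well-separated Gaussian blobs; training accuracy should reach 1.0.
    X = np.vstack([rng.randn(10, 2) + [2, 2], rng.randn(10, 2) - [2, 2]])
    y = np.array([1] * 10 + [0] * 10)
    model = LogisticRegression(kernel_name='gaussian', gamma=0.5)
    model.fit(X, y)
    acc = np.mean((model.predict(X) > 0.5).astype(int) == y)
    print(f'demo training accuracy: {acc:.2f}')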

def run_xor_kperceptron_experiment(ker_name, degree_param=2):
    X = np.array([[1, 1], [1, 0], [0, 1], [0, 0]])
    y = np.array([-1, 1, 1, -1])
    np.random.seed(1)
    fig = plt.figure(figsize=(10, 5))
    with plt.style.context('Solarize_Light2'):
        x_min, x_max, y_min, y_max = -0.2, 1.2, -0.2, 1.2
        h = .02
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        grid_points = np.c_[xx.ravel(), yy.ravel()]
        ax1 = fig.add_subplot(1, 2, 1)
        ax1.set_xlim(xx.min(), xx.max()); ax1.set_ylim(yy.min(), yy.max())
        ax1.set_xticks(()); ax1.set_yticks(())
        kp = KPerceptron(ker=ker_name, gamma=1, coef0=1, degree=degree_param, eta0=0.5)
        kp.fit(X, y)
        Z = kp.decision_function(grid_points).reshape(xx.shape)
        contours = ax1.contour(xx, yy, Z, 16, alpha=.8)
        ax1.clabel(contours)
        ax1.scatter(X[:, 0], X[:, 1], s=50, c=y, edgecolors='#002b36')
        title_str = f'{ker_name} {degree_param}' if ker_name == 'poly' else f'{ker_name}'
        ax1.set_title(title_str, color='#586e75')
        ax2 = fig.add_subplot(1, 2, 2, projection='3d')
        ax2.plot_surface(xx, yy, Z)
        ax2.set_xlabel(r'$x_1$'); ax2.set_ylabel(r'$x_2$')
    plt.tight_layout()
    plt.show()
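
# Note on the XOR experiment above: XOR is the classic data set no linear
# separator can handle; the positive points (1, 0) and (0, 1) lie on the
# opposite diagonal from the negative points (0, 0) and (1, 1). A degree-2
# polynomial (or RBF) kernel lifts the data into a space where the perceptron
# can converge, which is what the contour and surface plots visualize.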

def run_watermelon_logreg_experiment(kernel_name):
    try:
        data = pd.read_csv('data3.0.csv')
    except FileNotFoundError:
        print("Error: cannot find 'data3.0.csv'")
        return
    X = np.array(data[['密度', '含糖率']])
    # Positive class: rows whose 好瓜 ("good melon") column reads 是 ("yes").
    y = (np.array(data['好瓜']) == '是').astype(int)
    if kernel_name == 'linear':
        model = LogisticRegression(kernel_name='linear')
        title = 'linear kernel'
    elif kernel_name == 'gaussian':
        model = LogisticRegression(kernel_name='gaussian', gamma=50)
        title = 'Gaussian kernel, σ=0.1'
    elif kernel_name == 'laplace':
        model = LogisticRegression(kernel_name='laplace', gamma=10)
        title = 'Laplace kernel, σ=0.1'
    else:
        print(f"Error: unknown kernel name '{kernel_name}' "
              f"(expected 'linear', 'gaussian' or 'laplace')")
        return
    model.fit(X, y)
    cmap = colors.LinearSegmentedColormap.from_list('watermelon', ['red', 'green'])
    xx, yy = np.meshgrid(np.arange(0.2, 0.8, 0.01), np.arange(0.0, 0.5, 0.01))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.figure()
    plt.contourf(xx, yy, Z, cmap=cmap, alpha=0.3, antialiased=True)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap)
    plt.xlabel('density'); plt.ylabel('sugar content'); plt.title(title)
    plt.show()
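
# The experiment above assumes 'data3.0.csv' follows the watermelon 3.0 layout:
# numeric columns 密度 (density) and 含糖率 (sugar content) as features, and a
# 好瓜 ("good melon") column whose value 是 ("yes") marks the positive class.
# The gamma values encode the bandwidths named in the titles: gamma = 50 gives
# sigma = 0.1 for the Gaussian kernel and gamma = 10 gives sigma = 0.1 for the
# Laplace kernel.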

if __name__ == '__main__':
    # Fonts that can render the CJK text in the data set; keep minus signs
    # displaying correctly alongside them.
    plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei']
    plt.rcParams['axes.unicode_minus'] = False
    run_xor_kperceptron_experiment(ker_name='poly', degree_param=2)
    # run_xor_kperceptron_experiment(ker_name='rbf')
    run_watermelon_logreg_experiment(kernel_name='gaussian')
    # run_watermelon_logreg_experiment(kernel_name='linear')
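    # _demo_klr_sanity()  # optional: run the added sanity sketch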