import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection on older Matplotlib
class Kernel(object):
    """Collection of kernel functions; each method returns the Gram matrix K with K[i, j] = k(X1[i], X2[j])."""

    def __init__(self, gamma=1.0, coef0=1, degree=3):
        self.gamma, self.coef0, self.degree = gamma, coef0, degree

    def linear(self, X1, X2):
        # k(x, z) = <x, z>
        return np.dot(X1, X2.T)

    def poly(self, X1, X2):
        # k(x, z) = (gamma * <x, z> + coef0) ** degree
        return (self.gamma * np.dot(X1, X2.T) + self.coef0) ** self.degree

    def gaussian(self, X1, X2):
        # k(x, z) = exp(-gamma * ||x - z||^2), computed without explicit loops via
        # ||x - z||^2 = ||x||^2 - 2 <x, z> + ||z||^2
        X1_norm_sq = np.sum(X1 ** 2, axis=1)
        X2_norm_sq = np.sum(X2 ** 2, axis=1)
        dot_product = np.dot(X1, X2.T)
        dist_sq = X1_norm_sq[:, np.newaxis] - 2 * dot_product + X2_norm_sq
        dist_sq = np.maximum(dist_sq, 0)  # guard against tiny negative values from rounding
        return np.exp(-self.gamma * dist_sq)

    def laplace(self, X1, X2):
        # k(x, z) = exp(-gamma * ||x - z||)
        mat = np.zeros([len(X1), len(X2)])
        for i in range(len(X1)):
            for j in range(len(X2)):
                mat[i, j] = np.exp(-self.gamma * np.linalg.norm(X1[i] - X2[j]))
        return mat
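# Quick sanity check (sketch, not part of the original experiments): for the Gaussian
# kernel k(x, x) = exp(0) = 1, so the diagonal of gaussian(X, X) should be all ones and
# the Gram matrix should be symmetric:
#   k = Kernel(gamma=0.5)
#   X = np.random.rand(5, 2)
#   K = k.gaussian(X, X)          # K[i, j] = exp(-0.5 * ||X[i] - X[j]||**2)
#   assert np.allclose(np.diag(K), 1.0)
#   assert np.allclose(K, K.T)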
class KPerceptron(object):
    """Kernel perceptron: f(x) = sum_i alpha_i * k(x_i, x), with the labels absorbed into alpha."""

    def __init__(self, ker='poly', gamma=1, coef0=1, degree=2, eta0=1.0, max_iter=100):
        kernel_obj = Kernel(gamma=gamma, coef0=coef0, degree=degree)
        # 'rbf' is accepted as an alias for the Gaussian kernel
        self.kernel_func = getattr(kernel_obj, ker if ker != 'rbf' else 'gaussian')
        self.eta0, self.max_iter = eta0, max_iter

    def decision_function(self, Z):
        Z = np.atleast_2d(Z)
        if not hasattr(self, 'sv_index') or not self.sv_index.any():
            return np.zeros(len(Z))
        k_values = self.kernel_func(Z, self.sv[self.sv_index])
        return np.dot(k_values, self.alpha[self.sv_index])

    def fit(self, X, y):
        m = X.shape[0]
        self.alpha = np.zeros(m)
        self.sv_index = np.zeros(m, dtype=bool)
        self.sv = X
        for _ in range(self.max_iter):
            indexes = np.random.permutation(m)
            stop = True
            for i in indexes:
                xi, yi = X[i:i + 1], y[i]
                # update alpha_i whenever sample i is misclassified (or lies on the boundary)
                if yi * self.decision_function(xi)[0] <= 0:
                    self.alpha[i] += yi * self.eta0
                    self.sv_index[i] = (self.alpha[i] != 0)
                    stop = False
            if stop:  # a full pass without a single mistake: converged
                return

    def predict(self, Z):
        return np.sign(self.decision_function(Z))
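# Usage sketch (mirrors run_xor_kperceptron_experiment below): a degree-2 polynomial
# kernel lets the perceptron fit XOR, which no linear perceptron can separate:
#   clf = KPerceptron(ker='poly', degree=2, eta0=0.5)
#   clf.fit(np.array([[1, 1], [1, 0], [0, 1], [0, 0]]), np.array([-1, 1, 1, -1]))
#   clf.predict([[1, 0], [1, 1]])   # expected: array([ 1., -1.])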
class LogisticRegression(object):
    """Kernel logistic regression: P(y=1 | x) = sigmoid(sum_j a_j * k(x_j, x)),
    trained by gradient descent with a coarse step-size search."""

    def __init__(self, kernel_name='linear', gamma=1.0, degree=3, coef0=1):
        kernel_obj = Kernel(gamma=gamma, degree=degree, coef0=coef0)
        self.kernel_func = getattr(kernel_obj, kernel_name)

    @staticmethod
    def _sigmoid(z):
        z = np.clip(z, -250, 250)  # avoid overflow in exp
        return 1 / (1 + np.exp(-z))

    def _cost(self, K, y, a):
        # negative log-likelihood: sum_i log(1 + exp(f_i)) - y_i * f_i, with f = a @ K
        pred = np.dot(a, K)
        pred = np.clip(pred, -50, 50)
        return -np.dot(y, pred) + np.sum(np.log(1 + np.exp(pred)))

    def _gradient(self, K, y, a):
        # gradient of the negative log-likelihood: K @ (sigmoid(K a) - y)  (K is symmetric)
        return -np.dot(K, y - self._sigmoid(np.dot(a, K)))

    def fit(self, X, y, max_rate=100, min_rate=0.001, gd_step=10, epsilon=1e-4):
        self.X_train = X
        m = len(X)
        K = self.kernel_func(X, X)
        self.a = np.zeros(m)
        prev_cost, next_cost = 0, self._cost(K, y, self.a)
        for it in range(1000):
            # stop once the cost no longer changes noticeably
            if it > 0 and np.abs(prev_cost - next_cost) <= epsilon:
                break
            neg_grad = -self._gradient(K, y, self.a)
            # coarse line search over the step sizes max_rate, max_rate/gd_step, ...
            best_rate, min_cost_for_step = 0, self._cost(K, y, self.a)
            rate = max_rate
            while rate >= min_rate:
                cost = self._cost(K, y, self.a + neg_grad * rate)
                if cost < min_cost_for_step:
                    min_cost_for_step, best_rate = cost, rate
                rate /= gd_step
            self.a += neg_grad * best_rate
            prev_cost, next_cost = next_cost, min_cost_for_step

    def predict(self, X):
        # returns P(y=1 | x) for each row of X (probabilities, not hard labels)
        X = np.atleast_2d(X)
        K_pred = self.kernel_func(X, self.X_train)
        return self._sigmoid(np.dot(K_pred, self.a))
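# Usage sketch (mirrors run_watermelon_logreg_experiment below); note that predict()
# returns probabilities rather than hard class labels:
#   klr = LogisticRegression(kernel_name='gaussian', gamma=50)
#   klr.fit(X, y)                        # y must contain 0/1 labels
#   probs = klr.predict(X)               # P(y=1 | x)
#   labels = (probs >= 0.5).astype(int)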
def run_xor_kperceptron_experiment(ker_name, degree_param=2):
    # XOR training set: not linearly separable, so a nonlinear kernel is required
    X = np.array([[1, 1], [1, 0], [0, 1], [0, 0]])
    y = np.array([-1, 1, 1, -1])
    np.random.seed(1)

    fig = plt.figure(figsize=(10, 5))

    with plt.style.context('Solarize_Light2'):
        x_min, x_max, y_min, y_max = -0.2, 1.2, -0.2, 1.2
        h = 0.02
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        grid_points = np.c_[xx.ravel(), yy.ravel()]

        ax1 = fig.add_subplot(1, 2, 1)
        ax1.set_xlim(xx.min(), xx.max())
        ax1.set_ylim(yy.min(), yy.max())
        ax1.set_xticks(())
        ax1.set_yticks(())

        kp = KPerceptron(ker=ker_name, gamma=1, coef0=1, degree=degree_param, eta0=0.5)
        kp.fit(X, y)
        Z = kp.decision_function(grid_points).reshape(xx.shape)

        # left panel: labelled contours of the decision function over the XOR points
        contours = ax1.contour(xx, yy, Z, 16, alpha=.8)
        ax1.clabel(contours)
        ax1.scatter(X[:, 0], X[:, 1], s=50, c=y, edgecolors='#002b36')
        title_str = f'{ker_name} (degree={degree_param})' if ker_name == 'poly' else ker_name
        ax1.set_title(title_str, color='#586e75')

        # right panel: the same decision function as a 3D surface
        ax2 = fig.add_subplot(1, 2, 2, projection='3d')
        ax2.plot_surface(xx, yy, Z)
        ax2.set_xlabel(r'$x_1$')
        ax2.set_ylabel(r'$x_2$')

    plt.tight_layout()
    plt.show()
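# The resulting figure has two panels: the left one draws labelled contour lines of the
# kernel perceptron's decision function over the four XOR points; the right one shows the
# same decision function as a 3D surface.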
def run_watermelon_logreg_experiment(kernel_name):
    # data3.0.csv is the watermelon 3.0 dataset; the column names are the original
    # Chinese attribute names: 密度 (density), 含糖率 (sugar content), 好瓜 (good melon)
    try:
        data = pd.read_csv('data3.0.csv')
    except FileNotFoundError:
        print("Error: 'data3.0.csv' not found!")
        return

    X = np.array(data[['密度', '含糖率']])
    y = (np.array(data['好瓜']) == '是').astype(int)  # '是' (yes) -> 1, otherwise 0

    if kernel_name == 'linear':
        model = LogisticRegression(kernel_name='linear')
        title = 'linear kernel'
    elif kernel_name == 'gaussian':
        # gamma = 1 / (2 * sigma**2), so gamma=50 corresponds to sigma = 0.1
        model = LogisticRegression(kernel_name='gaussian', gamma=50)
        title = 'Gaussian kernel, σ=0.1'
    elif kernel_name == 'laplace':
        # gamma = 1 / sigma, so gamma=10 corresponds to sigma = 0.1
        model = LogisticRegression(kernel_name='laplace', gamma=10)
        title = 'Laplace kernel, σ=0.1'
    else:
        print(f"Error: unknown kernel name '{kernel_name}'")
        return

    model.fit(X, y)

    # decision regions: predicted probability of a good melon over the feature plane
    cmap = colors.LinearSegmentedColormap.from_list('watermelon', ['red', 'green'])
    xx, yy = np.meshgrid(np.arange(0.2, 0.8, 0.01), np.arange(0.0, 0.5, 0.01))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure()
    plt.contourf(xx, yy, Z, cmap=cmap, alpha=0.3, antialiased=True)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap)
    plt.xlabel('density')
    plt.ylabel('sugar content')
    plt.title(title)
    plt.show()
if __name__ == '__main__':
    # Chinese-capable fonts so that any Chinese text from the dataset renders correctly
    plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei']
    plt.rcParams['axes.unicode_minus'] = False

    # kernel perceptron on the XOR problem
    run_xor_kperceptron_experiment(ker_name='poly', degree_param=2)
    # run_xor_kperceptron_experiment(ker_name='rbf')

    # kernel logistic regression on the watermelon 3.0 dataset
    run_watermelon_logreg_experiment(kernel_name='gaussian')
    # run_watermelon_logreg_experiment(kernel_name='linear')