parent
e64d0e8bab
commit
74ab41bf67
@ -0,0 +1,110 @@
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||
|
||||
# Load the dataset; the first CSV column is used as the row index.
data = pd.read_csv('dataset.csv', index_col=0)

# Drop non-numeric features and direct-predecessor features.
# The label slice 'prs':'sig_cluster' is inclusive on both ends (.loc semantics).
X = data.loc[:, 'prs': 'sig_cluster'].drop(labels=['last_contribute_to_now', 'user_login_pr'], axis=1)
print(X)

# From here on X holds the raw (unscaled) feature matrix as an ndarray.
X = X.values
y = data['tag'].values

# Split BEFORE scaling: fitting the scaler on the full dataset would leak the
# test set's min/max into the training features and inflate the test scores.
# random_state is unchanged, so the same rows land in each partition as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

# Learn the min/max on the training rows only, then apply that same mapping to
# both partitions. Test values may fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
|
||||
|
||||
# Results table: one row per classifier, one column per metric
# (accuracy, precision, recall, F1). Cells start as NaN and are filled in as
# each model is evaluated. dtype=float keeps the columns numeric instead of
# the object dtype an empty frame would otherwise get.
results = pd.DataFrame(
    index=['LR', 'SVM', 'LDA', 'NB', 'KNN', 'NN', 'DT', 'RF', 'GBT'],
    columns=['Acc', 'Pre', 'Rec', 'F1'],
    dtype=float,
)
print(results)
|
||||
|
||||
# Classifiers to compare, in evaluation order. Each label matches a row of
# `results`. Every estimator that accepts a seed gets random_state=0 so that
# the reported numbers are reproducible from run to run (the tree-based
# models were previously unseeded).
models = [
    ('LR', LogisticRegression(random_state=0)),
    ('SVM', SVC(random_state=0)),
    ('NN', MLPClassifier(random_state=0, max_iter=10000)),
    ('LDA', LinearDiscriminantAnalysis()),
    ('NB', GaussianNB()),
    ('KNN', KNeighborsClassifier()),
    ('DT', DecisionTreeClassifier(random_state=0)),
    ('RF', RandomForestClassifier(random_state=0)),
    ('GBT', GradientBoostingClassifier(random_state=0)),
]

# Column of `results` -> scoring function. precision/recall/F1 are called with
# their defaults (average='binary'), which presumably means `tag` is a binary
# label -- TODO confirm against the dataset.
scorers = {
    'Acc': accuracy_score,
    'Pre': precision_score,
    'Rec': recall_score,
    'F1': f1_score,
}

# Fit each model on the training split, score it on the held-out split,
# record the four metrics in its row and print that row.
for name, clf in models:
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    for column, scorer in scorers.items():
        results.loc[name, column] = scorer(y_test, y_pred)
    print(results.loc[name])

# Full comparison table.
print(results)
|
Loading…
Reference in new issue