|
|
|
@ -199,6 +199,7 @@ model = lr.fit(train_df)
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
predict_df = model.transform(test_df)
|
|
|
|
|
predict_df.show()
|
|
|
|
|
|
|
|
|
|
# 对测试集做predict, 生成(预测分类, 正确分类)
|
|
|
|
|
def build_predict_target(row):
|
|
|
|
@ -211,4 +212,36 @@ metrics = BinaryClassificationMetrics(predict_and_target_rdd)
|
|
|
|
|
print(metrics.areaUnderROC)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
+--------------------+-----+--------------------+--------------------+----------+
|
|
|
|
|
| features|label| rawPrediction| probability|prediction|
|
|
|
|
|
+--------------------+-----+--------------------+--------------------+----------+
|
|
|
|
|
|[136.0,170.0,186....| 2|[5.50038384392939...|[1.14272614124392...| 2.0|
|
|
|
|
|
|[153.0,171.0,188....| 2|[5.64285991677237...|[1.05275151181886...| 2.0|
|
|
|
|
|
|[154.0,169.0,170....| 2|[9.13811240418261...|[1.23461484600179...| 2.0|
|
|
|
|
|
|[170.0,136.0,137....| 2|[8.73938836713828...|[0.82025040564946...| 0.0|
|
|
|
|
|
|[170.0,136.0,153....| 2|[-49.156574377688...|[1.01787595814187...| 2.0|
|
|
|
|
|
|[170.0,136.0,170....| 2|[-4.3502636934223...|[2.10198053824788...| 2.0|
|
|
|
|
|
|[170.0,153.0,153....| 2|[24.7504340465642...|[0.95194611168804...| 0.0|
|
|
|
|
|
|[170.0,153.0,153....| 2|[10.3396476252517...|[5.05385721565579...| 2.0|
|
|
|
|
|
|[170.0,153.0,170....| 2|[-32.327699408487...|[1.79381269532298...| 2.0|
|
|
|
|
|
|[170.0,153.0,187....| 2|[-2.5797308183266...|[2.55194094783080...| 2.0|
|
|
|
|
|
|[170.0,169.0,188....| 2|[21.8731085621900...|[0.99999249446448...| 0.0|
|
|
|
|
|
|[170.0,170.0,171....| 2|[13.2679489547443...|[2.59628186559201...| 2.0|
|
|
|
|
|
|[170.0,170.0,187....| 2|[-5.3908431995277...|[4.7174559422271E...| 2.0|
|
|
|
|
|
|[170.0,171.0,170....| 2|[-1.4335778407781...|[2.73396015551679...| 2.0|
|
|
|
|
|
|[187.0,135.0,170....| 2|[10.2316756244436...|[0.97051656471285...| 0.0|
|
|
|
|
|
|[187.0,170.0,119....| 2|[-3.4790019515889...|[1.56511505482708...| 2.0|
|
|
|
|
|
|[187.0,170.0,136....| 2|[10.9991854080053...|[0.01532263766298...| 2.0|
|
|
|
|
|
|[187.0,170.0,170....| 2|[3.72541501247339...|[7.45308264995363...| 2.0|
|
|
|
|
|
|[187.0,170.0,170....| 2|[9.13578952188178...|[0.15750788232099...| 2.0|
|
|
|
|
|
|[187.0,170.0,171....| 2|[1.83264502941609...|[1.14223140974846...| 2.0|
|
|
|
|
|
+--------------------+-----+--------------------+--------------------+----------+
|
|
|
|
|
|
|
|
|
|
0.870404411764706
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
以上代码用于评估模型效果。由于我们使用的是逻辑回归,这里只需获取 `AUC` 的值即可:`AUC` 越大(越接近 1),模型的分类效果越好。
|