update HanLPTest

master
ziyonghong 5 years ago
parent a800017dd6
commit 7c639e6502

@ -38,5 +38,89 @@ public class HanLPTest {
} }
} }
/**
 * Smoke test for question analysis: builds a {@code ModelProcess}, feeds it
 * each sample question and prints whatever {@code analyQuery} returns to
 * standard error. Nothing is asserted; the output is meant to be inspected
 * by hand.
 *
 * @throws Exception propagated from {@code ModelProcess} construction or analysis
 */
@Test
public void TestC() throws Exception{
    // NOTE(review): hard-coded local path, presumably the HanLP data directory — confirm.
    final ModelProcess processor = new ModelProcess("D:/HanLP/data");
    // Sample question: "What is the rating of Crouching Tiger, Hidden Dragon?"
    final String[] samples = {"卧虎藏龙的分数是多少"};
    for (int i = 0; i < samples.length; i++) {
        ArrayList<String> analysed = processor.analyQuery(samples[i]);
        System.err.println(analysed);
    }
}
/**
 * Minimal end-to-end MLlib naive Bayes walkthrough on a local Spark context.
 *
 * <p>Builds three {@code LabeledPoint} training samples (two from dense
 * vectors, one from a sparse vector), parallelizes them into a
 * {@code JavaRDD}, trains a {@code NaiveBayesModel}, and prints the predicted
 * label and the value returned by {@code predictProbabilities} for one test
 * vector. Nothing is asserted; the output is meant to be inspected by hand.
 */
@Test
public void TestRDD(){
    SparkConf conf = new SparkConf().setAppName("NaiveBayesTest").setMaster("local[*]");
    // try-with-resources closes the context on every path, so a failure in
    // training or prediction no longer leaves the Spark context open.
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        // MLlib vectors come in two flavours, dense and sparse. The feature
        // vector (1.0, 0.0, 2.0) is built densely below; (2.0, 3.0, 0.0) is
        // built sparsely.

        // Dense vector: every position is given explicitly.
        Vector dense = Vectors.dense(1.0,0.0,2.0);
        System.out.println(dense);

        // Sparse vector: only the listed indices carry values; every other
        // position defaults to 0.0.
        int len = 3;
        int[] index = new int[]{0,1};
        double[] values = new double[]{2.0,3.0};
        Vector sparse = Vectors.sparse(len, index, values);

        // A LabeledPoint pairs a double label (the class id) with a feature
        // vector, which may be dense or sparse.
        // Training sample built from the dense vector, class id 1.0.
        LabeledPoint train_one = new LabeledPoint(1.0,dense);   // features (1.0, 0.0, 2.0)
        // Training sample built from the sparse vector, class id 2.0.
        LabeledPoint train_two = new LabeledPoint(2.0,sparse);  // features (2.0, 3.0, 0.0)
        // Training sample built from an inline dense vector, class id 3.0.
        LabeledPoint train_three = new LabeledPoint(3.0,Vectors.dense(1,1,2)); // features (1.0, 1.0, 2.0)

        // Collect the three training samples.
        List<LabeledPoint> trains = new ArrayList<>();
        trains.add(train_one);
        trains.add(train_two);
        trains.add(train_three);

        // Distribute the samples as a JavaRDD of LabeledPoint.
        JavaRDD<LabeledPoint> trainingRDD = sc.parallelize(trains);

        // The JavaRDD is unwrapped with rdd() before being handed to the trainer.
        NaiveBayesModel nb_model = NaiveBayes.train(trainingRDD.rdd());

        // Test input: a single dense vector.
        double [] dTest = {2,1,0};
        Vector vTest = Vectors.dense(dTest);

        // Predicted class label for the test vector.
        System.err.println(nb_model.predict(vTest));
        // Result of predictProbabilities for the same vector
        // (presumably one probability per class — see the MLlib API docs).
        System.err.println(nb_model.predictProbabilities(vTest));
    }
}
} }

Loading…
Cancel
Save