delete test

master
ziyonghong 5 years ago
parent 7c639e6502
commit e30851d47b

@ -1,32 +0,0 @@
import java.util.List;
import org.junit.Test;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
/**
 * Smoke test for HanLP word segmentation with a custom dictionary entry.
 */
public class test {

    /**
     * Adds the word "虽然会" to the custom dictionary, segments a fixed sentence
     * and prints each resulting term on its own line.
     *
     * <p>Any exception is caught and reported on standard output as
     * {@code <class>,<message>}; it is not rethrown, so this test does not fail
     * when segmentation throws.
     */
    @Test
    public void TestA() {
        final String sentence = "明天虽然会下雨,但是我还是会看周杰伦的演唱会。";
        try {
            Segment segmenter = HanLP.newSegment();
            segmenter.enableCustomDictionary(true);
            // Register the custom entry before the sentence is segmented.
            CustomDictionary.add("虽然会", "ng 0");
            printTerms(segmenter.seg(sentence));
        } catch (Exception failure) {
            String report = failure.getClass() + "," + failure.getMessage();
            System.out.println(report);
        }
    }

    /** Prints the string form of every term, one per line. */
    private static void printTerms(List<Term> terms) {
        for (Term each : terms) {
            System.out.println(each.toString());
        }
    }
}

@ -1,126 +0,0 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.classification.NaiveBayes;
import org.apache.spark.mllib.classification.NaiveBayesModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.junit.Test;
import com.appleyk.process.ModelProcess;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
/**
 * Exploratory tests: HanLP segmentation with a custom dictionary, the project's
 * {@code ModelProcess} question analysis, and a minimal Spark MLlib naive Bayes run.
 */
public class HanLPTest {

    /**
     * Adds "虽然会" to the custom dictionary, segments a fixed sentence and prints
     * each term.
     *
     * <p>NOTE(review): exceptions are printed to standard output and not rethrown,
     * so this test does not fail when segmentation throws.
     */
    @Test
    public void TestA() {
        String lineStr = "明天虽然会下雨,但是我还是会看周杰伦的演唱会。";
        try {
            Segment segment = HanLP.newSegment();
            segment.enableCustomDictionary(true);
            // Register the custom entry before the sentence is segmented.
            CustomDictionary.add("虽然会", "ng 0");
            List<Term> seg = segment.seg(lineStr);
            for (Term term : seg) {
                System.out.println(term.toString());
            }
        } catch (Exception ex) {
            System.out.println(ex.getClass() + "," + ex.getMessage());
        }
    }

    /**
     * Runs {@code ModelProcess.analyQuery} on each sample question and prints the
     * returned list to standard error.
     *
     * @throws Exception propagated from {@code ModelProcess}
     */
    @Test
    public void TestC() throws Exception {
        // NOTE(review): hard-coded absolute Windows path; presumably the HanLP data
        // directory - confirm before running on another machine.
        ModelProcess query = new ModelProcess("D:/HanLP/data");
        String[] questionArr = new String[] {"卧虎藏龙的分数是多少"};
        for (String que : questionArr) {
            ArrayList<String> question = query.analyQuery(que);
            System.err.println(question);
        }
    }

    /**
     * Trains a naive Bayes model on three hand-built labeled points using a local
     * Spark context, then prints the predicted label and the result of
     * {@code predictProbabilities} for one test vector.
     *
     * <p>The Spark context is opened in a try-with-resources statement so that it
     * is closed even when training or prediction throws.
     */
    @Test
    public void TestRDD() {
        SparkConf conf = new SparkConf().setAppName("NaiveBayesTest").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Two vector forms are used below: dense and sparse.
            //   dense  -> (1.0, 0.0, 2.0)
            //   sparse -> (2.0, 3.0, 0.0)

            // Dense vector: every position is given explicitly.
            Vector dense = Vectors.dense(1.0, 0.0, 2.0);
            System.out.println(dense);

            // Sparse vector: only the listed indices carry values; positions that
            // are not listed default to 0.0.
            int len = 3;
            int[] index = new int[] {0, 1};
            double[] values = new double[] {2.0, 3.0};
            Vector sparse = Vectors.sparse(len, index, values);

            // A labeled point pairs a double label (the class number) with a
            // feature vector; the training set is a collection of labeled points.

            // Training sample built from the dense vector; 1.0 is the class number.
            LabeledPoint train_one = new LabeledPoint(1.0, dense); // (1.0, 0.0, 2.0)
            // Training sample built from the sparse vector; 2.0 is the class number.
            LabeledPoint train_two = new LabeledPoint(2.0, sparse); // (2.0, 3.0, 0.0)
            // Training sample built from an inline dense vector; 3.0 is the class number.
            LabeledPoint train_three = new LabeledPoint(3.0, Vectors.dense(1, 1, 2)); // (1.0, 1.0, 2.0)

            // The list holds the training set (three training samples).
            List<LabeledPoint> trains = new ArrayList<>();
            trains.add(train_one);
            trains.add(train_two);
            trains.add(train_three);

            // Distribute the samples as a JavaRDD of LabeledPoint.
            JavaRDD<LabeledPoint> trainingRDD = sc.parallelize(trains);

            // NaiveBayes.train is given the underlying RDD, obtained via rdd().
            NaiveBayesModel nb_model = NaiveBayes.train(trainingRDD.rdd());

            // Build the test input; here a single vector is predicted.
            double[] dTest = {2, 1, 0};
            Vector vTest = Vectors.dense(dTest);

            // Classification result: the label predicted for the test vector.
            System.err.println(nb_model.predict(vTest));

            // Output of predictProbabilities for the same test vector.
            System.err.println(nb_model.predictProbabilities(vTest));
        }
        // The context is released by try-with-resources on every exit path.
    }
}
Loading…
Cancel
Save