爬取数据

master
ziyonghong 5 years ago
commit f7abaef8d2

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Eclipse JDT classpath description for this project. It lists the main and
  test source folders, the JRE container, the m2e Maven dependency container,
  a user library named "userlib", and the default output folder.
-->
<classpath>
<!-- Main sources, compiled into target/classes. -->
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Test sources, compiled into target/test-classes. -->
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="test" value="true"/>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- JRE system library container, bound to the J2SE-1.5 execution environment. -->
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Dependencies resolved by m2e from pom.xml. -->
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!--
  NOTE(review): "userlib" is a user library whose contents are not defined in
  this file. Presumably it supplies jars that pom.xml does not declare;
  confirm, because other checkouts will not have it.
-->
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/userlib"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Eclipse project descriptor: names the project "chinese" and registers the
  Java and m2e Maven builders together with their matching project natures.
-->
<projectDescription>
<name>chinese</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<!-- Java builder. -->
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<!-- m2e Maven builder. -->
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

@ -0,0 +1,4 @@
# Eclipse resource-encoding preferences: both source folders and the project
# default are set to UTF-8.
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

@ -0,0 +1,6 @@
# Eclipse JDT compiler preferences: source level, compliance level and target
# platform are all set to 1.5; forbidden references are reported as warnings.
# NOTE(review): presumably derived by m2e from pom.xml, which sets no compiler
# level - confirm before editing by hand.
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.5

@ -0,0 +1,4 @@
# Presumably the m2e (Eclipse Maven integration) project preferences: no
# active profiles, and resolveWorkspaceProjects is enabled.
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

@ -0,0 +1,30 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>chinesemedicine</groupId>
  <artifactId>chinese</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>chinese</name>
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!--
      Explicit compiler level. Without it Maven falls back to the compiler
      plugin's default, and the com.mysql.cj driver used by the sources
      needs Java 8 or newer.
    -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.8.3</version>
    </dependency>
    <!--
      The Java sources import com.mysql.cj.*, net.sf.json.* and
      com.google.gson.*, none of which were declared here, so a plain Maven
      build could not compile them.
      NOTE(review): the versions below are assumptions; align them with the
      jars actually used in the Eclipse "userlib" user library.
    -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>8.0.33</version>
    </dependency>
    <dependency>
      <groupId>net.sf.json-lib</groupId>
      <artifactId>json-lib</artifactId>
      <version>2.4</version>
      <classifier>jdk15</classifier>
    </dependency>
    <dependency>
      <groupId>com.google.code.gson</groupId>
      <artifactId>gson</artifactId>
      <version>2.8.9</version>
    </dependency>
  </dependencies>
</project>

@ -0,0 +1,150 @@
package Reptile;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Static JDBC helper used by the crawler to store diseases ("desease" table),
 * health-care methods ("yangsheng" table) and the two relation tables
 * ("dtoy", "ytod").
 *
 * <p>All state lives in public static fields, so the class is not thread-safe.
 * Call {@link #Connection()} before any other method and {@link #close()} when
 * done. Methods that cannot reach the database report the problem and return
 * {@code -1} (or do nothing) instead of throwing.
 */
public class DBUtil {
    public static String driverString = "com.mysql.cj.jdbc.Driver";
    public static String sqlurl = "jdbc:mysql://114.55.37.70:3306/graph";
    /** Shared connection; {@code null} until {@link #Connection()} succeeds. */
    public static Connection conn = null;
    /** Shared statement for SQL that carries no untrusted text. */
    public static Statement stat = null;
    /** Set by {@link #findyid(String)}: true when the returned id is newly allocated. */
    public static boolean isnewyid = true;

    /**
     * Loads the JDBC driver and opens the shared connection and statement.
     * Failures are printed; {@link #conn} and {@link #stat} then keep their
     * previous values.
     */
    public static void Connection() {
        try {
            Class.forName(driverString);
        } catch (ClassNotFoundException e1) {
            // Keep going: a JDBC 4 driver on the classpath may still self-register.
            e1.printStackTrace();
        }
        try {
            // NOTE(review): credentials are hard-coded in source; move them to
            // configuration and rotate this password.
            conn = DriverManager.getConnection(sqlurl, "root", "XBxb0525@");
            // Only reached when the connection was opened, so a failed connect
            // no longer ends in a NullPointerException here.
            stat = conn.createStatement();
        } catch (SQLException e1) {
            e1.printStackTrace();
        }
    }

    /**
     * Returns the id of the disease named {@code dname}, inserting the disease
     * with id {@code countTable("desease") + 1} when it does not exist yet.
     *
     * @param dname disease name; bound as a statement parameter, never spliced into SQL
     * @return the existing or newly inserted id, or {@code -1} when there is no
     *         connection or the lookup fails
     */
    public static int finddid(String dname) {
        if (conn == null) {
            System.err.println("finddid: no database connection");
            return -1;
        }
        java.sql.PreparedStatement query = null;
        ResultSet rows = null;
        try {
            query = conn.prepareStatement("select id from desease where dname=?");
            query.setString(1, dname);
            rows = query.executeQuery();
            if (rows.next()) {
                return rows.getInt(1);
            }
        } catch (SQLException e) {
            // A real database error is not the same thing as "not found":
            // do not insert a duplicate row.
            e.printStackTrace();
            return -1;
        } finally {
            closeQuietly(rows);
            closeQuietly(query);
        }

        // Not found: allocate the next id and insert the disease.
        int rowCount = countTable("desease");
        if (rowCount < 0) {
            return -1;
        }
        int did = rowCount + 1;
        System.out.println("insert into desease values(" + did + ",'" + dname + "');");
        java.sql.PreparedStatement insert = null;
        try {
            insert = conn.prepareStatement("insert into desease values(?,?)");
            insert.setInt(1, did);
            insert.setString(2, dname);
            insert.executeUpdate();
        } catch (SQLException e1) {
            e1.printStackTrace();
        } finally {
            closeQuietly(insert);
        }
        return did;
    }

    /**
     * Returns the id of the health-care method named {@code mname}. When the
     * method is unknown, returns {@code countTable("yangsheng") + 1} without
     * inserting anything. {@link #isnewyid} is set to tell the two cases apart.
     *
     * @param mname method name; bound as a statement parameter
     * @return the existing id, the next free id, or {@code -1} on failure
     *         (with {@link #isnewyid} set to false)
     */
    public static int findyid(String mname) {
        if (conn == null) {
            System.err.println("findyid: no database connection");
            isnewyid = false;
            return -1;
        }
        java.sql.PreparedStatement query = null;
        ResultSet rows = null;
        try {
            query = conn.prepareStatement("select id from yangsheng where mname=?");
            query.setString(1, mname);
            rows = query.executeQuery();
            if (rows.next()) {
                isnewyid = false;
                return rows.getInt(1);
            }
        } catch (SQLException e) {
            e.printStackTrace();
            isnewyid = false;
            return -1;
        } finally {
            closeQuietly(rows);
            closeQuietly(query);
        }
        // Unknown method: hand out a provisional id for the caller to insert.
        isnewyid = true;
        return countTable("yangsheng") + 1;
    }

    /**
     * Inserts one row into "yangsheng".
     *
     * @param index  comma-separated column list
     * @param values comma-separated SQL value list; it is concatenated into the
     *               statement as-is, so the caller must quote and escape it
     * @return the affected row count, or {@code -1} on failure
     */
    public static int insertintoY(String index,String values) {
        String sqlString = "insert into yangsheng("+index+") values("+values+");";
        System.out.println(sqlString);
        if (stat == null) {
            System.err.println("insertintoY: no database connection");
            return -1;
        }
        try {
            return stat.executeUpdate(sqlString);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return -1;
    }

    /**
     * Inserts a (disease id, method id) pair into the relation table "dtoy".
     *
     * @param did disease id
     * @param yid health-care method id
     */
    public static void dtoy(int did,int yid) {
        if (stat == null) {
            System.out.println("dtoy插入数据失败");
            return;
        }
        String sqlString = "insert into dtoy values("+did+","+yid+");";
        try {
            stat.executeUpdate(sqlString);
        } catch (SQLException e) {
            System.out.println("dtoy插入数据失败");
            e.printStackTrace();
        }
    }

    /**
     * Inserts a (method id, disease id) pair into the relation table "ytod".
     *
     * @param yid health-care method id
     * @param did disease id
     */
    public static void ytod(int yid,int did) {
        if (stat == null) {
            System.err.println("ytod: no database connection");
            return;
        }
        String sqlString = "insert into ytod values("+yid+","+did+");";
        try {
            stat.executeUpdate(sqlString);
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts the rows of a table.
     *
     * @param tablename table name; concatenated into the SQL because identifiers
     *                  cannot be bound as parameters, so pass trusted constants only
     * @return the row count, or {@code -1} when there is no connection or the
     *         query fails
     */
    public static int countTable(String tablename) {
        if (stat == null) {
            return -1;
        }
        ResultSet rows = null;
        try {
            rows = stat.executeQuery("select count(*) from "+tablename+";");
            if (rows.next()) {
                return rows.getInt(1);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(rows);
        }
        return -1;
    }

    /** Closes the shared statement and connection; safe to call when never connected. */
    public static void close() {
        closeQuietly(stat);
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Closes a result set, reporting but not propagating failures. */
    private static void closeQuietly(ResultSet rows) {
        if (rows == null) {
            return;
        }
        try {
            rows.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Closes a statement, reporting but not propagating failures. */
    private static void closeQuietly(Statement statement) {
        if (statement == null) {
            return;
        }
        try {
            statement.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}

@ -0,0 +1,37 @@
package Reptile;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import javax.naming.ldap.Rdn;
import net.sf.json.JSONObject;
/** Helper for downloading a JSON document over a URL. */
public class ReadUrlUtil {
    /**
     * Downloads the resource at {@code urlString}, decodes it as UTF-8 and
     * parses it into a {@link JSONObject}.
     *
     * @param urlString absolute URL to fetch
     * @return the parsed object, or {@code null} when the body is not a valid
     *         JSON object (the parse error is printed to stderr)
     * @throws MalformedURLException if {@code urlString} is not a valid URL
     * @throws IOException if opening or reading the stream fails; these used
     *         to be swallowed silently and turned into a {@code null} result
     */
    public static JSONObject readJsonFromUrl(String urlString) throws MalformedURLException, IOException {
        InputStream iStream = new URL(urlString).openStream();
        String jsonText;
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(iStream, Charset.forName("UTF-8")));
            StringBuilder sBuilder = new StringBuilder();
            // Read in chunks instead of one character per call.
            char[] buffer = new char[4096];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                sBuilder.append(buffer, 0, read);
            }
            jsonText = sBuilder.toString();
        } finally {
            iStream.close();
        }
        try {
            return JSONObject.fromObject(jsonText);
        } catch (RuntimeException e) {
            // Presumably a JSON syntax error from the parser. Keep the old
            // null-result contract, but report the failure.
            System.err.println("readJsonFromUrl: response from " + urlString + " is not valid JSON");
            e.printStackTrace();
            return null;
        }
    }
}

@ -0,0 +1,23 @@
package Reptile;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * Parallel lookup tables that map the Chinese field labels to column-name
 * strings. The entry at position {@code i} of {@link #indexchinese}
 * corresponds to the entry at position {@code i} of {@link #indexenglish}.
 */
public class perfix {
    /*
     * Fields covered, in list order:
     *   name, function, operation, disease, syndromes,
     *   position, symptom, remarks, taboo
     */
    public static ArrayList<String> indexchinese = listOf(
            "名字", "作用", "操作方法", "疾病", "证候", "部位", "症状", "备注", "禁忌");
    public static ArrayList<String> indexenglish = listOf(
            "mname", "mfunction", "moperation", "mdisease", "msyndromes",
            "mposition", "msymptom", "mremarks", "mtaboo");

    /** Builds a mutable list holding the given items in order. */
    private static ArrayList<String> listOf(String... items) {
        ArrayList<String> list = new ArrayList<String>(items.length);
        for (String item : items) {
            list.add(item);
        }
        return list;
    }
}

@ -0,0 +1,97 @@
package Reptile;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.google.gson.JsonArray;
import com.mysql.cj.jdbc.result.ResultSetMetaData;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
/**
 * Crawler entry point: queries the tcmkb.cn health service for a disease
 * keyword and stores the returned health-care methods through {@code DBUtil}.
 */
public class text {
    public static void main(String[] args) {
        /*
         * dochck
         * */
    }

    /**
     * Fetches the nodes for {@code keyworString}, records the disease, links
     * every returned health-care method to it in both relation tables, and
     * inserts methods not stored yet.
     *
     * <p>The database connection is closed even when the crawl fails. If the
     * response cannot be fetched or parsed, the crawl is abandoned before
     * anything is written.
     *
     * @param keyworString disease keyword to search for
     */
    public static void dochck(String keyworString) {
        DBUtil.Connection();
        try {
            JSONObject jsonObject = null;
            try {
                // Encode the keyword so non-ASCII or reserved characters
                // survive in the query string.
                String urlString = "http://www.tcmkb.cn/kg/health2_service.php?keywords="
                        + java.net.URLEncoder.encode(keyworString, "UTF-8");
                jsonObject = ReadUrlUtil.readJsonFromUrl(urlString);
            } catch (MalformedURLException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
            if (jsonObject == null) {
                // Nothing was fetched; avoid the NullPointerException below.
                System.err.println("dochck: no data for keyword " + keyworString);
                return;
            }
            int deseaseid = DBUtil.finddid(keyworString);
            JSONArray datasArray = jsonObject.getJSONObject("data").getJSONArray("nodes");
            // Starts at 1 - presumably node 0 is the queried disease itself;
            // TODO confirm against the service response.
            for (int i = 1; i < datasArray.size(); i++) {
                JSONObject jo = (JSONObject) datasArray.get(i);
                String nameString = jo.getString("id"); // health-care method name
                String describtionString = jo.getString("description");
                // Either an existing id or a provisional new one. The relation
                // rows are written in both cases.
                int yid = DBUtil.findyid(nameString);
                boolean isNewMethod = DBUtil.isnewyid;
                DBUtil.ytod(yid, deseaseid);
                DBUtil.dtoy(deseaseid, yid);
                if (isNewMethod) {
                    insertMethod(yid, nameString, describtionString);
                }
            }
            System.out.println("end");
        } finally {
            DBUtil.close();
        }
    }

    /** Parses a method's HTML description and inserts the method into "yangsheng". */
    private static void insertMethod(int yid, String nameString, String describtionString) {
        StringBuilder sqllindex = new StringBuilder("id,mname");
        StringBuilder sqlvalue = new StringBuilder();
        sqlvalue.append(yid).append(",'").append(escapeSql(nameString)).append("'");
        Document document = Jsoup.parse(describtionString);
        Elements elements = document.select("p:has(font)"); // one <p> per field
        for (Element element : elements) {
            Element labelElement = element.select("font:has(strong)").first();
            if (labelElement == null) {
                // Paragraph has a <font> but no bold label; nothing to map.
                continue;
            }
            String label = labelElement.text(); // field name
            String eleString = element.text();
            // The value is the paragraph text after the label.
            String valueString = eleString.substring(label.length(), eleString.length());
            if (valueString.startsWith(": ")) {
                valueString = valueString.substring(2);
            }
            int column = perfix.indexchinese.indexOf(label);
            if (column >= 0) {
                // Known field: add its column and its escaped value.
                sqllindex.append(",").append(perfix.indexenglish.get(column));
                sqlvalue.append(",'").append(escapeSql(valueString)).append("'");
            } // unknown fields are ignored
        }
        DBUtil.insertintoY(sqllindex.toString(), sqlvalue.toString());
    }

    /**
     * Escapes text for use inside a single-quoted MySQL string literal.
     * Assumes the server's default backslash-escape mode - TODO confirm.
     */
    private static String escapeSql(String raw) {
        return raw.replace("\\", "\\\\").replace("'", "''");
    }
}

@ -0,0 +1,13 @@
package chinesemedicine.chinese;
/**
* Hello world!
*
*/
/**
 * Placeholder application entry point that prints a greeting.
 */
public class App
{
    /** Text written to standard output by {@link #main(String[])}. */
    private static final String GREETING = "Hello World!";

    /**
     * Prints the greeting followed by a line break.
     *
     * @param args command-line arguments (unused)
     */
    public static void main( String[] args )
    {
        System.out.println( GREETING );
    }
}

@ -0,0 +1,38 @@
package chinesemedicine.chinese;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
* Unit test for simple App.
*/
/**
 * Placeholder JUnit test case for App.
 */
public class AppTest
    extends TestCase
{
    /**
     * Creates the test case.
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return a suite containing the tests of this class
     */
    public static Test suite()
    {
        TestSuite allTests = new TestSuite( AppTest.class );
        return allTests;
    }

    /**
     * Trivial test that always passes.
     */
    public void testApp()
    {
        assertTrue( true );
    }
}
Loading…
Cancel
Save