commit
8f3f24828c
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||
<classpathentry kind="lib" path="C:/Program Files/Java/jdk1.7.0_25/lib/mysql-connector-java-5.0.5-bin.jar"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>Ossean</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,48 @@
|
||||
import java.math.*;
|
||||
/**
 * Computes the normalized discounted cumulative gain (NDCG) of an actual ranking
 * measured against an ideal ranking.
 *
 * <p>The ideal ranking is cut into {@code r} relevance levels of
 * {@code n = ideaSort.length / r} items each (integer division). An item found at ideal
 * position {@code p} has gain {@code 2^(r - p/n) - 1}; an item absent from the ideal
 * ranking has gain 0. The discount applied at rank {@code i} (0-based) is
 * {@code log(2) / log(2 + i)}.
 */
public class CalculateNDCG {

    /**
     * Returns DCG(realSort) / DCG(ideaSort).
     *
     * @param realSort the ranking to evaluate, best item first
     * @param ideaSort the ideal ranking, best item first
     * @param r        the number of relevance levels
     * @return the NDCG value; {@code NaN} when {@code ideaSort} is empty (0 / 0)
     * @throws ArithmeticException if {@code r} is 0, or if {@code ideaSort} is non-empty
     *                             but has fewer than {@code r} items (the per-level size is 0)
     */
    public double cal_NDCG(String[] realSort, String[] ideaSort, int r) {
        int[] gainScore = gain(realSort, ideaSort, r);
        double dcg = 0.0;
        for (int i = 0; i < realSort.length; i++) {
            dcg += gainScore[i] * Math.log(2) / Math.log(2 + i);
        }

        int idealLength = ideaSort.length;
        int perLevel = idealLength / r; // number of items in each relevance level
        int level = r + 1;              // decremented to r on the very first item
        double idealDcg = 0.0;
        for (int i = 0; i < idealLength; i++) {
            if (i % perLevel == 0) {
                level--;
            }
            idealDcg += (Math.pow(2, level) - 1) * Math.log(2) / Math.log(2 + i);
        }
        return dcg / idealDcg;
    }

    /**
     * Computes the gain of every item of {@code realSort} from its position in
     * {@code ideaSort}.
     *
     * @param realSort the ranking to score
     * @param ideaSort the ideal ranking
     * @param r        the number of relevance levels
     * @return one gain per element of {@code realSort}; 0 for items missing from {@code ideaSort}
     */
    int[] gain(String[] realSort, String[] ideaSort, int r) {
        int realLength = realSort.length;
        int perLevel = ideaSort.length / r; // number of items in each relevance level
        int[] gainScore = new int[realLength];
        for (int i = 0; i < realLength; i++) {
            int loc = find_loc(realSort[i], ideaSort);
            gainScore[i] = (loc == -1) ? 0 : (int) Math.pow(2, r - loc / perLevel) - 1;
        }
        return gainScore;
    }

    /**
     * Returns the index of the first element of {@code aim_strings} equal to {@code sub},
     * or -1 when there is none.
     *
     * <p>Strings are compared by content. The previous reference comparison ({@code ==})
     * only matched interned literals, so labels read from a file or database were never found.
     */
    int find_loc(String sub, String aim_strings[]) {
        for (int i = 0; i < aim_strings.length; i++) {
            String candidate = aim_strings[i];
            boolean same = (sub == null) ? candidate == null : sub.equals(candidate);
            if (same) {
                return i;
            }
        }
        return -1;
    }
}
|
@ -0,0 +1,17 @@
|
||||
package Sort;
|
||||
/**
 * Intended to compute the hotness of a post from its view count and its activity.
 * The scoring itself is not implemented yet.
 *
 * @author LiLy
 */
public class PostsHot {
    // NOTE(review): presumably w1 weights the view count and w2 the activity — confirm
    // once cla_postHot is implemented.
    double w1 = 0d;
    double w2 = 0d;

    /**
     * Stores the two weights. Previously the arguments were ignored and both fields
     * stayed at 0.
     *
     * @param w1 first weight
     * @param w2 second weight
     */
    public PostsHot(double w1, double w2) {
        this.w1 = w1;
        this.w2 = w2;
    }

    /**
     * Placeholder for the hotness score; currently always returns 0.
     *
     * @param viewCount        number of views of the post (unused for now)
     * @param lastActivityDate last activity timestamp as text (unused for now)
     * @return always {@code 0d}
     */
    public double cla_postHot(int viewCount, String lastActivityDate) {
        return 0d;
    }
}
|
@ -0,0 +1,12 @@
|
||||
package Sort;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Empty placeholder class. It declares no members; a list-returning method was
 * sketched here in comments but never implemented.
 */
public class SortWithPostsAndView {
    // Intentionally empty.
}
|
@ -0,0 +1,35 @@
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Splits a large text file into numbered chunk files of a fixed number of lines.
 */
public class SplitHugeFile {

    /**
     * Copies {@code filename} line by line into chunk files named by
     * {@link #writeFilename(String, int)} (index 0, 1, 2, ...), starting a new chunk after
     * every {@code linesCount} lines. Each line is written followed by {@code '\n'}.
     *
     * <p>The first chunk file is always created, even for an empty input. Later chunks are
     * only created once there is a line to put in them, so no empty trailing file is left
     * behind when the input length is an exact multiple of {@code linesCount}.
     *
     * <p>Failures are reported with a stack trace and are not propagated; the reader and
     * the current writer are closed in every case.
     *
     * @param filename   path of the file to split
     * @param linesCount maximum number of lines per chunk; values below 1 yield one line per chunk
     */
    public void splitFile(String filename, int linesCount) {
        BufferedWriter writer = null;
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(filename)))) {
            int chunkIndex = 0;
            int linesInChunk = 0;
            writer = new BufferedWriter(new FileWriter(new File(writeFilename(filename, chunkIndex++))));
            String line;
            while ((line = reader.readLine()) != null) {
                if (writer == null) {
                    // Open the next chunk lazily: only when a line actually needs it.
                    writer = new BufferedWriter(new FileWriter(new File(writeFilename(filename, chunkIndex++))));
                }
                writer.write(line);
                writer.write('\n');
                linesInChunk++;
                if (linesInChunk >= linesCount) {
                    writer.close(); // close() flushes
                    writer = null;
                    linesInChunk = 0;
                }
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the name of chunk number {@code count} by inserting the number in front of the
     * file extension, e.g. {@code Posts.xml} becomes {@code Posts0.xml}. When the file name
     * has no extension the number is appended.
     *
     * <p>The extension is located with the last dot of the last path segment rather than by
     * assuming it is exactly four characters long, so short names no longer throw and
     * extensions of other lengths are handled.
     *
     * @param filename original file name or path
     * @param count    chunk index
     * @return the chunk file name
     */
    String writeFilename(String filename, int count) {
        int dot = filename.lastIndexOf('.');
        int separator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
        if (dot <= separator + 1) {
            // No dot in the last segment, or a leading-dot name: treat as "no extension".
            return filename + count;
        }
        return filename.substring(0, dot) + count + filename.substring(dot);
    }
}
|
@ -0,0 +1,14 @@
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
/**
 * Converts timestamp text into {@link Date} objects.
 */
public class StrToDate {

    /**
     * Parses text in the {@code yyyy-MM-dd HH:mm:ss} layout.
     *
     * @param str the text to parse
     * @return the parsed date, or {@code null} when parsing fails (the failure is printed
     *         as a stack trace)
     */
    public Date strToDate(String str) {
        try {
            return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(str);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
|
@ -0,0 +1,26 @@
|
||||
import java.util.*;
|
||||
import java.text.*;
|
||||
|
||||
import support.ReadBigFile;
|
||||
public class Test {
|
||||
public static void main(String[] args){
|
||||
SplitTags st=new SplitTags("posts","id","tags","posts_split_tags","posts_id","tag");//标签分离
|
||||
st.splitTags();
|
||||
|
||||
// StrToDate std=new StrToDate();//字符串转日期
|
||||
// Date date=std.strToDate("1992-12-2 02:1:2");
|
||||
//// System.out.print(date);
|
||||
|
||||
// ReadBigFile readBigFile=new ReadBigFile();
|
||||
// readBigFile.readFile("C:\\Users\\lili\\Posts.xml");
|
||||
// (new SplitHugeFile()).splitFile("C:\\Users\\lili\\Posts.xml",2000000);//大文件分解,以行数为单位
|
||||
|
||||
|
||||
// String[] realSort={"aaa","ac","bc","g","d","ab","e"};
|
||||
// String[] ideaSort={"aaa","ab","ac","bc","d","e"};
|
||||
// System.out.println(new CalculateNDCG().cal_NDCG(realSort, ideaSort, 3));
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
package support;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
|
||||
|
||||
/**
 * Opens JDBC connections to a MySQL database on {@code 127.0.0.1:3306}.
 *
 * <p>NOTE(review): the user name and password are hard-coded in this class; consider
 * moving them to configuration.
 */
public class ConnectionSql {
    String driver = "com.mysql.jdbc.Driver";
    String url;
    String user = "root";
    String password = "mysql";

    /**
     * Builds the JDBC URL for the given database.
     *
     * <p>The encoding option is spelled {@code characterEncoding}, which is the property
     * name MySQL Connector/J defines; the former {@code charsetEncoding} is not a
     * Connector/J property, so the UTF-8 setting did not take effect.
     *
     * @param sqlName name of the database (schema) to connect to
     */
    public ConnectionSql(String sqlName) {
        url = "jdbc:mysql://127.0.0.1:3306/" + sqlName
                + "?useUnicode=true&characterEncoding=utf8&useCursorFetch=true&defaultFetchSize=100";
    }

    /**
     * Loads the driver and opens a new connection.
     *
     * @return the open connection, or {@code null} when the driver cannot be loaded or the
     *         connection cannot be established (the failure is printed as a stack trace);
     *         callers must check for {@code null} and close the connection themselves
     */
    public Connection connection() {
        Connection conn = null;
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, password);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }
}
|
@ -0,0 +1,29 @@
|
||||
package support;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Numeric helpers for lists of values.
 */
public class DataHandle {

    /**
     * Normalizes a list of values by its Frobenius (Euclidean) norm, i.e. divides every
     * element by the square root of the sum of squares.
     *
     * <p>When the norm is 0 (empty or all-zero input) the values are returned unchanged
     * instead of producing {@code NaN} from a 0 / 0 division.
     *
     * @param data the values to normalize; not modified
     * @return a new list with the normalized values, in the same order
     */
    public static List<Double> frobenius(List<Double> data) {
        double sumOfSquares = 0d;
        for (double value : data) {
            sumOfSquares += value * value;
        }
        double norm = Math.sqrt(sumOfSquares);

        List<Double> normalized = new ArrayList<Double>(data.size());
        for (double value : data) {
            normalized.add(norm == 0d ? value : value / norm);
        }
        return normalized;
    }
}
|
Binary file not shown.
@ -0,0 +1,71 @@
|
||||
package support;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ReadBigFile extends Thread{
|
||||
String filename;
|
||||
public ReadBigFile(String filename) {
|
||||
// TODO Auto-generated constructor stub
|
||||
this.filename=filename;
|
||||
}
|
||||
public void insertSql(List<String> sqls)
|
||||
{
|
||||
Connection con=new ConnectionSql("ossean_db").connection();
|
||||
|
||||
try{
|
||||
con.setAutoCommit(false);
|
||||
Statement st=con.createStatement();
|
||||
for(String s:sqls)
|
||||
{
|
||||
st.addBatch(s);
|
||||
}
|
||||
st.executeBatch();
|
||||
st.close();
|
||||
con.commit();
|
||||
con.close();
|
||||
}catch (Exception e) {
|
||||
// TODO: handle exception
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
public void readFile() {
|
||||
File file = new File(filename);
|
||||
BufferedReader reader = null;
|
||||
try {
|
||||
reader = new BufferedReader(new FileReader(file), 20 * 1024 * 1024);
|
||||
String tempString = null;
|
||||
int count = 1;
|
||||
List<String> sqls=new ArrayList<String>();
|
||||
while ((tempString = reader.readLine()) != null) {
|
||||
// tempString=URLEncoder.encode(tempString, "utf-8");
|
||||
sqls.add(tempString);
|
||||
if(count++%1000==0){
|
||||
insertSql(sqls);
|
||||
sqls.clear();
|
||||
System.out.println(count);
|
||||
}
|
||||
}
|
||||
insertSql(sqls);
|
||||
reader.close();
|
||||
} catch (Exception e) {
|
||||
// TODO: handle exception
|
||||
}
|
||||
}
|
||||
public void run() {
|
||||
readFile();
|
||||
}
|
||||
public static void main(String[] args) {
|
||||
String path="C:\\Users\\lili\\Desktop\\sql\\";
|
||||
new ReadBigFile(path+"oschina_project.sql").run();
|
||||
}
|
||||
}
|
@ -0,0 +1,91 @@
|
||||
package support;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
import java.util.*;
|
||||
import java.lang.Math;
|
||||
public class Similarity extends Thread{
|
||||
public Double cla_EX(List<Double> data) {
|
||||
int len = data.size();
|
||||
Double ex = 0d;
|
||||
Iterator<Double> iterator = data.iterator();
|
||||
Double sum = 0d;
|
||||
while (iterator.hasNext()) {
|
||||
sum += iterator.next();
|
||||
}
|
||||
return ex = sum / len;
|
||||
}
|
||||
public Double cla_sim(List<Double> data1, List<Double> data2) {
|
||||
int len = data1.size();
|
||||
List<Double> data3 = new ArrayList<Double>();
|
||||
List<Double> data4 = new ArrayList<Double>();
|
||||
List<Double> data5 = new ArrayList<Double>();
|
||||
Iterator<Double> iterator1 = data1.iterator();
|
||||
Iterator<Double> iterator2 = data2.iterator();
|
||||
while (iterator1.hasNext()) {
|
||||
Double temp = iterator1.next();
|
||||
data3.add(temp * temp);
|
||||
}
|
||||
while (iterator2.hasNext()) {
|
||||
Double temp = iterator2.next();
|
||||
data4.add(temp * temp);
|
||||
}
|
||||
iterator1 = data1.iterator();
|
||||
iterator2 = data2.iterator();
|
||||
while (iterator1.hasNext()) {
|
||||
data5.add(iterator1.next() * iterator2.next());
|
||||
}
|
||||
Double E_data1 = cla_EX(data1);
|
||||
Double E_data2 = cla_EX(data2);
|
||||
Double E_data3 = cla_EX(data3);
|
||||
Double E_data4 = cla_EX(data4);
|
||||
Double E_data5 = cla_EX(data5);
|
||||
return (E_data5 - E_data1 * E_data2)
|
||||
/ (Math.pow(E_data3 - E_data1 * E_data1, 0.5) * Math.pow(
|
||||
E_data4 - E_data2 * E_data2, 0.5));
|
||||
}
|
||||
public void operaterSql(String dataBaseName,String tablename,String column1,String column2) {
|
||||
Connection conn=new ConnectionSql(dataBaseName).connection();
|
||||
try {
|
||||
Statement st=conn.createStatement();
|
||||
String sql="select "+column1+" from "+tablename;
|
||||
ResultSet rs=st.executeQuery(sql);
|
||||
List<Double> data1=new ArrayList<Double>();
|
||||
List<Double> data2=new ArrayList<Double>();
|
||||
while (rs.next()) {
|
||||
double temp=rs.getDouble(1);
|
||||
data1.add(temp);
|
||||
}
|
||||
sql="select "+column2+" from "+tablename;
|
||||
rs=st.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
double temp=rs.getDouble(1);
|
||||
data2.add(temp);
|
||||
}
|
||||
|
||||
Double similarity=cla_sim(data1,data2);
|
||||
System.out.println(similarity);
|
||||
tablename="'"+tablename+"'";
|
||||
column1="'"+column1+"'";
|
||||
column2="'"+column2+"'";
|
||||
sql="replace into similary_record(table_name,column1,column2,similarity) values ("+tablename+","+column1+","+column2+","+similarity+")";
|
||||
st.execute(sql);
|
||||
conn.close();
|
||||
} catch (Exception e) {
|
||||
// TODO: handle exception
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
public static void main(String[] args) {
|
||||
Similarity similarity=new Similarity();
|
||||
// similarity.operaterSql("ossean_db","sourceforge_project","stars","downloadCount");
|
||||
|
||||
// similarity.operaterSql("ossean_db","match_simple","db_score","postsCount");
|
||||
// similarity.operaterSql("ossean_db","match_simple","db_score","score");
|
||||
// similarity.operaterSql("ossean_db","match_simple","db_score","favoriteCount");
|
||||
// similarity.operaterSql("ossean_db","match_simple","db_score","viewCount");
|
||||
// similarity.operaterSql("ossean_db","match_simple","db_score","answerCount");
|
||||
similarity.operaterSql("ossean_db","match_simple","postsCount","viewCount");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
package support;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -0,0 +1,78 @@
|
||||
package update;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
import java.util.Date;
|
||||
|
||||
import support.ConnectionSql;
|
||||
public class UpdateStar extends Thread{
|
||||
String tablename;
|
||||
String columnname="rank";
|
||||
int levs=6;
|
||||
public UpdateStar(String tablename) {
|
||||
// TODO Auto-generated constructor stub
|
||||
this.tablename=tablename;
|
||||
}
|
||||
public UpdateStar(String tablename,String columnname) {
|
||||
// TODO Auto-generated constructor stub
|
||||
this.tablename=tablename;
|
||||
this.columnname=columnname;
|
||||
}
|
||||
// public double cal_star(int count,int maxCount) {
|
||||
// double star=0d;
|
||||
// double step=(double)maxCount/6;
|
||||
// star=count/step;
|
||||
// return star;
|
||||
// }
|
||||
public void operateSql() {
|
||||
Connection conn = new ConnectionSql("ossean_db").connection();
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
|
||||
String sql1="select max("+columnname+") from "+tablename;
|
||||
int count=0;
|
||||
Statement st=conn.createStatement();
|
||||
ResultSet rs= st.executeQuery(sql1); //查询记录的数,确定排名的数据量
|
||||
if(rs.next()){count=rs.getInt(1)/levs;} //确定每个级别数据量
|
||||
sql1="select "+columnname+" from "+tablename+" order by "+columnname;
|
||||
rs=st.executeQuery(sql1);//查询需要参考的数据排名结果
|
||||
String updataSql="update "+tablename+" set stars=? where "+columnname+"=?";
|
||||
// String updataSql="update "+tablename+" set stars=?";
|
||||
PreparedStatement ps=conn.prepareStatement(updataSql);
|
||||
System.out.println(tablename+" start!");
|
||||
while (rs.next()) {
|
||||
int num=rs.getInt(1);
|
||||
ps.setInt(1, num/count);
|
||||
ps.setInt(2, num);
|
||||
ps.addBatch();//添加更新stars语句
|
||||
if(num%10000==0)
|
||||
{
|
||||
ps.executeBatch();
|
||||
ps.clearBatch();
|
||||
}
|
||||
}
|
||||
ps.executeBatch();
|
||||
ps.clearBatch();
|
||||
conn.commit();
|
||||
conn.close();
|
||||
System.out.println(tablename+" down!");
|
||||
} catch (Exception e) {
|
||||
// TODO: handle exception
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
public void run() {
|
||||
operateSql();
|
||||
}
|
||||
public static void main(String[] args) {
|
||||
|
||||
UpdateStar updateStar1=new UpdateStar("tag");
|
||||
// updateStar1.run();
|
||||
UpdateStar updateStar2=new UpdateStar("oschina_project");
|
||||
updateStar2.run();
|
||||
UpdateStar updateStar3=new UpdateStar("openhub_project");
|
||||
updateStar3.run();
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
package update;
|
||||
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
|
||||
import support.ConnectionSql;
|
||||
/**
|
||||
* 统计tag表中tag标签在posts中关联帖子的个数并更新postsNum字段
|
||||
* @author LiLy
|
||||
*
|
||||
*/
|
||||
public class UpdateTagPostsNum {
|
||||
public void countPostsNum() throws SQLException{
|
||||
Connection conn = new ConnectionSql("ossean_db").connection();
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
Statement st=conn.createStatement();
|
||||
String sqlSelect="select tag from tag";
|
||||
ResultSet rs=st.executeQuery(sqlSelect);
|
||||
Statement stm=conn.createStatement();
|
||||
int flag=0;
|
||||
while(rs.next())
|
||||
{
|
||||
String tag=rs.getString("tag");
|
||||
tag="'"+tag+"'";
|
||||
sqlSelect="select count(tag) from posts_split_tags where tag="+tag;
|
||||
|
||||
ResultSet rscount=stm.executeQuery(sqlSelect);
|
||||
if(rscount.next())
|
||||
{
|
||||
String sql="update tag set tag.postsNum="+rscount.getInt(1)+" where tag.tag= "+tag;
|
||||
stm.execute(sql);
|
||||
}
|
||||
if(flag++%1000==0){System.out.println(flag);}
|
||||
}
|
||||
System.out.println("Down!");
|
||||
conn.commit();
|
||||
conn.close();
|
||||
} catch (Exception e) {
|
||||
// TODO: handle exception
|
||||
conn.rollback();
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
public static void main(String[] args) throws SQLException{
|
||||
UpdateTagPostsNum updateTagPosts=new UpdateTagPostsNum();
|
||||
updateTagPosts.countPostsNum();
|
||||
}
|
||||
}
|
Loading…
Reference in new issue