first commit

master
unknown 10 years ago
commit 8f3f24828c

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="C:/Program Files/Java/jdk1.7.0_25/lib/mysql-connector-java-5.0.5-bin.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>Ossean</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,16 @@
StatisticsPostsCount.java:统计每个项目对应的帖子信息(帖子数,帖子的总回复数,获赞数,收藏数,浏览数,即postsCount,answerCount,score,favoriteCount,viewCount),
并更新到原表的对应项中。
主要方法cal_count从match_simple读出项目和匹配的标签根据标签在split_posts_tags表找出posts_id统计每个posts的count。
Update_match_result_proj_star.java: 更新match_result中的proj_stars用于计算项目在托管社区的热度平均值。
主要类Update_match_result_proj_star
主要方法: setProj_star() 操作update语句更新结果
getAVGStar(String[] onlySource, String name)返回onlySource表中name项目的stars的平均值
V:定义一些用到的常量
UpdateMatch_result定义一些方法
方法sourceFormat(String source)由source还原项目的来源表名返回一个该项目来源的String[]还原每个项目来源表并去掉FreeCode因为该网站项目数据没有可量化项
getSourceName(int id)由项目id获得从open_source_projects获得项目名称和项目源
UpdateRank.java:多线程算法。给定一个表名和该表的一列,按这一列对记录排序,并更新rank列作为名次;rank列名通过更改构造函数设置即可。
Update_match_result_rank:更新match_result排名;若同一项目同时存在于多个社区,取排名最小的作为项目排名。
UpdateTagPosts.java:方法更新tag表中每个tag关联的posts个数postsNum。
方法countPostsNum实现功能

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,48 @@
import java.math.*;
/**
 * Computes the normalized discounted cumulative gain (NDCG) of a ranking
 * against an ideal ranking whose items are split into {@code r} relevance levels.
 */
public class CalculateNDCG {

    /**
     * Scores {@code realSort} against {@code ideaSort}.
     *
     * <p>The ideal ranking is cut into consecutive groups of
     * {@code ideaSort.length / r} items (at least one per group). The first
     * group has relevance level {@code r}, the next {@code r - 1}, and so on;
     * levels never drop below zero. An item of level {@code lev} at zero-based
     * position {@code i} contributes {@code (2^lev - 1) * ln 2 / ln(2 + i)}.
     *
     * @param realSort the ranking being evaluated
     * @param ideaSort the ideal ranking, most relevant item first
     * @param r number of relevance levels; must be positive
     * @return DCG of {@code realSort} divided by the DCG of {@code ideaSort},
     *         or {@code 0.0} when the ideal DCG is zero (e.g. an empty ideal ranking)
     */
    public double cal_NDCG(String[] realSort, String[] ideaSort, int r) {
        int[] gain_score = gain(realSort, ideaSort, r);
        double dcg = 0.0;
        for (int i = 0; i < realSort.length; i++) {
            dcg += gain_score[i] * Math.log(2) / Math.log(2 + i);
        }

        int len_i = ideaSort.length;
        int n = itemsPerLevel(len_i, r);
        double idcg = 0.0;
        int lev = r + 1;
        for (int i = 0; i < len_i; i++) {
            if (i % n == 0) {
                lev--; // a new group of n items starts: relevance drops by one
            }
            idcg += (Math.pow(2, Math.max(lev, 0)) - 1) * Math.log(2) / Math.log(2 + i);
        }
        // Avoid 0/0 (NaN) when the ideal ranking carries no gain at all.
        if (idcg == 0.0) {
            return 0.0;
        }
        return dcg / idcg;
    }

    /**
     * Returns the gain of every item of {@code realSort}: {@code 2^level - 1},
     * where the level is derived from the item's position in {@code ideaSort},
     * or {@code 0} when the item does not occur in {@code ideaSort}.
     *
     * @param realSort the ranking being evaluated
     * @param ideaSort the ideal ranking
     * @param r number of relevance levels; must be positive
     * @return one gain per element of {@code realSort}
     */
    int[] gain(String[] realSort, String[] ideaSort, int r) {
        int len_r = realSort.length;
        int gain_score[] = new int[len_r];
        int n = itemsPerLevel(ideaSort.length, r);
        for (int i = 0; i < len_r; i++) {
            int loc = find_loc(realSort[i], ideaSort);
            if (loc == -1) {
                gain_score[i] = 0;
            } else {
                // Clamp at level 0 so trailing items never get a negative gain.
                gain_score[i] = (int) Math.pow(2, Math.max(r - loc / n, 0)) - 1;
            }
        }
        return gain_score;
    }

    /**
     * Finds the first index of {@code sub} in {@code aim_strings}, comparing
     * by content rather than by reference.
     *
     * @return the index, or {@code -1} when {@code sub} is absent
     */
    int find_loc(String sub, String aim_strings[]) {
        for (int i = 0; i < aim_strings.length; i++) {
            String candidate = aim_strings[i];
            if (sub == null ? candidate == null : sub.equals(candidate)) {
                return i;
            }
        }
        return -1;
    }

    /** Number of items per relevance level; at least 1 so it is safe as a divisor. */
    private static int itemsPerLevel(int idealLength, int r) {
        return Math.max(1, idealLength / r);
    }
}

@ -0,0 +1,76 @@
/*
*
*/
/*
* xml
*/
/**
 * Column layout of one posts XML row: attribute names and, index-aligned,
 * a type flag for each of them.
 */
class Fields {
    /** Attribute names in column order. */
    public static String fields[] = {"Id", "PostTypeId", "AcceptedAnswerId",
            "CreationDate", "Score", "ViewCount",
            "Body", "OwnerUserId", "LastEditorUserId",
            "LastEditorDisplayName", "LastEditDate",
            "LastActivityDate", "Title", "Tags",
            "AnswerCount", "CommentCount", "FavoriteCount",
            "CommunityOwnedDate", "ParentId", "ClosedDate",
            "OwnerDisplayName"};

    /** Type flag per field; InsertStrings binds 0 as an integer and anything else as a string. */
    public static byte fieldsType[] = {
            0, 0, 0,
            1, 0, 0,
            1, 0, 0,
            1, 1,
            1, 1, 1,
            0, 0, 0,
            1, 0, 0,
            1};
}

/**
 * Converts one {@code <row .../>} line of a posts XML dump into an array of
 * field values ordered like {@link Fields#fields}.
 */
public class ControlFormat {

    /**
     * Parses a row line into its attribute values.
     *
     * <p>The line is expected to carry a 7-character prefix and a 3-character
     * suffix around the attribute list (presumably two spaces plus
     * {@code <row } and {@code  />} — confirm against the dump format).
     * Attributes are read as {@code name="value"} pairs; values are returned
     * without the surrounding quotes and may contain spaces. XML entities in
     * values are NOT decoded.
     *
     * @param record one line of the dump; may be {@code null}
     * @return an array of {@code Fields.fields.length} values, with the string
     *         {@code "null"} for every attribute missing from the line, or
     *         {@code null} when {@code record} is {@code null} or too short
     *         to hold a row (e.g. the closing tag of the document)
     */
    public String[] changeFormat(String record) {
        if (record == null || record.length() < 10) {
            return null;
        }
        record = record.substring(7, record.length() - 3);
        // recordBody = {record without the Body attribute, Body attribute text}
        String recordBody[] = handleBody(record);
        String realFields[] = new String[Fields.fields.length];
        for (int i = 0; i < realFields.length; i++) {
            realFields[i] = "null";
        }
        parseAttributes(recordBody[0], realFields);
        realFields[6] = bodyValue(recordBody[1]);
        return realFields;
    }

    /**
     * Separates the {@code Body="..."} attribute from the rest of the record.
     *
     * <p>Relies on XML attribute values never containing a raw double quote,
     * so the first quote after {@code Body="} closes the value.
     *
     * @param record attribute list of one row
     * @return {@code {record without the Body attribute, Body attribute text}};
     *         the second element is {@code "null"} when no complete Body
     *         attribute is present
     */
    String[] handleBody(String record) {
        int start = record.indexOf("Body=\"");
        int end = start < 0 ? -1 : record.indexOf('"', start + 6);
        if (end < 0) {
            return new String[] {record, "null"};
        }
        String body = record.substring(start, end + 1);
        // Plain substring concatenation: the body must not be used as a regex.
        String rest = record.substring(0, start) + record.substring(end + 1);
        return new String[] {rest, body};
    }

    /** Stores the value of every known {@code name="value"} pair of {@code text} into {@code target}. */
    private static void parseAttributes(String text, String[] target) {
        int pos = 0;
        while (true) {
            int eq = text.indexOf("=\"", pos);
            if (eq < 0) {
                return;
            }
            int close = text.indexOf('"', eq + 2);
            if (close < 0) {
                return;
            }
            int index = fieldIndex(text.substring(pos, eq).trim());
            if (index >= 0) {
                target[index] = text.substring(eq + 2, close);
            }
            pos = close + 1;
        }
    }

    /** Returns the position of {@code name} in {@link Fields#fields}, or -1 when unknown. */
    private static int fieldIndex(String name) {
        for (int i = 0; i < Fields.fields.length; i++) {
            if (Fields.fields[i].equals(name)) {
                return i;
            }
        }
        return -1;
    }

    /** Extracts the value from a {@code Body="..."} attribute text; "null" when there is none. */
    private static String bodyValue(String bodyAttribute) {
        if (!bodyAttribute.startsWith("Body=\"")) {
            return "null";
        }
        return bodyAttribute.substring(6, bodyAttribute.length() - 1);
    }
}

@ -0,0 +1,84 @@
/*
*
*/
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.io.*;
import support.ConnectionSql;
/**
 * Bulk-loads the rows of a posts XML dump into a database table through a
 * batched prepared INSERT.
 */
public class InsertStrings {

    /**
     * Reads {@code filename} line by line (skipping the first two lines),
     * converts each line with {@link ControlFormat#changeFormat(String)} and
     * inserts it into the table named by {@code tableInfor[0]} of database
     * {@code ossean}. Batches are flushed roughly every 1000 rows. Errors are
     * printed and end the load; all resources are released either way.
     *
     * <p>Parameter {@code i} (1-based) is bound from field {@code i} of the
     * parsed row; fields flagged {@code 0} in {@code Fields.fieldsType} are
     * bound as integers unless their value is the string {@code "null"}.
     *
     * <p>NOTE(review): the column list is built from
     * {@code tableInfor[1..len-3]} plus {@code tableInfor[len-1]}, i.e.
     * {@code tableInfor[len-2]} is never used, and field 0 of the row is never
     * bound. That mapping is kept as-is — confirm it matches the intended
     * layout of {@code tableInfor}.
     *
     * @param filename path of the XML dump
     * @param tableInfor table name followed by column names
     */
    public void insertStrings(String filename, String tableInfor[]) {
        int len = tableInfor.length;
        StringBuilder sqlInsert = new StringBuilder("insert into ").append(tableInfor[0]).append(" (");
        for (int i = 1; i < len - 2; i++) {
            sqlInsert.append(tableInfor[i]).append(",");
        }
        sqlInsert.append(tableInfor[len - 1]).append(") values(");
        for (int i = 1; i < len - 2; i++) {
            sqlInsert.append("?,");
        }
        sqlInsert.append("?)");

        BufferedReader reader = null;
        Connection conn = null;
        PreparedStatement ps = null;
        try {
            reader = new BufferedReader(new FileReader(new File(filename)), 20 * 1024 * 1024);
            conn = new ConnectionSql("ossean").connection();
            ps = conn.prepareStatement(sqlInsert.toString());
            // The first two lines are not rows.
            reader.readLine();
            reader.readLine();
            ControlFormat formatter = new ControlFormat();
            int count = 0;
            String tempString;
            while ((tempString = reader.readLine()) != null) {
                String readlFields[] = formatter.changeFormat(tempString);
                if (readlFields == null) {
                    continue; // not a row line
                }
                for (int i = 1; i < len - 1; i++) {
                    boolean isNumber = Fields.fieldsType[i] == 0 && !"null".equals(readlFields[i]);
                    if (isNumber) {
                        ps.setInt(i, Integer.parseInt(readlFields[i]));
                    } else {
                        ps.setString(i, readlFields[i]);
                    }
                }
                // One batch entry per row, after ALL parameters are bound.
                ps.addBatch();
                if (count++ > 1000) {
                    ps.executeBatch();
                    ps.clearBatch();
                    count = 0;
                }
            }
            ps.executeBatch();
            ps.clearBatch();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (ps != null) {
                try {
                    ps.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (conn != null) {
                try {
                    conn.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

@ -0,0 +1,105 @@
package Sort;
import java.sql.*;
import java.util.*;
import support.ConnectionSql;
import update.UpdateRank;
/**
* postsCountviewCountscorefavoriteCount
* @author LiLy
*
*/
/**
 * Loads the per-project counters of one row of {@code match_simple} and
 * writes derived values back to that row.
 *
 * <p>Each instance created through {@link #OperateData(String)} owns the
 * connection in {@link #conn}; this class never closes it, so the caller is
 * responsible for doing so.
 */
class OperateData {
    Connection conn;
    String tablename = "match_simple";
    /** Project name wrapped in single quotes (kept in that form for existing readers of the field). */
    String proj_name;
    int postsCount;
    int score;
    int viewCount;
    int answerCount;
    int favoriteCount;

    OperateData() {
    }

    /**
     * Opens a connection to {@code ossean_db} and reads the counters of the
     * given project. When the project is missing or the query fails (the error
     * is printed), the counters stay 0.
     *
     * @param proj_name unquoted project name
     */
    public OperateData(String proj_name) {
        conn = new ConnectionSql("ossean_db").connection();
        this.proj_name = "'" + proj_name + "'";
        // Bind the name as a parameter: names containing quotes must not break the query.
        String sql = "select postsCount,score,viewCount,answerCount,favoriteCount from "
                + tablename + " where proj_name=?";
        PreparedStatement st = null;
        try {
            st = conn.prepareStatement(sql);
            st.setString(1, proj_name);
            ResultSet rs = st.executeQuery();
            if (rs.next()) {
                postsCount = rs.getInt(1);
                score = rs.getInt(2);
                viewCount = rs.getInt(3);
                answerCount = rs.getInt(4);
                favoriteCount = rs.getInt(5);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(st);
        }
    }

    /**
     * Sets one integer column of this project's row.
     *
     * @param column_allCount name of the column to update (inserted into the
     *        SQL text verbatim, so it must be a trusted identifier)
     * @param allCount value to store
     * @return {@code true} when the statement ran, {@code false} on any error
     */
    public boolean updateData(String column_allCount, int allCount) {
        String sql = "update " + tablename + " set " + column_allCount + " = ? where proj_name=?";
        PreparedStatement st = null;
        try {
            st = conn.prepareStatement(sql);
            st.setInt(1, allCount);
            st.setString(2, unquotedProjName());
            st.execute();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(st);
        }
    }

    /** Returns {@link #proj_name} without its surrounding single quotes, if it has them. */
    private String unquotedProjName() {
        String name = proj_name;
        if (name != null && name.length() >= 2
                && name.charAt(0) == '\'' && name.charAt(name.length() - 1) == '\'') {
            return name.substring(1, name.length() - 1);
        }
        return name;
    }

    /** Closes a statement (and with it its result set), reporting but not propagating failures. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Computes a combined activity count for every project of
 * {@code match_simple} and refreshes the ranking based on it.
 */
public class FirstSort {

    /**
     * Reads the {@code proj_name} column of a table in {@code ossean_db}.
     *
     * @param tablename table to read (inserted into the SQL text verbatim)
     * @return the project names read; on error the stack trace is printed and
     *         the names collected so far are returned
     */
    public List<String> getAll_proj_names(String tablename) {
        List<String> proj_names = new ArrayList<String>();
        Connection conn = null;
        Statement st = null;
        try {
            conn = new ConnectionSql("ossean_db").connection();
            st = conn.createStatement();
            ResultSet rs = st.executeQuery("select proj_name from " + tablename);
            while (rs.next()) {
                proj_names.add(rs.getString(1));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (st != null) {
                try {
                    st.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            closeQuietly(conn);
        }
        return proj_names;
    }

    /**
     * For every project, stores the sum of its answer, post, score, favorite
     * and view counts in column {@code allCount}, then runs
     * {@code UpdateRank} on that column.
     *
     * @throws SQLException declared by the original signature
     */
    public void allCountSort() throws SQLException {
        for (String proj_name : getAll_proj_names("match_simple")) {
            OperateData opData = new OperateData(proj_name);
            try {
                int allCount = opData.answerCount + opData.postsCount + opData.score
                        + opData.favoriteCount + opData.viewCount;
                opData.updateData("allCount", allCount);
            } finally {
                // OperateData opens one connection per project and never closes it.
                closeQuietly(opData.conn);
            }
        }
        UpdateRank updateRank = new UpdateRank("match_simple", "allCount");
        updateRank.operateSql();
    }

    public static void main(String[] args) {
        try {
            new FirstSort().allCountSort();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Closes a connection, reporting but not propagating failures. */
    private static void closeQuietly(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,17 @@
package Sort;
/**
*
* @author LiLy
*
*/
/**
 * Holder for the two weights of a post "hotness" score. The score itself is
 * not implemented yet.
 */
public class PostsHot {
    double w1 = 0d;
    double w2 = 0d;

    /**
     * Stores the two weights (previously the arguments were silently dropped).
     *
     * @param w1 first weight
     * @param w2 second weight
     */
    public PostsHot(double w1, double w2) {
        this.w1 = w1;
        this.w2 = w2;
    }

    /**
     * Placeholder: always returns {@code 0}.
     *
     * @param viewCount currently unused
     * @param lastActivityDate currently unused
     * @return {@code 0d}
     */
    public double cla_postHot(int viewCount, String lastActivityDate) {
        return 0d;
    }
}

@ -0,0 +1,215 @@
package Sort;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.sun.corba.se.impl.encoding.OSFCodeSetRegistry.Entry;
import support.ConnectionSql;
import support.DateHandle;
import update.UpdateRank;
/**
*
* @author LiLy
*
*/
/**
 * Ranks the projects of {@code simple_test_newPostsCount} by the number of
 * tagged posts that were active after a reference date.
 */
public class SortWithNewpost {
    String column = "newPostsCount";
    String tablename = "simple_test_newPostsCount";
    Connection conn = new ConnectionSql("ossean_db").connection();
    String date;

    /**
     * @param date reference date; it is compared against
     *        {@code lastActivityDate} inside SQL and inserted verbatim into
     *        the statement, so it must be a trusted literal
     */
    public SortWithNewpost(String date) {
        this.date = date;
    }

    public static void main(String[] args) {
        SortWithNewpost sort = new SortWithNewpost("2014-08-31 00:00:00");
        sort.start();
    }

    /**
     * Counts the new posts per project, stores the counts in column
     * {@code newPostsCount} and lets {@code UpdateRank} fill
     * {@code rank_newPostsCount}.
     */
    public void start() {
        Map<String, Integer> count = getnewPostsCount();
        updateTable(conn, tablename, "newPostsCount", "DBMS", count);
        UpdateRank updateRank = new UpdateRank(tablename, "newPostsCount", "rank_newPostsCount");
        updateRank.run();
    }

    /**
     * Writes every map entry to the table: {@code updatecolumn} is set to the
     * entry value in the rows whose {@code conditionColumn} equals the entry
     * key. Updates are sent in batches of 1000 and committed at the end.
     *
     * <p>The connection is closed before returning, also when the update
     * fails (the error is printed).
     *
     * @param conn connection to use; closed by this method
     * @param tablename table to update
     * @param updatecolumn column receiving the values
     * @param conditionColumn column matched against the keys
     * @param count key/value pairs to write
     */
    public void updateTable(Connection conn, String tablename, String updatecolumn,
            String conditionColumn, Map<String, Integer> count) {
        String sql = "update " + tablename + " set " + updatecolumn + "=" + "? where "
                + conditionColumn + " = ?";
        PreparedStatement ps = null;
        try {
            conn.setAutoCommit(false);
            ps = conn.prepareStatement(sql);
            int flag = 1;
            System.out.println("start!");
            for (Map.Entry<String, Integer> entry : count.entrySet()) {
                ps.setInt(1, entry.getValue());
                ps.setString(2, entry.getKey());
                ps.addBatch();
                if (flag++ % 1000 == 0) {
                    ps.executeBatch();
                    ps.clearBatch();
                    System.out.println("update: " + flag);
                }
            }
            ps.executeBatch();
            ps.clearBatch();
            conn.commit();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ps);
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Counts, for every project listed in {@code simple_test_newpostscount},
     * the rows of {@code posts_split_tags} that are newer than the reference
     * date and carry one of the project's tags (taken from
     * {@code simple_test_newpostscount_match}).
     *
     * <p>A temporary table {@code temp}, indexed by {@code tag_id}, holds the
     * recent rows while counting.
     *
     * @return project name to count; on error the stack trace is printed and
     *         the entries gathered so far are returned
     */
    public Map<String, Integer> getnewPostsCount() {
        Map<String, Integer> count = new HashMap<String, Integer>();
        Statement st = null;
        PreparedStatement tagsOfProject = null;
        PreparedStatement postsOfTag = null;
        try {
            conn.setAutoCommit(false);
            st = conn.createStatement();
            st.execute("create temporary table temp select tag_id,lastActivityDate from posts_split_tags where lastActivityDate > '"
                    + date + "'");
            st.execute("alter table temp add index index_tag_id (tag_id)");
            conn.commit();
            System.out.println("temp table create completement!");

            // Parameterized so that project names containing quotes cannot break the query.
            tagsOfProject = conn.prepareStatement(
                    "select tag_id from simple_test_newpostscount_match where DBMS= ?");
            postsOfTag = conn.prepareStatement("select count(*) from temp where tag_id = ?");

            ResultSet projects = st.executeQuery("select DBMS from simple_test_newpostscount");
            while (projects.next()) {
                String projName = projects.getString(1);
                int tempCount = 0;
                tagsOfProject.setString(1, projName);
                ResultSet tags = tagsOfProject.executeQuery();
                while (tags.next()) {
                    postsOfTag.setInt(1, tags.getInt(1));
                    ResultSet hits = postsOfTag.executeQuery();
                    while (hits.next()) {
                        tempCount += hits.getInt(1);
                    }
                    hits.close();
                }
                tags.close();
                count.put(projName, tempCount);
                System.out.println(projName);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(postsOfTag);
            closeQuietly(tagsOfProject);
            closeQuietly(st);
        }
        return count;
    }

    /**
     * Selects one column, optionally restricted to the rows whose
     * {@code conditionColumn} equals a string value.
     *
     * <p>The statement behind the result cannot be closed here; callers should
     * close {@code rs.getStatement()} when they are done.
     *
     * @param conn connection to query
     * @param tablename table to read
     * @param column column to select
     * @param conditionColumn filter column; an empty string disables the filter
     * @param condition value the filter column must equal
     * @return the open result set, or {@code null} when the query failed
     */
    public ResultSet getData(Connection conn, String tablename, String column,
            String conditionColumn, String condition) {
        boolean filtered = !conditionColumn.isEmpty();
        String sql = "select " + column + " from " + tablename
                + (filtered ? " where " + conditionColumn + "=?" : "");
        ResultSet rs = null;
        try {
            PreparedStatement st = conn.prepareStatement(sql);
            if (filtered) {
                st.setString(1, condition);
            }
            rs = st.executeQuery();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return rs;
    }

    /**
     * Selects one column, optionally restricted to the rows whose
     * {@code conditionColumn} equals an integer value.
     *
     * <p>The statement behind the result cannot be closed here; callers should
     * close {@code rs.getStatement()} when they are done.
     *
     * @param conn connection to query
     * @param tablename table to read
     * @param column column to select
     * @param conditionColumn filter column; an empty string disables the filter
     * @param condition value the filter column must equal
     * @return the open result set, or {@code null} when the query failed
     */
    public ResultSet getData(Connection conn, String tablename, String column,
            String conditionColumn, int condition) {
        boolean filtered = !conditionColumn.isEmpty();
        String sql = "select " + column + " from " + tablename
                + (filtered ? " where " + conditionColumn + "=?" : "");
        ResultSet rs = null;
        try {
            PreparedStatement st = conn.prepareStatement(sql);
            if (filtered) {
                st.setInt(1, condition);
            }
            rs = st.executeQuery();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return rs;
    }

    /** Closes a statement (and its result sets), reporting but not propagating failures. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,12 @@
package Sort;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Empty placeholder class: it declares no members yet. Judging by the name it
 * is presumably meant to rank by post and view counts — TODO confirm.
 */
public class SortWithPostsAndView {
// Unfinished method stub, left commented out:
// public List<E> name() {
//
// }
}

@ -0,0 +1,263 @@
package Sort;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import support.ConnectionSql;
import support.DateHandle;
import update.UpdateRank;
/**
 * Ranks the projects of {@code simple_test_newPostsCount} by a time-weighted
 * count of their tagged posts: every post contributes
 * {@code lev / (lev + 1)}, where {@code lev} is the level
 * {@code DateHandle.levDate} assigns to the post's last activity date.
 */
public class SortWithTimeModel {
    String column = "newPostsCount";
    String tablename = "simple_test_newPostsCount";
    Connection conn = new ConnectionSql("ossean_db").connection();
    String date;
    int n;
    DateHandle dateHandle;

    /**
     * @param date reference date in {@code yyyy-MM-dd HH:mm:ss} form
     * @param n number of monthly levels handed to {@code DateHandle}
     */
    public SortWithTimeModel(String date, int n) {
        this.date = date;
        this.n = n;
        // Builds the shared level table later used by DateHandle.levDate(...).
        dateHandle = new DateHandle(date, n);
    }

    public static void main(String[] args) {
        // NOTE(review): September has 30 days, so "2014-09-31" is not a real
        // date; presumably the lenient date parser rolls it over to
        // 2014-10-01 — confirm which reference date was intended.
        SortWithTimeModel sort = new SortWithTimeModel("2014-09-31 00:00:00",
                12);
        sort.start();
    }

    /**
     * Computes the weighted counts, stores them in column
     * {@code postsCountTime} and lets {@code UpdateRank} fill
     * {@code rank_PostsCountTime}.
     */
    public void start() {
        Map<String, Double> count = getnewPostsCountTime();
        updateTable(conn, tablename, "postsCountTime", "DBMS", count);
        UpdateRank updateRank = new UpdateRank(tablename, "postsCountTime",
                "rank_PostsCountTime");
        updateRank.run();
    }

    /**
     * Writes every map entry to the table: {@code updatecolumn} is set to the
     * entry value in the rows whose {@code conditionColumn} equals the entry
     * key. Updates are sent in batches of 1000 and committed at the end.
     *
     * <p>The connection is closed before returning, also when the update
     * fails (the error is printed).
     *
     * @param conn connection to use; closed by this method
     * @param tablename table to update
     * @param updatecolumn column receiving the values
     * @param conditionColumn column matched against the keys
     * @param count key/value pairs to write
     */
    public void updateTable(Connection conn, String tablename,
            String updatecolumn, String conditionColumn,
            Map<String, Double> count) {
        String sql = "update " + tablename + " set " + updatecolumn + "="
                + "? where " + conditionColumn + " = ?";
        PreparedStatement ps = null;
        try {
            conn.setAutoCommit(false);
            ps = conn.prepareStatement(sql);
            int flag = 1;
            System.out.println("start!");
            for (Map.Entry<String, Double> entry : count.entrySet()) {
                ps.setDouble(1, entry.getValue());
                ps.setString(2, entry.getKey());
                ps.addBatch();
                if (flag++ % 1000 == 0) {
                    ps.executeBatch();
                    ps.clearBatch();
                    System.out.println("update: " + flag);
                }
            }
            ps.executeBatch();
            ps.clearBatch();
            conn.commit();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ps);
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Weight of a level.
     *
     * @param lev level as returned by {@code DateHandle.levDate}
     * @return {@code lev / (lev + 1)} as a double
     */
    public double getGen(int lev) {
        return (double) lev / (lev + 1);
    }

    /**
     * Sums the weights of all posts per project.
     *
     * <p>Projects come from {@code simple_test_newpostscount}; posts are the
     * rows of {@code posts_split_tags} joined to
     * {@code simple_test_newpostscount_match} on {@code tag_id}. The join is
     * materialized in a work table {@code temp} (dropped first if a previous
     * run left it behind, and dropped again at the end) and read in pages of
     * 1000 rows ordered by {@code id}.
     *
     * <p>NOTE(review): paging assumes {@code id} is unique within
     * {@code temp}; if one post row can match several projects, rows sharing
     * the id at a page boundary are read twice — confirm against the schema.
     *
     * @return project name to weighted count; on error the stack trace is
     *         printed and the values accumulated so far are returned
     */
    public Map<String, Double> getnewPostsCountTime() {
        Map<String, Double> count = new HashMap<String, Double>();
        Statement st = null;
        PreparedStatement ps = null;
        try {
            conn.setAutoCommit(false);
            st = conn.createStatement();

            // Start every known project at zero.
            ResultSet rs = st.executeQuery("select DBMS from simple_test_newpostscount");
            while (rs.next()) {
                count.put(rs.getString(1), 0d);
            }
            rs.close();

            // A table left over from an aborted run would make the create fail.
            st.execute("drop table if exists temp");
            st.execute("create table temp "
                    + "SELECT p.id as id,s.DBMS as DBMS,s.tag_id as tag_id,p.lastActivityDate "
                    + "from simple_test_newpostscount_match as s "
                    + "INNER JOIN posts_split_tags as p ON s.tag_id=p.tag_id");
            st.execute("alter table temp add index index_id (id)");
            conn.commit();
            System.out.println("temp table create completement!");

            int page_size = 1000;
            int page_count = 0;
            // ">=" so that the row AT the page offset is included (">" skipped
            // the very first row); "order by" makes the page contents well defined.
            ps = conn.prepareStatement(
                    "select DBMS,lastActivityDate from temp where id>=(select id from temp order by id limit ?,1) order by id limit ?");
            while (true) {
                ps.setInt(1, page_count * page_size);
                ps.setInt(2, page_size);
                rs = ps.executeQuery();
                boolean isEmpty = true;
                while (rs.next()) {
                    isEmpty = false;
                    String projName = rs.getString(1);
                    int lev = DateHandle.levDate(rs.getString(2));
                    Double soFar = count.get(projName);
                    // A project missing from the initial list starts at zero.
                    count.put(projName, (soFar == null ? 0d : soFar) + getGen(lev));
                }
                rs.close();
                if (isEmpty) {
                    break;
                }
                ++page_count;
                System.out.print(new Date());
                System.out.println("\t\t" + page_count);
            }
            st.execute("drop table temp");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ps);
            closeQuietly(st);
        }
        return count;
    }

    /**
     * Selects one column, optionally restricted to the rows whose
     * {@code conditionColumn} equals a string value.
     *
     * <p>The statement behind the result cannot be closed here; callers should
     * close {@code rs.getStatement()} when they are done.
     *
     * @param conn connection to query
     * @param tablename table to read
     * @param column column to select
     * @param conditionColumn filter column; an empty string disables the filter
     * @param condition value the filter column must equal
     * @return the open result set, or {@code null} when the query failed
     */
    public ResultSet getData(Connection conn, String tablename, String column,
            String conditionColumn, String condition) {
        boolean filtered = !conditionColumn.isEmpty();
        String sql = "select " + column + " from " + tablename
                + (filtered ? " where " + conditionColumn + "=?" : "");
        ResultSet rs = null;
        try {
            PreparedStatement st = conn.prepareStatement(sql);
            if (filtered) {
                st.setString(1, condition);
            }
            rs = st.executeQuery();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return rs;
    }

    /**
     * Selects one column, optionally restricted to the rows whose
     * {@code conditionColumn} equals an integer value.
     *
     * <p>The statement behind the result cannot be closed here; callers should
     * close {@code rs.getStatement()} when they are done.
     *
     * @param conn connection to query
     * @param tablename table to read
     * @param column column to select
     * @param conditionColumn filter column; an empty string disables the filter
     * @param condition value the filter column must equal
     * @return the open result set, or {@code null} when the query failed
     */
    public ResultSet getData(Connection conn, String tablename, String column,
            String conditionColumn, int condition) {
        boolean filtered = !conditionColumn.isEmpty();
        String sql = "select " + column + " from " + tablename
                + (filtered ? " where " + conditionColumn + "=?" : "");
        ResultSet rs = null;
        try {
            PreparedStatement st = conn.prepareStatement(sql);
            if (filtered) {
                st.setInt(1, condition);
            }
            rs = st.executeQuery();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return rs;
    }

    /** Closes a statement (and its result sets), reporting but not propagating failures. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,35 @@
import java.io.*;
/**
 * Splits a large text file into numbered part files with a fixed number of
 * lines each.
 */
public class SplitHugeFile {

    /**
     * Copies {@code filename} into consecutive part files named by
     * {@link #writeFilename(String, int)} (part 0, 1, 2, ...), each holding at
     * most {@code linesCount} lines terminated by {@code '\n'}.
     *
     * <p>Part 0 is always created, even for an empty input. Further parts are
     * only created when there is a line to put into them, so an input whose
     * line count is an exact multiple of {@code linesCount} no longer leaves an
     * empty trailing file. Errors are printed; both streams are closed in every
     * case.
     *
     * @param filename file to split
     * @param linesCount maximum number of lines per part; values below 1 put
     *        every line into its own part
     */
    public void splitFile(String filename, int linesCount) {
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            reader = new BufferedReader(new FileReader(new File(filename)));
            int count = 0;
            int file_count = 0;
            writer = openPart(filename, file_count++);
            String tempString;
            while ((tempString = reader.readLine()) != null) {
                if (writer == null) {
                    // Open the next part lazily, only when a line is waiting for it.
                    writer = openPart(filename, file_count++);
                }
                writer.write(tempString + '\n');
                count++;
                if (count >= linesCount) {
                    count = 0;
                    writer.close();
                    writer = null;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
            closeQuietly(reader);
        }
    }

    /**
     * Builds the name of part {@code count}: the number is inserted in front
     * of the file extension ({@code Posts.xml} -> {@code Posts3.xml}) or
     * appended when the file name has no extension.
     *
     * @param filename original file name or path
     * @param count part number
     * @return name of the part file
     */
    String writeFilename(String filename, int count) {
        int nameStart = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\')) + 1;
        int dot = filename.lastIndexOf('.');
        if (dot <= nameStart) {
            // No extension in the last path segment (or a leading-dot name).
            dot = filename.length();
        }
        return filename.substring(0, dot) + count + filename.substring(dot);
    }

    /** Opens the writer for the given part number. */
    private BufferedWriter openPart(String filename, int part) throws IOException {
        return new BufferedWriter(new FileWriter(new File(writeFilename(filename, part))));
    }

    /** Closes a stream, reporting but not propagating failures. */
    private static void closeQuietly(Closeable stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,159 @@
import java.sql.*;
import java.util.*;
import java.util.Date;
import support.ConnectionSql;
import update.UpdateTableColumn;
/**
 * Explodes the {@code <a><b><c>} style tag string of every source row into
 * one (id, tag) row per tag in a destination table.
 */
public class SplitTags {
    String source_table;
    String source_column1;
    String source_column2;
    String des_table;
    String des_column1;
    String des_column2;
    String postTypeId;

    /** Uses the default tables: {@code posts(id, tags)} into {@code posts_split_tags(posts_id, tag)}. */
    public SplitTags() {
        this("posts", "id", "tags", "posts_split_tags", "posts_id", "tag");
        this.postTypeId = "postTypeId";
    }

    /**
     * @param source_table table to read
     * @param source_column1 integer key column of the source table
     * @param source_column2 column holding the tag string
     * @param des_table table to fill
     * @param des_column1 destination column receiving the source key
     * @param des_column2 destination column receiving one tag
     */
    public SplitTags(String source_table, String source_column1,
            String source_column2, String des_table, String des_column1,
            String des_column2) {
        this.source_table = source_table;
        this.source_column1 = source_column1;
        this.source_column2 = source_column2;
        this.des_table = des_table;
        this.des_column1 = des_column1;
        this.des_column2 = des_column2;
    }

    public static void main(String[] args) {
        // Tag separation
        SplitTags st = new SplitTags("posts", "id", "tags", "posts_split_tags", "posts_id", "tag");
        st.splitTags();
    }

    /**
     * Finds the resume point of an interrupted run: runs {@code break_sql},
     * reads column {@code des_column1} of its first row and deletes every
     * destination row with that id, so the possibly half-written last post can
     * be processed again.
     *
     * @param conn connection to use
     * @param break_sql query returning the last processed id in its first row
     * @return that id, or {@code 0} when the query returns no row or fails
     */
    public int breakTest(Connection conn, String break_sql) {
        int lastId = 0;
        Statement st = null;
        try {
            st = conn.createStatement();
            ResultSet rs = st.executeQuery(break_sql);
            if (rs.next()) {
                lastId = rs.getInt(des_column1);
                String del_sqlString = "delete from " + des_table + " where " + des_column1 + "=" + lastId;
                st.execute(del_sqlString);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(st);
        }
        return lastId;
    }

    /**
     * Reads the source table in pages of 100000 rows in key order and inserts
     * one destination row per tag, committing roughly every 10000 source
     * rows. A previous, interrupted run is resumed from the id reported by
     * {@link #breakTest(Connection, String)}.
     *
     * <p>Pages are fetched with {@code key > lastSeen ORDER BY key LIMIT n},
     * so every row is visited exactly once, including the first row and when
     * resuming. This assumes the key column is unique. Rows whose tag string
     * is {@code null} or shorter than two characters are skipped. Errors are
     * printed and end the run; statements and the connection are always closed.
     */
    public void splitTags() {
        Connection conn = null;
        PreparedStatement p_statement = null;
        PreparedStatement ps = null;
        try {
            conn = new ConnectionSql("ossean_db").connection();
            int page_size = 100000;
            conn.setAutoCommit(false); // commit manually

            // Resume query: highest id already present in the destination.
            String break_sql = "select " + des_column1 + " from " + des_table
                    + " order by " + des_column1 + " desc limit 0,1";
            int breakPoint = breakTest(conn, break_sql);

            System.out.print(new Date());
            System.out.println("\t\tstart");

            String sql = "select " + source_column1 + "," + source_column2
                    + " from " + source_table + " where " + source_column1
                    + " > ? order by " + source_column1 + " limit ?";
            String sqlInsert = " insert into " + des_table + " (" + des_column1
                    + "," + des_column2 + ") values(?,?)";
            p_statement = conn.prepareStatement(sql);
            ps = conn.prepareStatement(sqlInsert);

            // breakTest() removed the rows of the resume id, so that id is read again.
            int lastId = breakPoint > 0 ? breakPoint - 1 : Integer.MIN_VALUE;
            int page_count = 0;
            int count = 0;
            while (true) {
                p_statement.setInt(1, lastId);
                p_statement.setInt(2, page_size);
                ResultSet rs = p_statement.executeQuery();
                boolean isEmpty = true;
                while (rs.next()) {
                    isEmpty = false;
                    lastId = rs.getInt(source_column1);
                    String tags = rs.getString(source_column2);
                    if (tags == null || tags.length() < 2) {
                        continue;
                    }
                    // Strip the outer "<" and ">" and split between tags.
                    String tag[] = tags.substring(1, tags.length() - 1).split("><");
                    for (int i = 0; i < tag.length; i++) {
                        ps.setInt(1, lastId);
                        ps.setString(2, tag[i]);
                        ps.addBatch();
                    }
                    if (count++ > 10000) {
                        ps.executeBatch();
                        ps.clearBatch();
                        conn.commit();
                        count = 0;
                    }
                }
                rs.close();
                if (isEmpty) {
                    break;
                }
                page_count++;
                System.out.print(new Date());
                System.out.println("\t\t" + page_count);
            }
            ps.executeBatch();
            ps.clearBatch();
            conn.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ps);
            closeQuietly(p_statement);
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Closes a statement (and its result sets), reporting but not propagating failures. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,14 @@
import java.text.SimpleDateFormat;
import java.util.*;
/** Converts {@code yyyy-MM-dd HH:mm:ss} text into a {@link Date}. */
public class StrToDate {

    /**
     * Parses a date-time string.
     *
     * @param str text in {@code yyyy-MM-dd HH:mm:ss} form
     * @return the parsed date, or {@code null} when parsing fails (the stack
     *         trace is printed in that case)
     */
    public Date strToDate(String str) {
        SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            return parser.parse(str);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}

@ -0,0 +1,26 @@
import java.util.*;
import java.text.*;
import support.ReadBigFile;
public class Test {
public static void main(String[] args){
SplitTags st=new SplitTags("posts","id","tags","posts_split_tags","posts_id","tag");//标签分离
st.splitTags();
// StrToDate std=new StrToDate();//字符串转日期
// Date date=std.strToDate("1992-12-2 02:1:2");
//// System.out.print(date);
// ReadBigFile readBigFile=new ReadBigFile();
// readBigFile.readFile("C:\\Users\\lili\\Posts.xml");
// (new SplitHugeFile()).splitFile("C:\\Users\\lili\\Posts.xml",2000000);//大文件分解,以行数为单位
// String[] realSort={"aaa","ac","bc","g","d","ab","e"};
// String[] ideaSort={"aaa","ab","ac","bc","d","e"};
// System.out.println(new CalculateNDCG().cal_NDCG(realSort, ideaSort, 3));
}
}

@ -0,0 +1,16 @@
StatisticsPostsCount.java:统计每个项目对应的帖子信息(帖子数,帖子的总回复数,获赞数,收藏数,浏览数,即postsCount,answerCount,score,favoriteCount,viewCount),
并更新到原表的对应项中。
主要方法cal_count从match_simple读出项目和匹配的标签根据标签在split_posts_tags表找出posts_id统计每个posts的count。
Update_match_result_proj_star.java: 更新match_result中的proj_stars用于计算项目在托管社区的热度平均值。
主要类Update_match_result_proj_star
主要方法: setProj_star() 操作update语句更新结果
getAVGStar(String[] onlySource, String name)返回onlySource表中name项目的stars的平均值
V:定义一些用到的常量
UpdateMatch_result定义一些方法
方法sourceFormat(String source)由source还原项目的来源表名返回一个该项目来源的String[]还原每个项目来源表并去掉FreeCode因为该网站项目数据没有可量化项
getSourceName(int id)由项目id获得从open_source_projects获得项目名称和项目源
UpdateRank.java:多线程算法。给定一个表名和该表的一列,按这一列对记录排序,并更新rank列作为名次;rank列名通过更改构造函数设置即可。
Update_match_result_rank:更新match_result排名;若同一项目同时存在于多个社区,取排名最小的作为项目排名。
UpdateTagPosts.java:方法更新tag表中每个tag关联的posts个数postsNum。
方法countPostsNum实现功能

@ -0,0 +1,27 @@
package support;
import java.sql.Connection;
import java.sql.DriverManager;
/**
 * Opens JDBC connections to a MySQL database on {@code 127.0.0.1:3306} with
 * the fixed credentials stored in this class.
 */
public class ConnectionSql {
    String driver = "com.mysql.jdbc.Driver";
    String url;
    String user = "root";
    String password = "mysql";

    /**
     * Builds the JDBC URL for one database.
     *
     * <p>The query string separates parameters with a plain {@code &} and uses
     * the Connector/J property {@code characterEncoding}. The previous URL
     * contained an XML-escaped {@code &amp;} and the non-existent property
     * {@code charsetEncoding}, so the encoding setting never took effect.
     *
     * @param sqlName name of the database (schema) to connect to
     */
    public ConnectionSql(String sqlName) {
        url = "jdbc:mysql://127.0.0.1:3306/" + sqlName
                + "?useUnicode=true&characterEncoding=utf8&useCursorFetch=true&defaultFetchSize=100";
    }

    /**
     * Loads the driver and opens a connection.
     *
     * @return the connection, or {@code null} when the driver cannot be loaded
     *         or the connection fails (the stack trace is printed); callers
     *         must be prepared for {@code null}
     */
    public Connection connection() {
        Connection conn = null;
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, password);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }
}

@ -0,0 +1,29 @@
package support;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/** Numeric helpers for lists of values. */
public class DataHandle {

    /**
     * Normalizes a list by its Frobenius (Euclidean) norm: every element is
     * divided by the square root of the sum of squares of all elements.
     *
     * @param data values to normalize; not modified
     * @return a new list with the scaled values in the same order; the
     *         elements are {@code NaN} when all inputs are zero
     */
    public static List<Double> frobenius(List<Double> data) {
        double squares = 0d;
        for (double value : data) {
            squares += value * value;
        }
        double norm = Math.pow(squares, 0.5);

        List<Double> scaled = new ArrayList<Double>();
        for (Double value : data) {
            scaled.add(value / norm);
        }
        return scaled;
    }
}

@ -0,0 +1,126 @@
package support;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
*
* @author LiLy
*
*/
/**
 * Date helpers for the {@code yyyy-MM-dd HH:mm:ss} format, plus a table of
 * monthly boundary dates used to assign a "level" to a date string.
 *
 * @author LiLy
 */
public class DateHandle {
    /**
     * Boundary dates shared by all instances: index {@code i} (1..n) holds the
     * reference date minus {@code i - 1} months; index 0 is unused. Stays
     * {@code null} until {@link #DateHandle(String, int)} has run.
     */
    static String levDate[];

    /** Creates a helper without touching the boundary table. */
    public DateHandle() {
    }

    /**
     * Rebuilds the shared boundary table.
     *
     * @param refDate newest boundary, in {@code yyyy-MM-dd HH:mm:ss} form
     * @param n number of monthly boundaries to generate
     */
    public DateHandle(String refDate, int n) {
        String[] boundaries = new String[n + 1];
        Calendar c = Calendar.getInstance();
        c.setTime(stringToDate(refDate));
        for (int i = 1; i <= n; i++) {
            boundaries[i] = DateHandle.FormatDate(c.getTime());
            c.add(Calendar.MONTH, -1);
        }
        DateHandle.levDate = boundaries;
    }

    /**
     * Compares two dates.
     *
     * @return 1 when {@code date1} is later than {@code date2}, -1 when it is
     *         earlier, 0 when they are equal or when the comparison fails
     *         (e.g. a {@code null} argument; the stack trace is printed)
     */
    public static int compareDate(Date date1, Date date2) {
        try {
            if (date1.getTime() > date2.getTime()) {
                return 1;
            } else if (date1.getTime() < date2.getTime()) {
                return -1;
            } else {
                return 0;
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        }
        return 0;
    }

    /**
     * Tells whether a string can be parsed as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param string text to check; may be {@code null}
     * @return {@code false} for {@code null}, empty or unparsable text
     */
    public static boolean canFormatToDate(String string) {
        if (string == null || string.isEmpty()) {
            return false;
        }
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            sdf.parse(string);
        } catch (ParseException e) {
            return false;
        }
        return true;
    }

    /**
     * Formats a date as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param date date to format
     * @return the formatted text
     */
    public static String FormatDate(Date date) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return sdf.format(date);
    }

    /**
     * Parses {@code yyyy-MM-dd HH:mm:ss} text.
     *
     * @param string text to parse
     * @return the date, or {@code null} when the text cannot be parsed (the
     *         stack trace is printed)
     */
    public static Date stringToDate(String string) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date date = null;
        try {
            date = sdf.parse(string);
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return date;
    }

    /**
     * Returns the level of a date string: the highest index {@code i} whose
     * boundary is lexicographically smaller than {@code date}.
     *
     * <p>NOTE(review): the boundaries get older as the index grows and the
     * scan starts at the oldest one, so with a table built by the constructor
     * the result is either {@code n} (date newer than the oldest boundary) or
     * 0 — confirm whether graded levels were intended.
     *
     * @param date date text in the same format as the boundaries; may be {@code null}
     * @return the level, or 0 when {@code date} is {@code null}, no boundary
     *         table has been built yet, or no boundary is older than {@code date}
     */
    public static int levDate(String date) {
        String[] boundaries = DateHandle.levDate;
        if (date == null || boundaries == null) {
            return 0;
        }
        for (int i = boundaries.length - 1; i > 0; i--) {
            if (date.compareTo(boundaries[i]) > 0) {
                return i;
            }
        }
        return 0;
    }

    public static void main(String[] args) {
        // new DateHandle("2014-02-01 00:00:00", 12);
        // if ("2014-02-01 00:00:00".compareTo("2014-02-01 00:00:00.23")==1)
        System.out.print("2014-02-01 00:01:00".compareTo("2014-02-01 01:00:00.23"));
    }
}

Binary file not shown.

@ -0,0 +1,71 @@
package support;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
/**
 * Executes a large SQL script: every line of the file is treated as one SQL
 * statement and sent to {@code ossean_db} in batches of 1000.
 */
public class ReadBigFile extends Thread {
    String filename;

    /** @param filename path of the SQL file to execute */
    public ReadBigFile(String filename) {
        this.filename = filename;
    }

    /**
     * Executes the given statements as one batch in one transaction on a
     * fresh connection. Errors are printed; the statement and the connection
     * are closed in every case.
     *
     * @param sqls SQL statements to run
     */
    public void insertSql(List<String> sqls) {
        Connection con = null;
        Statement st = null;
        try {
            con = new ConnectionSql("ossean_db").connection();
            con.setAutoCommit(false);
            st = con.createStatement();
            for (String s : sqls) {
                st.addBatch(s);
            }
            st.executeBatch();
            con.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (st != null) {
                try {
                    st.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (con != null) {
                try {
                    con.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads the file line by line and hands every 1000 lines, plus the
     * remainder, to {@link #insertSql(List)}. Read errors are printed instead
     * of being silently dropped, and the reader is always closed.
     */
    public void readFile() {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(new File(filename)), 20 * 1024 * 1024);
            List<String> sqls = new ArrayList<String>();
            int count = 1;
            String tempString;
            while ((tempString = reader.readLine()) != null) {
                sqls.add(tempString);
                if (count++ % 1000 == 0) {
                    insertSql(sqls);
                    sqls.clear();
                    System.out.println(count);
                }
            }
            // Do not open a connection just to run an empty batch.
            if (!sqls.isEmpty()) {
                insertSql(sqls);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Thread body: processes the file. */
    public void run() {
        readFile();
    }

    public static void main(String[] args) {
        String path = "C:\\Users\\lili\\Desktop\\sql\\";
        // run() is called directly, so the file is processed on the calling thread.
        new ReadBigFile(path + "oschina_project.sql").run();
    }
}

@ -0,0 +1,91 @@
package support;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.*;
import java.lang.Math;
public class Similarity extends Thread{
/**
 * Arithmetic mean of a list.
 *
 * @param data values to average
 * @return the sum of the values divided by their number; {@code NaN} for an
 *         empty list
 */
public Double cla_EX(List<Double> data) {
    int size = data.size();
    Double total = 0d;
    for (Double value : data) {
        total += value;
    }
    return total / size;
}
/**
 * Correlation of two value lists, computed from means as
 * {@code (E[xy] - E[x]E[y]) / (sqrt(E[x^2] - E[x]^2) * sqrt(E[y^2] - E[y]^2))}.
 *
 * @param data1 first series
 * @param data2 second series; must have at least as many elements as {@code data1}
 * @return the correlation value; {@code NaN} when a series has zero variance
 */
public Double cla_sim(List<Double> data1, List<Double> data2) {
    List<Double> squares1 = new ArrayList<Double>();
    for (Double x : data1) {
        squares1.add(x * x);
    }
    List<Double> squares2 = new ArrayList<Double>();
    for (Double y : data2) {
        squares2.add(y * y);
    }
    // Element-wise products, paired by position.
    List<Double> products = new ArrayList<Double>();
    Iterator<Double> second = data2.iterator();
    for (Double x : data1) {
        products.add(x * second.next());
    }

    Double mean1 = cla_EX(data1);
    Double mean2 = cla_EX(data2);
    Double meanSquare1 = cla_EX(squares1);
    Double meanSquare2 = cla_EX(squares2);
    Double meanProduct = cla_EX(products);

    double covariance = meanProduct - mean1 * mean2;
    double spread1 = Math.pow(meanSquare1 - mean1 * mean1, 0.5);
    double spread2 = Math.pow(meanSquare2 - mean2 * mean2, 0.5);
    return covariance / (spread1 * spread2);
}
public void operaterSql(String dataBaseName,String tablename,String column1,String column2) {
Connection conn=new ConnectionSql(dataBaseName).connection();
try {
Statement st=conn.createStatement();
String sql="select "+column1+" from "+tablename;
ResultSet rs=st.executeQuery(sql);
List<Double> data1=new ArrayList<Double>();
List<Double> data2=new ArrayList<Double>();
while (rs.next()) {
double temp=rs.getDouble(1);
data1.add(temp);
}
sql="select "+column2+" from "+tablename;
rs=st.executeQuery(sql);
while (rs.next()) {
double temp=rs.getDouble(1);
data2.add(temp);
}
Double similarity=cla_sim(data1,data2);
System.out.println(similarity);
tablename="'"+tablename+"'";
column1="'"+column1+"'";
column2="'"+column2+"'";
sql="replace into similary_record(table_name,column1,column2,similarity) values ("+tablename+","+column1+","+column2+","+similarity+")";
st.execute(sql);
conn.close();
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
public static void main(String[] args) {
Similarity similarity=new Similarity();
// similarity.operaterSql("ossean_db","sourceforge_project","stars","downloadCount");
// similarity.operaterSql("ossean_db","match_simple","db_score","postsCount");
// similarity.operaterSql("ossean_db","match_simple","db_score","score");
// similarity.operaterSql("ossean_db","match_simple","db_score","favoriteCount");
// similarity.operaterSql("ossean_db","match_simple","db_score","viewCount");
// similarity.operaterSql("ossean_db","match_simple","db_score","answerCount");
similarity.operaterSql("ossean_db","match_simple","postsCount","viewCount");
}
}

@ -0,0 +1,5 @@
package support;
import java.util.HashMap;
import java.util.Map;

@ -0,0 +1,101 @@
package update;
import support.ConnectionSql;
import java.sql.*;
import java.util.*;
import java.util.Date;
/**
 * Aggregates post statistics per project: for every (project, tag) row of the
 * project table it looks up the posts carrying that tag and writes the summed
 * score, favourite count, view count, answer count and the number of posts
 * back into the project row.
 */
public class StatisticsPostsCount {
    String tablename="match_simple";
    String column1="score";
    String column2="favoriteCount";
    String column3="viewCount";
    String column4="answerCount";
    String column5="postsCount";
    String proj_name="proj_name";
    String tag="tag";
    String posts_split_tags="posts_split_tags";
    String posts_id="posts_id";
    String posts="posts";
    String id="id";
    String postTypeId="postTypeId";
    /** Connection used by {@link #cal_count()}; closed when that method returns. */
    Connection conn=new ConnectionSql("ossean_db").connection();

    /** Uses the default table and column names. */
    public StatisticsPostsCount() {
    }

    /**
     * @param tablename project table to read from and update
     * @param coulumn1  column receiving the first summed post column
     * @param coulumn2  column receiving the second summed post column
     * @param coulumn3  column receiving the third summed post column
     * @param coulumn4  column receiving the fourth summed post column
     * @param coulumn5  column receiving the number of matching posts
     */
    public StatisticsPostsCount(String tablename,String coulumn1,String coulumn2,String coulumn3,String coulumn4,String coulumn5) {
        this.tablename=tablename;
        this.column1=coulumn1;
        this.column2=coulumn2;
        this.column3=coulumn3;
        this.column4=coulumn4;
        this.column5=coulumn5;
    }

    /**
     * Computes the statistics (postsCount, answerCount, score, favoriteCount,
     * viewCount) for every project and stores them in one transaction.
     *
     * <p>Tag and project names are bound as statement parameters, so values
     * containing quotes no longer break the SQL. Sums are accumulated as
     * {@code long} so that large totals cannot wrap around. The connection is
     * closed before returning, whether or not the run succeeded.
     *
     * @return {@code true} when all updates were committed, {@code false} when
     *         an error occurred (the transaction is then rolled back)
     */
    public boolean cal_count() {
        // Table and column names are configuration, not data, and cannot be bound.
        String projectsSql="select "+proj_name+","+tag+" from "+tablename;
        String postIdsSql="select "+posts_id+" from "+posts_split_tags+" where "+tag+" = ?";
        String postSql="select "+column1+","+column2+","+column3+","+column4+" from "+posts+" where "+id+" = ?";
        String updateSql="update "+tablename+" set "+column1+"=?,"+column2+"=?,"+column3+"=?,"+column4+"=?,"+column5+"=? where "+proj_name+" = ?";
        try {
            conn.setAutoCommit(false);
            try (Statement projectStatement=conn.createStatement();
                 ResultSet projects=projectStatement.executeQuery(projectsSql);
                 PreparedStatement postIdsQuery=conn.prepareStatement(postIdsSql);
                 PreparedStatement postQuery=conn.prepareStatement(postSql);
                 PreparedStatement projectUpdate=conn.prepareStatement(updateSql)) {
                System.out.print("start\t\t\t");System.out.println(new Date());
                while (projects.next()) {
                    String projectName=projects.getString(proj_name);
                    postIdsQuery.setString(1, projects.getString(tag));
                    long sum1=0, sum2=0, sum3=0, sum4=0, postCount=0;
                    // Query posts_split_tags for the ids of the posts carrying this tag.
                    try (ResultSet postIds=postIdsQuery.executeQuery()) {
                        while (postIds.next()) {
                            postQuery.setInt(1, postIds.getInt(posts_id));
                            try (ResultSet post=postQuery.executeQuery()) {
                                while (post.next()) {
                                    sum1+=post.getLong(column1);
                                    sum2+=post.getLong(column2);
                                    sum3+=post.getLong(column3);
                                    sum4+=post.getLong(column4);
                                    postCount++;
                                }
                            }
                        }
                    }
                    projectUpdate.setLong(1, sum1);
                    projectUpdate.setLong(2, sum2);
                    projectUpdate.setLong(3, sum3);
                    projectUpdate.setLong(4, sum4);
                    projectUpdate.setLong(5, postCount);
                    projectUpdate.setString(6, projectName);
                    projectUpdate.executeUpdate();
                    System.out.print("'"+projectName+"'"+"\t\t");
                    System.out.println(new Date());
                }
            }
            conn.commit();
            System.out.println("down");
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            rollbackQuietly();
            return false;
        } finally {
            closeQuietly();
        }
    }

    /** Rolls back the open transaction, reporting but not propagating failures. */
    private void rollbackQuietly() {
        if (conn == null) {
            return;
        }
        try {
            conn.rollback();
        } catch (Exception rollbackFailure) {
            rollbackFailure.printStackTrace();
        }
    }

    /** Closes the connection, reporting but not propagating failures. */
    private void closeQuietly() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (Exception closeFailure) {
            closeFailure.printStackTrace();
        }
    }

    /** Runs the aggregation with the default table and column names. */
    public static void main(String[] args) {
        StatisticsPostsCount statisticsPostsCount=new StatisticsPostsCount();
        statisticsPostsCount.cal_count();
    }
}

@ -0,0 +1,214 @@
package update;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Date;
import java.util.List;
import support.ConnectionSql;
/**
 * Ranks the rows of a table by one column: rows are read in descending order
 * of that column and their 1-based position is written to a rank column.
 *
 * <p>Extends {@link Thread} so several tables can be ranked concurrently via
 * {@code start()}; {@link #main(String[])} calls {@code run()} directly.
 */
public class UpdateRank extends Thread{
    String tablename;
    /** Column the rows are ordered by. */
    String columnname;
    /** Not used by the current code. */
    String columnname2;
    String id="id";
    /** Column that receives the rank. */
    String rank="rank";

    /**
     * Ranks by {@code columnname} and writes the rank back into that same
     * column, overwriting the values that were used for sorting.
     *
     * @param tablename  table to rank
     * @param columnname column to sort by and to overwrite with the rank
     */
    public UpdateRank(String tablename,String columnname) {
        this.tablename=tablename;
        this.columnname=columnname;
        this.rank=this.columnname; // use this column to update this column's rank
    }

    /**
     * @param tablename  table to rank
     * @param columnname column to sort by
     * @param rankString column that receives the rank
     */
    public UpdateRank(String tablename,String columnname,String rankString) {
        this.tablename=tablename;
        this.columnname=columnname;
        this.rank=rankString;
    }

    /**
     * Performs the ranking. Updates are sent in batches of 10000 rows and
     * committed after every batch, so a failure rolls back only the rows of
     * the batch in progress. Errors are printed, not propagated, and the
     * connection is always closed.
     */
    public void operateSql() {
        // The larger the value, the smaller the rank number.
        String selectSql="select "+id+" from "+tablename+" order by "+columnname+" DESC";
        String updateSql="update "+tablename+" set "+rank+"=? where "+id+"=?";
        try (Connection conn = new ConnectionSql("ossean_db").connection()) {
            conn.setAutoCommit(false);
            try (Statement st=conn.createStatement();
                 ResultSet rs=st.executeQuery(selectSql);
                 PreparedStatement ps=conn.prepareStatement(updateSql)) {
                System.out.println("start!");
                int position=0;
                while (rs.next()) {
                    position++;
                    ps.setInt(1, position);
                    ps.setInt(2, rs.getInt(1));
                    ps.addBatch();
                    if (position%10000==0) {
                        System.out.println(position);
                        ps.executeBatch();
                        ps.clearBatch();
                        conn.commit();
                    }
                }
                ps.executeBatch();
                ps.clearBatch();
                conn.commit();
            } catch (Exception e) {
                try {
                    conn.rollback();
                } catch (Exception rollbackFailure) {
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
            System.out.print(tablename+" down!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Runs {@link #operateSql()}. */
    @Override
    public void run() {
        operateSql();
    }

    /** Ranks one hard-coded table. */
    public static void main(String[] args) {
        // Targets ranked in earlier runs: tag.postsNum, oschina_project.usedNum,
        // openhub_project.useCount, sourceforge_project.downloadCount,
        // match_simple.{postsCount,score,favoriteCount,viewCount,answerCount},
        // rank_open_source_DB_engine.score, and match_result through
        // Update_match_result_rank.set_rank().
        new UpdateRank("rank_open_source_db_engine", "rank_last_month").run();
    }
}
/**
 * Updates {@code proj_rank} in {@code match_result}. When a project is hosted
 * in several communities, the smallest of its per-community ranks is used.
 */
class Update_match_result_rank{
    Connection connection;

    /** Opens the connection to {@code ossean_db} used by all methods. */
    public Update_match_result_rank() {
        connection = new ConnectionSql("ossean_db").connection();
    }

    /**
     * Returns the smallest rank the project has in any of the given source
     * tables.
     *
     * <p>The name is bound as a statement parameter. The previous code
     * replaced {@code '} by {@code "} to keep the concatenated SQL valid,
     * which changed the value being compared.
     * NOTE(review): if the source tables were loaded with the same
     * replacement applied to their names, confirm that matching still works.
     *
     * @param onlySource source table names to search
     * @param name       project name to look up
     * @return the smallest rank found, or 0 when no table contains the project
     */
    int getMinRank(String[] onlySource,String name){
        int rank=0;
        for (String sourceTable : onlySource) {
            String nameColumn=V.proj_nameMap.get(sourceTable);
            if (nameColumn==null) {
                continue; // no name column known for this table: nothing can match
            }
            String sql="select "+V.rank+" from "+sourceTable+" where "+nameColumn+"=?";
            try (PreparedStatement lookup=connection.prepareStatement(sql)) {
                lookup.setString(1, name);
                try (ResultSet rs=lookup.executeQuery()) {
                    if (rs.next()) {
                        int candidate=rs.getInt(1);
                        if (candidate<rank||rank==0) {
                            rank=candidate;
                        }
                    }
                }
            } catch (Exception e) {
                // A failing source table must not abort the remaining lookups.
                e.printStackTrace();
            }
        }
        return rank;
    }

    /**
     * Writes a rank for every distinct project id of {@code match_result}:
     * -1 when the project's source cannot be resolved, -2 when the source is
     * FreeCode, otherwise the result of {@link #getMinRank}.
     *
     * <p>All updates are committed together; on failure they are rolled back.
     * The connection is closed before returning in either case.
     *
     * @return {@code true} when the updates were committed
     */
    public boolean set_rank() {
        String selectSql="select distinct("+V.proj_id+") from "+V.match_result;
        String updateSql="update "+V.match_result+" set "+V.proj_rank+"=? where "+V.proj_id+"=?";
        try {
            connection.setAutoCommit(false);
            UpdateMatch_result umr=new UpdateMatch_result(connection);
            try (Statement st1=connection.createStatement();
                 ResultSet rs=st1.executeQuery(selectSql);
                 PreparedStatement ps=connection.prepareStatement(updateSql)) {
                int count=0;
                while (rs.next()) {
                    int projectId=rs.getInt(1);
                    List<String> sourceAndName=umr.getSourceName(projectId);
                    int rank;
                    if (sourceAndName.isEmpty()) {
                        rank=-1;
                    } else if (sourceAndName.get(0).equals("FreeCode")) {
                        rank=-2;
                    } else {
                        String[] onlySource=umr.sourceFormat(sourceAndName.get(0));
                        rank=getMinRank(onlySource, sourceAndName.get(1));
                    }
                    ps.setInt(1, rank);
                    ps.setInt(2, projectId);
                    ps.addBatch();
                    if (++count%1000==0) {
                        ps.executeBatch();
                        ps.clearBatch();
                        System.out.println(count);
                    }
                }
                ps.executeBatch();
                ps.clearBatch();
            }
            connection.commit();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            try {
                if (connection!=null) {
                    connection.rollback();
                }
            } catch (Exception rollbackFailure) {
                rollbackFailure.printStackTrace();
            }
            return false;
        } finally {
            try {
                if (connection!=null) {
                    connection.close();
                }
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,78 @@
package update;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Date;
import support.ConnectionSql;
/**
 * Derives a {@code stars} level for every row of a table from a numeric
 * column (by default {@code rank}): the column's maximum is divided into
 * {@link #levs} equally wide bands and each row gets its value divided by the
 * band width.
 */
public class UpdateStar extends Thread{
    String tablename;
    /** Column the star level is derived from. */
    String columnname="rank";
    /** Number of bands the value range is divided into. */
    int levs=6;

    /**
     * @param tablename table to update, using the {@code rank} column
     */
    public UpdateStar(String tablename) {
        this.tablename=tablename;
    }

    /**
     * @param tablename  table to update
     * @param columnname column the star level is derived from
     */
    public UpdateStar(String tablename,String columnname) {
        this.tablename=tablename;
        this.columnname=columnname;
    }

    /**
     * Computes and stores the star level of every row in one transaction.
     * Errors are printed and the transaction rolled back; the connection is
     * always closed.
     */
    public void operateSql() {
        String maxSql="select max("+columnname+") from "+tablename;
        String selectSql="select "+columnname+" from "+tablename+" order by "+columnname;
        String updateSql="update "+tablename+" set stars=? where "+columnname+"=?";
        try (Connection conn = new ConnectionSql("ossean_db").connection()) {
            conn.setAutoCommit(false);
            try (Statement st=conn.createStatement();
                 PreparedStatement ps=conn.prepareStatement(updateSql)) {
                // The maximum of the column determines the range to be divided.
                int maxValue=0;
                try (ResultSet rs=st.executeQuery(maxSql)) {
                    if (rs.next()) {
                        maxValue=rs.getInt(1);
                    }
                }
                // Width of one band. Clamped to 1: with a maximum below levs the
                // integer division yields 0 and num/count would throw
                // ArithmeticException.
                int count=Math.max(1, maxValue/levs);
                System.out.println(tablename+" start!");
                try (ResultSet rs=st.executeQuery(selectSql)) {
                    while (rs.next()) {
                        int num=rs.getInt(1);
                        ps.setInt(1, num/count);
                        ps.setInt(2, num);
                        ps.addBatch();
                        if (num%10000==0) {
                            ps.executeBatch();
                            ps.clearBatch();
                        }
                    }
                }
                ps.executeBatch();
                ps.clearBatch();
                conn.commit();
            } catch (Exception e) {
                try {
                    conn.rollback();
                } catch (Exception rollbackFailure) {
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
            System.out.println(tablename+" down!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Runs {@link #operateSql()}. */
    @Override
    public void run() {
        operateSql();
    }

    /** Updates the star levels of the hard-coded project tables. */
    public static void main(String[] args) {
        // The "tag" table was handled in an earlier run and is currently disabled:
        // new UpdateStar("tag").run();
        UpdateStar updateStar2=new UpdateStar("oschina_project");
        updateStar2.run();
        UpdateStar updateStar3=new UpdateStar("openhub_project");
        updateStar3.run();
    }
}

@ -0,0 +1,53 @@
package update;
import java.sql.Connection;
import java.sql.Statement;
import support.ConnectionSql;
/**
 * Join-update helper for two tables: copies a column of one table into a
 * column of another for all rows whose join columns match.
 *
 * @author LiLy
 */
public class UpdateTableColumn {
    /** Table that is updated. */
    String table1;
    /** Table the value is read from. */
    String table2;
    /** Join column of {@link #table1}. */
    String connColumn1;
    /** Join column of {@link #table2}. */
    String connColumn2;
    /** Column of {@link #table1} that receives the value. */
    String updateColumn;
    /** Column of {@link #table2} that supplies the value. */
    String referColumn;
    /** Connection the update runs on; owned by the caller. */
    Connection conn;

    /**
     * @param table1       table to update
     * @param table2       table to read from
     * @param connColumn1  join column in {@code table1}
     * @param connColumn2  join column in {@code table2}
     * @param updateColumn column of {@code table1} to overwrite
     * @param referColumn  column of {@code table2} supplying the new value
     * @param conn         open connection; it is not closed by this class
     */
    public UpdateTableColumn(String table1,String table2,String connColumn1,String connColumn2,String updateColumn,String referColumn,Connection conn) {
        this.table1=table1;
        this.table2=table2;
        this.connColumn1=connColumn1;
        this.connColumn2=connColumn2;
        this.updateColumn=updateColumn;
        this.referColumn=referColumn;
        this.conn=conn;
    }

    /**
     * Executes the multi-table update
     * {@code update t1,t2 set t1.updateColumn = t2.referColumn where t1.connColumn1 = t2.connColumn2}.
     * All names are identifiers and therefore concatenated; pass trusted values only.
     *
     * @return {@code true} on success, {@code false} when the statement failed
     *         (the error is printed)
     */
    public boolean updateMethod () {
        String sql="update "+table1+","+table2+" set "+
                table1+"."+updateColumn+" = "+table2+"."+referColumn+
                " where "+table1+"."+connColumn1+" = "+table2+"."+connColumn2;
        // try-with-resources releases the statement even when execution fails.
        try (Statement st=conn.createStatement()) {
            st.execute(sql);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Fills {@code posts_split_tags.tag_id} from {@code tag.id}, joining on the tag name. */
    public static void main(String[] args) {
        Connection conn=new ConnectionSql("ossean_db").connection();
        try {
            UpdateTableColumn up=new UpdateTableColumn("posts_split_tags", "tag", "tag", "tag", "tag_id", "id", conn);
            up.updateMethod();
        } finally {
            try {
                if (conn!=null) {
                    conn.close();
                }
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
}

@ -0,0 +1,53 @@
package update;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import support.ConnectionSql;
/**
 * Updates {@code tag.postsNum}: for every tag in table {@code tag}, the number
 * of rows in {@code posts_split_tags} carrying that tag.
 *
 * @author LiLy
 */
public class UpdateTagPostsNum {
    /**
     * Recounts the posts of every tag and stores the counts in one transaction.
     *
     * <p>Tag values are bound as statement parameters, so tags containing
     * quotes no longer break the SQL. On failure the error is printed and the
     * transaction rolled back. The connection is always closed.
     *
     * @throws SQLException if the rollback or closing the connection fails
     */
    public void countPostsNum() throws SQLException{
        Connection conn = new ConnectionSql("ossean_db").connection();
        try {
            conn.setAutoCommit(false);
            try (Statement tagStatement=conn.createStatement();
                 ResultSet tags=tagStatement.executeQuery("select tag from tag");
                 java.sql.PreparedStatement countQuery=conn.prepareStatement(
                         "select count(tag) from posts_split_tags where tag=?");
                 java.sql.PreparedStatement update=conn.prepareStatement(
                         "update tag set tag.postsNum=? where tag.tag= ?")) {
                int processed=0;
                while (tags.next()) {
                    String tag=tags.getString("tag");
                    countQuery.setString(1, tag);
                    try (ResultSet rscount=countQuery.executeQuery()) {
                        if (rscount.next()) {
                            update.setInt(1, rscount.getInt(1));
                            update.setString(2, tag);
                            update.executeUpdate();
                        }
                    }
                    if (++processed%1000==0) {
                        System.out.println(processed);
                    }
                }
            }
            System.out.println("Down!");
            conn.commit();
        } catch (Exception e) {
            // Report first: a failing rollback must not hide the original error.
            e.printStackTrace();
            if (conn!=null) {
                conn.rollback();
            }
        } finally {
            if (conn!=null) {
                conn.close();
            }
        }
    }

    /** Runs {@link #countPostsNum()}. */
    public static void main(String[] args) throws SQLException{
        UpdateTagPostsNum updateTagPosts=new UpdateTagPostsNum();
        updateTagPosts.countPostsNum();
    }
}

@ -0,0 +1,179 @@
package update;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import support.ConnectionSql;
/**
 * Table and column names shared by the {@code match_result} update programs.
 */
class V {
    public static String match_result = "match_result";
    public static String sourceTable = "open_source_projects";
    public static String onlySourceTable1 = "openhub_projects";
    public static String onlySourceTable2 = "oschina_projects";
    public static String onlySourceTable3 = "sourceforge_projects";
    public static String proj_star = "proj_star";
    public static String proj_rank = "proj_rank";
    public static String proj_id = "proj_id";
    public static String rank = "rank";
    public static String stars = "stars";
    public static String source = "source";
    /** Maps a project table name to the column holding the project name in that table. */
    public static Map<String, String> proj_nameMap = new HashMap<String, String>();
    static {
        // Plain static initializer instead of double-brace initialization, which
        // would create an anonymous HashMap subclass.
        proj_nameMap.put("SourceForge_project", "name");
        proj_nameMap.put("OpenHub_project", "name");
        proj_nameMap.put("OSChina_project", "projectShortName");
        proj_nameMap.put("open_source_projects", "name");
    }
}
/**
 * Helpers shared by the programs that update rank and stars in
 * {@code match_result}.
 * <ul>
 * <li>{@link #sourceFormat(String)} turns a project's source string into the
 * names of its source tables, leaving out FreeCode.</li>
 * <li>{@link #getSourceName(int)} reads a project's source and name from
 * {@code open_source_projects}.</li>
 * </ul>
 */
class UpdateMatch_result {
    /** Connection used for lookups; owned by the caller. */
    Connection connection;

    /**
     * @param connection open connection; it is not closed by this class
     */
    public UpdateMatch_result(Connection connection) {
        this.connection = connection;
    }

    /**
     * Converts a comma-separated list of communities into source table names
     * by appending {@code "_project"} to each entry. FreeCode and empty
     * entries are left out.
     *
     * <p>The previous implementation cut the string at the first occurrence of
     * "FreeCode", which discarded every community listed after it and threw
     * {@code ArrayIndexOutOfBoundsException} for the input {@code "FreeCode"}.
     * Entries are trimmed; this assumes community names carry no significant
     * surrounding whitespace.
     *
     * @param source comma-separated community names; {@code null} yields an
     *               empty array
     * @return the source table names in input order, never {@code null}
     */
    String[] sourceFormat(String source) {
        List<String> formatSource = new ArrayList<String>();
        if (source == null) {
            return new String[0];
        }
        for (String entry : source.split(",")) {
            String community = entry.trim();
            if (community.isEmpty() || community.equals("FreeCode")) {
                continue;
            }
            formatSource.add(community + "_project");
        }
        return formatSource.toArray(new String[0]);
    }

    /**
     * Looks up the source and the name of a project in
     * {@code open_source_projects}, ignoring projects whose source is exactly
     * FreeCode.
     *
     * @param id project id
     * @return a list holding the source and then the name, or an empty list
     *         when no such project exists or the query failed (the error and
     *         the id are printed)
     */
    List<String> getSourceName(int id) {
        List<String> source_name = new ArrayList<String>();
        String sql = "select " + V.source + ","
                + V.proj_nameMap.get(V.sourceTable) + " from " + V.sourceTable
                + " where id=" + id + " and source!='FreeCode'";
        // try-with-resources closes statement and result set on failure as well.
        try (Statement st = connection.createStatement();
             ResultSet rs = st.executeQuery(sql)) {
            if (rs.next()) {
                source_name.add(rs.getString(1));
                source_name.add(rs.getString(2));
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(id);
        }
        return source_name;
    }
}
/**
 * Updates {@code proj_star} in {@code match_result} with the average star
 * level a project has across the communities that host it.
 */
public class Update_match_result_proj_star {
    Connection connection;

    /**
     * @param database name of the database to connect to
     */
    public Update_match_result_proj_star(String database) {
        connection = new ConnectionSql(database).connection();
    }

    /**
     * Returns the project's stars summed over the given source tables and
     * divided by the number of tables. A table that does not contain the
     * project contributes 0 to the sum.
     *
     * <p>The name is bound as a statement parameter. The previous code
     * replaced {@code '} by {@code "} to keep the concatenated SQL valid,
     * which changed the value being compared.
     * NOTE(review): if the source tables were loaded with the same
     * replacement applied to their names, confirm that matching still works.
     *
     * @param onlySource source table names to search
     * @param name       project name to look up
     * @return the average, or 0 when {@code onlySource} is empty (previously
     *         NaN, which cannot be stored)
     */
    double getAVGStar(String[] onlySource, String name) {
        int len = onlySource.length;
        if (len == 0) {
            return 0;
        }
        double star = 0;
        for (String sourceTable : onlySource) {
            String nameColumn = V.proj_nameMap.get(sourceTable);
            if (nameColumn == null) {
                continue; // no name column known for this table: nothing can match
            }
            String sql = "select " + V.stars + " from " + sourceTable
                    + " where " + nameColumn + "=?";
            try (PreparedStatement lookup = connection.prepareStatement(sql)) {
                lookup.setString(1, name);
                try (ResultSet rs = lookup.executeQuery()) {
                    if (rs.next()) {
                        star += rs.getInt(1);
                    }
                }
            } catch (Exception e) {
                // A failing source table must not abort the remaining lookups.
                e.printStackTrace();
            }
        }
        return star / len;
    }

    /**
     * Writes a star value for every distinct project id of
     * {@code match_result}: -1 when the project's source cannot be resolved,
     * -2 when the source is FreeCode, otherwise the result of
     * {@link #getAVGStar}.
     *
     * <p>The value is bound for every row. Previously it was bound only in
     * the last branch, so the -1 and -2 rows either failed with an unset
     * parameter or silently reused the previous project's value. All updates
     * are committed together; on failure they are rolled back. The connection
     * is closed before returning in either case.
     */
    public void setProj_star() {
        String selectSql = "select distinct(" + V.proj_id + ") from "
                + V.match_result;
        String updateSql = "update " + V.match_result + " set " + V.proj_star
                + "=? where " + V.proj_id + "=?";
        try {
            connection.setAutoCommit(false);
            UpdateMatch_result umr = new UpdateMatch_result(connection);
            try (Statement st1 = connection.createStatement();
                 ResultSet rs = st1.executeQuery(selectSql);
                 PreparedStatement ps = connection.prepareStatement(updateSql)) {
                int count = 0;
                while (rs.next()) {
                    int projectId = rs.getInt(1);
                    List<String> sourceAndName = umr.getSourceName(projectId);
                    double star;
                    if (sourceAndName.isEmpty()) {
                        star = -1;
                    } else if (sourceAndName.get(0).equals("FreeCode")) {
                        star = -2;
                    } else {
                        String[] onlySource = umr.sourceFormat(sourceAndName.get(0));
                        star = getAVGStar(onlySource, sourceAndName.get(1));
                    }
                    ps.setDouble(1, star);
                    ps.setInt(2, projectId);
                    ps.addBatch();
                    if (++count % 1000 == 0) {
                        ps.executeBatch();
                        ps.clearBatch();
                        System.out.println(count);
                    }
                }
                ps.executeBatch();
                ps.clearBatch();
            }
            connection.commit();
        } catch (Exception e) {
            e.printStackTrace();
            try {
                if (connection != null) {
                    connection.rollback();
                }
            } catch (Exception rollbackFailure) {
                rollbackFailure.printStackTrace();
            }
        } finally {
            try {
                if (connection != null) {
                    connection.close();
                }
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }

    /** Updates the stars of all projects in {@code ossean_db}. */
    public static void main(String[] args) {
        Update_match_result_proj_star u_proj_star = new Update_match_result_proj_star(
                "ossean_db");
        u_proj_star.setProj_star();
        System.out.print("Down!");
    }
}
Loading…
Cancel
Save