commit 8f3f24828cd1f1f4270ffe123f5a96aced036921 Author: unknown <1056102095@qq.com> Date: Wed Apr 29 15:24:21 2015 +0800 first commit diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..33044e9 --- /dev/null +++ b/.classpath @@ -0,0 +1,7 @@ + + + + + + + diff --git a/.project b/.project new file mode 100644 index 0000000..3c6aebd --- /dev/null +++ b/.project @@ -0,0 +1,17 @@ + + + Ossean + + + + + + org.eclipse.jdt.core.javabuilder + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/bin/CalculateNDCG.class b/bin/CalculateNDCG.class new file mode 100644 index 0000000..3352795 Binary files /dev/null and b/bin/CalculateNDCG.class differ diff --git a/bin/ControlFormat.class b/bin/ControlFormat.class new file mode 100644 index 0000000..a35e659 Binary files /dev/null and b/bin/ControlFormat.class differ diff --git a/bin/Fields.class b/bin/Fields.class new file mode 100644 index 0000000..8c24a97 Binary files /dev/null and b/bin/Fields.class differ diff --git a/bin/InsertStrings.class b/bin/InsertStrings.class new file mode 100644 index 0000000..60abf6d Binary files /dev/null and b/bin/InsertStrings.class differ diff --git a/bin/Sort/FirstSort.class b/bin/Sort/FirstSort.class new file mode 100644 index 0000000..1f5024e Binary files /dev/null and b/bin/Sort/FirstSort.class differ diff --git a/bin/Sort/OperateData.class b/bin/Sort/OperateData.class new file mode 100644 index 0000000..be6cab8 Binary files /dev/null and b/bin/Sort/OperateData.class differ diff --git a/bin/Sort/PostsHot.class b/bin/Sort/PostsHot.class new file mode 100644 index 0000000..f6e6558 Binary files /dev/null and b/bin/Sort/PostsHot.class differ diff --git a/bin/Sort/SortWithNewpost.class b/bin/Sort/SortWithNewpost.class new file mode 100644 index 0000000..99a0bd3 Binary files /dev/null and b/bin/Sort/SortWithNewpost.class differ diff --git a/bin/Sort/SortWithPostsAndView.class b/bin/Sort/SortWithPostsAndView.class new file mode 100644 index 0000000..bc8199f Binary files /dev/null and b/bin/Sort/SortWithPostsAndView.class differ diff --git a/bin/Sort/SortWithTimeModel.class b/bin/Sort/SortWithTimeModel.class new file mode 100644 index 0000000..08c47b7 Binary files /dev/null and b/bin/Sort/SortWithTimeModel.class differ diff --git a/bin/SplitHugeFile.class b/bin/SplitHugeFile.class new file mode 100644 index 0000000..79be610 Binary files /dev/null and b/bin/SplitHugeFile.class differ diff --git a/bin/SplitTags.class b/bin/SplitTags.class new file mode 100644 index 0000000..5a0c7aa Binary files /dev/null and b/bin/SplitTags.class differ diff --git a/bin/StrToDate.class b/bin/StrToDate.class new file mode 100644 index 0000000..4b1347a Binary files /dev/null and b/bin/StrToDate.class differ diff --git a/bin/Test.class b/bin/Test.class new file mode 100644 index 0000000..bd6ef80 Binary files /dev/null and b/bin/Test.class differ diff --git a/bin/specification b/bin/specification new file mode 100644 index 0000000..ce1022b --- /dev/null +++ b/bin/specification @@ -0,0 +1,16 @@ +StatisticsPostsCount.java:统计每个项目对应的帖子信息(对应的帖子数,帖子的总回复数,获赞数,收藏数,浏览数:(psotsCount,answerCount,score,favoriteCount,viewCount)), + 到并更新原表的对应项中。 + 主要方法:cal_count():从match_simple读出项目和匹配的标签,根据标签在split_posts_tags表找出posts_id,统计每个posts的count。 + +Update_match_result_proj_star.java: 更新match_result中的proj_stars,用于计算项目在托管社区的热度平均值。 + 主要类:Update_match_result_proj_star: + 主要方法: setProj_star() :操作update语句更新结果 + getAVGStar(String[] onlySource, String name):返回onlySource表中,name项目的stars的平均值 + 类:V:定义一些用到的常量 + 类:UpdateMatch_result:定义一些方法 + 方法:sourceFormat(String source):由source还原项目的来源表名,返回一个该项目来源的String[]还原每个项目来源表,并去掉FreeCode,因为该网站项目数据没有可量化项 + getSourceName(int id):由项目id获得从open_source_projects获得项目名称和项目源 +UpdateRank.java:多线程算法,给定一表名和表名一列,通过这一列对记录排序并更新rank列座位名次,rank列名设置更改构造函数即可 + 类:Update_match_result_rank:更新match_result排名,若相同同时存在在多个社区,取排名最小的最为项目排名 +UpdateTagPosts.java:方法更新tag表中每个tag关联的posts个数postsNum。 + 方法:countPostsNum()实现功能 \ No newline at end of file diff --git a/bin/support/ConnectionSql.class b/bin/support/ConnectionSql.class new file mode 100644 index 0000000..8c1e5c5 Binary files /dev/null and b/bin/support/ConnectionSql.class differ diff --git a/bin/support/DataHandle.class b/bin/support/DataHandle.class new file mode 100644 index 0000000..af17502 Binary files /dev/null and b/bin/support/DataHandle.class differ diff --git a/bin/support/DateHandle.class b/bin/support/DateHandle.class new file mode 100644 index 0000000..826f74c Binary files /dev/null and b/bin/support/DateHandle.class differ diff --git a/bin/support/ReadBigFile.class b/bin/support/ReadBigFile.class new file mode 100644 index 0000000..9c63338 Binary files /dev/null and b/bin/support/ReadBigFile.class differ diff --git a/bin/support/Similarity.class b/bin/support/Similarity.class new file mode 100644 index 0000000..6adcc41 Binary files /dev/null and b/bin/support/Similarity.class differ diff --git a/bin/update/StatisticsPostsCount.class b/bin/update/StatisticsPostsCount.class new file mode 100644 index 0000000..0f89fe4 Binary files /dev/null and b/bin/update/StatisticsPostsCount.class differ diff --git a/bin/update/UpdateMatch_result.class b/bin/update/UpdateMatch_result.class new file mode 100644 index 0000000..17e3660 Binary files /dev/null and b/bin/update/UpdateMatch_result.class differ diff --git a/bin/update/UpdateRank.class b/bin/update/UpdateRank.class new file mode 100644 index 0000000..c3e7e8c Binary files /dev/null and b/bin/update/UpdateRank.class differ diff --git a/bin/update/UpdateStar.class b/bin/update/UpdateStar.class new file mode 100644 index 0000000..6027f84 Binary files /dev/null and b/bin/update/UpdateStar.class differ diff --git a/bin/update/UpdateTableColumn.class b/bin/update/UpdateTableColumn.class new file mode 100644 index 0000000..8a98b4e Binary files /dev/null and b/bin/update/UpdateTableColumn.class differ diff --git a/bin/update/UpdateTagPostsNum.class b/bin/update/UpdateTagPostsNum.class new file mode 100644 index 0000000..d41fd80 Binary files /dev/null and b/bin/update/UpdateTagPostsNum.class differ diff --git a/bin/update/Update_match_result_proj_star.class b/bin/update/Update_match_result_proj_star.class new file mode 100644 index 0000000..76acc5f Binary files /dev/null and b/bin/update/Update_match_result_proj_star.class differ diff --git a/bin/update/Update_match_result_rank.class b/bin/update/Update_match_result_rank.class new file mode 100644 index 0000000..e198a06 Binary files /dev/null and b/bin/update/Update_match_result_rank.class differ diff --git a/bin/update/V$1.class b/bin/update/V$1.class new file mode 100644 index 0000000..17bdcef Binary files /dev/null and b/bin/update/V$1.class differ diff --git a/bin/update/V.class b/bin/update/V.class new file mode 100644 index 0000000..9da2328 Binary files /dev/null and b/bin/update/V.class differ diff --git a/src/CalculateNDCG.java b/src/CalculateNDCG.java new file mode 100644 index 0000000..d4480a0 --- /dev/null +++ b/src/CalculateNDCG.java @@ -0,0 +1,48 @@ +import java.math.*; +public class CalculateNDCG { + public double cal_NDCG(String[] realSort,String[] ideaSort,int r){ + double nDCG=0.0; + double DCG=0.0; + double iDCG=0.0; + int[] gain_score=gain(realSort, ideaSort, r); + for(int i=0;i 1000) { + ps.executeBatch(); + ps.clearBatch(); + count = 0; + } + } + ps.executeBatch(); + ps.clearBatch(); + ps.close(); + conn.close(); + + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + +} +} diff --git a/src/Sort/FirstSort.java b/src/Sort/FirstSort.java new file mode 100644 index 0000000..763d2ae --- /dev/null +++ b/src/Sort/FirstSort.java @@ -0,0 +1,105 @@ +package Sort; + +import java.sql.*; +import java.util.*; + +import support.ConnectionSql; +import update.UpdateRank; +/** + * 用帖子的各个参数排序(postsCount,viewCount,score,favoriteCount)排序的结果 + * @author LiLy + * + */ +class OperateData{ + Connection conn; + String tablename="match_simple"; + String proj_name; + int postsCount; + int score; + int viewCount; + int answerCount; + int favoriteCount; + OperateData(){ + + } + public OperateData(String proj_name) { + // TODO Auto-generated constructor stub + conn=new ConnectionSql("ossean_db").connection(); + this.proj_name="'"+proj_name+"'"; + try { + Statement st=conn.createStatement(); + String sql="select postsCount,score,viewCount,answerCount,favoriteCount from "+tablename+" where proj_name="+this.proj_name; + ResultSet rs=st.executeQuery(sql); + if(rs.next()){ + postsCount=rs.getInt(1); + score=rs.getInt(2); + viewCount=rs.getInt(3); + answerCount=rs.getInt(4); + favoriteCount=rs.getInt(5); + } + st.close(); + } catch (Exception e) { + // TODO: handle exception + + e.printStackTrace(); + } + } + public boolean updateData(String column_allCount,int allCount) { + String sql="update "+tablename+" set "+column_allCount+" = "+allCount+" where proj_name="+proj_name; + try { + Statement st=conn.createStatement(); + st.execute(sql); + st.close(); + + return true; + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + return false; + } + + } + +} +public class FirstSort { + public List getAll_proj_names(String tablename) { + List proj_names=new ArrayList(); + Connection conn=new ConnectionSql("ossean_db").connection(); + try { + Statement st=conn.createStatement(); + String sql="select proj_name from "+tablename; + ResultSet rs=st.executeQuery(sql); + while(rs.next()){ + proj_names.add(rs.getString(1)); + } + st.close(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + + return proj_names; + + } + public void allCountSort() throws SQLException { + int allCount=0; + List proj_names=getAll_proj_names("match_simple"); + Iterator iterator=proj_names.iterator(); + while(iterator.hasNext()){ + String proj_name=iterator.next(); + OperateData opData=new OperateData(proj_name); + allCount=opData.answerCount+opData.postsCount+opData.score+opData.favoriteCount+opData.viewCount; + opData.updateData("allCount",allCount); + } + UpdateRank updateRank=new UpdateRank("match_simple", "allCount"); + updateRank.operateSql(); + } + public static void main(String[] args) { + try { + new FirstSort().allCountSort(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } +} diff --git a/src/Sort/PostsHot.java b/src/Sort/PostsHot.java new file mode 100644 index 0000000..827051a --- /dev/null +++ b/src/Sort/PostsHot.java @@ -0,0 +1,17 @@ +package Sort; +/** + * 通过贴子的浏览量和活跃度统计帖子的热度 + * @author LiLy + * + */ +public class PostsHot { + double w1=0d; + double w2=0d; + + public PostsHot(double w1,double w2) { + // TODO Auto-generated constructor stub + } +public double cla_postHot(int viewCount,String lastActivityDate){ + return 0d; +} +} diff --git a/src/Sort/SortWithNewpost.java b/src/Sort/SortWithNewpost.java new file mode 100644 index 0000000..d1e5e04 --- /dev/null +++ b/src/Sort/SortWithNewpost.java @@ -0,0 +1,215 @@ +package Sort; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import com.sun.corba.se.impl.encoding.OSFCodeSetRegistry.Entry; + +import support.ConnectionSql; +import support.DateHandle; +import update.UpdateRank; +/** + * 根据关联的最近一段时间活跃帖子数排序,时间控制用传入类的参数 + * @author LiLy + * + */ +public class SortWithNewpost { + String column="newPostsCount"; + String tablename="simple_test_newPostsCount"; + Connection conn=new ConnectionSql("ossean_db").connection(); + String date; +public SortWithNewpost(String date) { + // TODO Auto-generated constructor stub + this.date=date; +} + +public static void main(String[] args) { + SortWithNewpost sort=new SortWithNewpost("2014-08-31 00:00:00"); + sort.start(); +} + +/** + * 启动程序,更新newPostCount字段和Rank_newPostsCount字段 + */ +public void start() { + Map count=getnewPostsCount(); + updateTable(conn, tablename, "newPostsCount", "DBMS", count); + UpdateRank updateRank=new UpdateRank(tablename, "newPostsCount","rank_newPostsCount"); + updateRank.run(); +} + +/** + * 根据Map中条件更新表 + * @param conn + * @param tablename + * @param updatecolumn + * @param conditionColumn + * @param count + */ +public void updateTable(Connection conn,String tablename,String updatecolumn,String conditionColumn,Map count) { + String sql="update "+tablename+" set "+updatecolumn+"="+"? where "+conditionColumn+" = ?"; + try { + conn.setAutoCommit(false); + PreparedStatement ps=conn.prepareStatement(sql); + int flag=1; + Iterator> iterator=count.entrySet().iterator(); + System.out.println("start!"); + while(iterator.hasNext()){ + Map.Entry entry=iterator.next(); + String key=entry.getKey(); + int value=(int)entry.getValue(); + ps.setInt(1, value); + ps.setString(2, key); + + ps.addBatch(); + if(flag++%1000==0){ + ps.executeBatch(); + ps.clearBatch(); + System.out.println("update: "+flag); + } + } + ps.executeBatch(); + ps.clearBatch(); + ps.close(); + conn.commit(); + conn.close(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } +} + +/** + * 得到tablename的项目名称对应的最近一段时间的帖子数 + * @return + */ + + +public Map getnewPostsCount() { + Map count=new HashMap(); + try { + conn.setAutoCommit(false); + ResultSet rs1; + ResultSet rs2; + ResultSet rs3; + Statement st1=conn.createStatement(); + Statement st2=conn.createStatement(); + Statement st3=conn.createStatement(); + String projName; + int tag_id; + String sql; + int tempCount=0; + + /** + * 创建临时表保存需要的时间范围内的帖子 + */ + + sql="create temporary table temp select tag_id,lastActivityDate from posts_split_tags where lastActivityDate > '"+date+"'"; + st1.execute(sql); + sql="alter table temp add index index_tag_id (tag_id)"; + st1.execute(sql); + conn.commit(); + System.out.println("temp table create completement!"); + + sql="select DBMS from simple_test_newpostscount"; + rs1=st1.executeQuery(sql); + while(rs1.next()){ + tempCount=0; + projName=rs1.getString(1); + sql="select tag_id from simple_test_newpostscount_match where DBMS= '"+projName+"'"; + rs3=st3.executeQuery(sql); + while(rs3.next()) + { + tag_id=rs3.getInt(1); + sql="select count(*) from temp where tag_id = "+tag_id; + rs2=st2.executeQuery(sql); + + while(rs2.next()){ + tempCount+=rs2.getInt(1); + } + } + count.put(projName, tempCount); + System.out.println(projName); + } + st2.close(); + st1.close(); + + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + return count; + } + + + +/** + * 根据表名和限制条件(没有选择全部)选择对应的列返回结果到ResultSet + * @param conn + * @param tablename + * @param column + * @param conditionColumn + * @param condition + * @return + * @throws SQLException + */ +public ResultSet getData(Connection conn,String tablename,String column,String conditionColumn,String condition){ + String sql; + ResultSet rs=null; + if(!conditionColumn.isEmpty()) + { + sql="select "+column+" from "+tablename+" where "+conditionColumn+"="+"'"+condition+"'"; + } + else { + sql="select "+column+" from "+tablename; + } + try { + Statement st=conn.createStatement(); + rs=st.executeQuery(sql); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + +// st.close(); + return rs; +} +/** + * 根据表名和限制条件(没有选择全部)选择对应的列返回结果到ResultSet + * @param conn + * @param tablename + * @param column + * @param conditionColumn + * @param condition + * @return + * @throws SQLException + */ +public ResultSet getData(Connection conn,String tablename,String column,String conditionColumn,int condition) { + String sql; + ResultSet rs=null; + if(!conditionColumn.isEmpty()) + { + sql="select "+column+" from "+tablename+" where "+conditionColumn+"="+condition; + } + else { + sql="select "+column+" from "+tablename; + } + try { + Statement st=conn.createStatement(); + rs=st.executeQuery(sql); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + return rs; +} +} diff --git a/src/Sort/SortWithPostsAndView.java b/src/Sort/SortWithPostsAndView.java new file mode 100644 index 0000000..45a076e --- /dev/null +++ b/src/Sort/SortWithPostsAndView.java @@ -0,0 +1,12 @@ +package Sort; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class SortWithPostsAndView { +// public List name() { +// +// } + +} diff --git a/src/Sort/SortWithTimeModel.java b/src/Sort/SortWithTimeModel.java new file mode 100644 index 0000000..3901d27 --- /dev/null +++ b/src/Sort/SortWithTimeModel.java @@ -0,0 +1,263 @@ +package Sort; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import support.ConnectionSql; +import support.DateHandle; +import update.UpdateRank; + +public class SortWithTimeModel { + String column = "newPostsCount"; + String tablename = "simple_test_newPostsCount"; + Connection conn = new ConnectionSql("ossean_db").connection(); + String date; + int n; + DateHandle dateHandle; + + public SortWithTimeModel(String date, int n) { + // TODO Auto-generated constructor stub + this.date = date; + this.n = n; + dateHandle = new DateHandle(date, n); + } + + public static void main(String[] args) { + SortWithTimeModel sort = new SortWithTimeModel("2014-09-31 00:00:00", + 12); + sort.start(); + } + + /** + * 启动程序,更新newPostCount字段和Rank_newPostsCount字段 + */ + public void start() { + Map count = getnewPostsCountTime(); + updateTable(conn, tablename, "postsCountTime", "DBMS", count); + UpdateRank updateRank = new UpdateRank(tablename, "postsCountTime", + "rank_PostsCountTime"); + updateRank.run(); + } + + /** + * 根据Map中条件更新表 + * + * @param conn + * @param tablename + * @param updatecolumn + * @param conditionColumn + * @param count + */ + public void updateTable(Connection conn, String tablename, + String updatecolumn, String conditionColumn, + Map count) { + String sql = "update " + tablename + " set " + updatecolumn + "=" + + "? where " + conditionColumn + " = ?"; + try { + conn.setAutoCommit(false); + PreparedStatement ps = conn.prepareStatement(sql); + int flag = 1; + Iterator> iterator = count.entrySet() + .iterator(); + System.out.println("start!"); + while (iterator.hasNext()) { + Map.Entry entry = iterator.next(); + String key = entry.getKey(); + double value = (Double) entry.getValue(); + ps.setDouble(1, value); + ps.setString(2, key); + + ps.addBatch(); + if (flag++ % 1000 == 0) { + ps.executeBatch(); + ps.clearBatch(); + System.out.println("update: " + flag); + } + } + ps.executeBatch(); + ps.clearBatch(); + ps.close(); + conn.commit(); + conn.close(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + /** + * 根据参数给的时间段级别,返回影响因子gen的值 + * + * @param lev + * @return + */ + public double getGen(int lev) { + return (double) lev / (lev + 1); + } + + /** + * 得到tablename的项目名称对应的最近一段时间的帖子通过时间模型对于项目的影响值 + * + * @return + */ + public Map getnewPostsCountTime() { + Map count = new HashMap(); + try { + conn.setAutoCommit(false); + ResultSet rs1; + Statement st1; + String projName; + String sql; + DateHandle dateHandle = new DateHandle(); + + /** + * 初始化Map count放入每个项目名 + */ + sql = "select DBMS from simple_test_newpostscount"; + st1 = conn.createStatement(); + rs1 = st1.executeQuery(sql); + while (rs1.next()) { + count.put(rs1.getString(1), 0d); + } + rs1.close(); + st1.close(); + + /** + * 由于JDBC不能在一句查询语句中打开两次临时表,所以改建成物理表。 + */ + + sql = "create table temp " + + "SELECT p.id as id,s.DBMS as DBMS,s.tag_id as tag_id,p.lastActivityDate " + + "from simple_test_newpostscount_match as s " + + "INNER JOIN posts_split_tags as p ON s.tag_id=p.tag_id"; + st1 = conn.createStatement(); + st1.execute(sql); + sql = "alter table temp add index index_id (id)"; + st1.execute(sql); + conn.commit(); + st1.close(); + + System.out.println("temp table create completement!"); + + int page_count; + int page_size; + String lastActivityDate; + int lev = 0; + double gen = 0.0; + boolean isEmpty; + + /** + * 采用分页查询,防止内存溢出 + */ + + page_count = 0; // 设置页大小和页码 + page_size = 1000; + sql = "select DBMS,lastActivityDate from temp where id>(select id from temp order by id limit ?,1) limit ?"; + PreparedStatement ps = conn.prepareStatement(sql); + do { + ps.setInt(1, page_count * page_size); + ps.setInt(2, page_size); + rs1 = ps.executeQuery(); + isEmpty = true; + while (rs1.next()) { + isEmpty = false; + projName = rs1.getString(1); + lastActivityDate = rs1.getString(2); + lev = dateHandle.levDate(lastActivityDate); + gen = getGen(lev); + count.put(projName, count.get(projName) + gen); + } + + if (isEmpty) { + break; + } + ++page_count ; + System.out.print(new Date()); + System.out.println("\t\t" + page_count); + + } while (true); + rs1.close(); + ps.close(); + sql="drop table temp"; + st1=conn.createStatement(); + st1.execute(sql); + st1.close(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + return count; + } + + /** + * 根据表名和限制条件(没有选择全部)选择对应的列返回结果到ResultSet + * + * @param conn + * @param tablename + * @param column + * @param conditionColumn + * @param condition + * @return + * @throws SQLException + */ + public ResultSet getData(Connection conn, String tablename, String column, + String conditionColumn, String condition) { + String sql; + ResultSet rs = null; + if (!conditionColumn.isEmpty()) { + sql = "select " + column + " from " + tablename + " where " + + conditionColumn + "=" + "'" + condition + "'"; + } else { + sql = "select " + column + " from " + tablename; + } + try { + Statement st = conn.createStatement(); + rs = st.executeQuery(sql); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + + // st.close(); + return rs; + } + + /** + * 根据表名和限制条件(没有选择全部)选择对应的列返回结果到ResultSet + * + * @param conn + * @param tablename + * @param column + * @param conditionColumn + * @param condition + * @return + * @throws SQLException + */ + public ResultSet getData(Connection conn, String tablename, String column, + String conditionColumn, int condition) { + String sql; + ResultSet rs = null; + if (!conditionColumn.isEmpty()) { + sql = "select " + column + " from " + tablename + " where " + + conditionColumn + "=" + condition; + } else { + sql = "select " + column + " from " + tablename; + } + try { + Statement st = conn.createStatement(); + rs = st.executeQuery(sql); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + return rs; + } + +} diff --git a/src/SplitHugeFile.java b/src/SplitHugeFile.java new file mode 100644 index 0000000..c2395ca --- /dev/null +++ b/src/SplitHugeFile.java @@ -0,0 +1,35 @@ +import java.io.*; + +public class SplitHugeFile { + public void splitFile(String filename, int linesCount) { + try { + BufferedReader reader=new BufferedReader(new FileReader(new File(filename))); + int count=0; + int file_count=0; + String tempString=null; + BufferedWriter writer=new BufferedWriter(new FileWriter(new File(writeFilename(filename,file_count++)))); + while((tempString=reader.readLine())!=null){ + writer.write(tempString+'\n'); + count++; + if(count>=linesCount){ + count=0; + writer.flush(); + writer.close(); + writer=new BufferedWriter(new FileWriter(new File(writeFilename(filename,file_count++)))); + continue; + } + } + writer.flush(); + writer.close(); + reader.close(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + } + String writeFilename(String filename,int count){ + String wfilename=filename.substring(0, filename.length()-4); + wfilename=wfilename+count; + return wfilename+filename.substring(filename.length()-4, filename.length()); + } +} diff --git a/src/SplitTags.java b/src/SplitTags.java new file mode 100644 index 0000000..265f903 --- /dev/null +++ b/src/SplitTags.java @@ -0,0 +1,159 @@ +import java.sql.*; +import java.util.*; +import java.util.Date; + +import support.ConnectionSql; +import update.UpdateTableColumn; + +public class SplitTags { + + String source_table; + String source_column1; + String source_column2; + String des_table; + String des_column1; + String des_column2; + String postTypeId; + + public SplitTags() { + this.source_table = "posts"; + this.source_column1 = "id"; + this.source_column2 = "tags"; + this.des_table = "posts_split_tags"; + this.des_column1 = "posts_id"; + this.des_column2 = "tag"; + this.postTypeId="postTypeId"; + } + public SplitTags(String source_table, String source_column1, + String source_column2, String des_table, String des_column1, + String des_column2) { + this.source_table = source_table; + this.source_column1 = source_column1; + this.source_column2 = source_column2; + this.des_table = des_table; + this.des_column1 = des_column1; + this.des_column2 = des_column2; + } + public static void main(String[] args){ + SplitTags st=new SplitTags("posts","id","tags","posts_split_tags","posts_id","tag");//标签分离 + st.splitTags(); + } + + public int breakTest(Connection conn, String break_sql) { + int lastId = 0; + try { + Statement st = conn.createStatement(); + ResultSet rs = st.executeQuery(break_sql); + if (rs.next()) { + lastId = rs.getInt(des_column1); + String del_sqlString="delete from "+des_table+" where " + des_column1 + "="+ lastId; + st.execute(del_sqlString); + } + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + return lastId; + } + + public void splitTags() { + + try { + Connection conn = new ConnectionSql("ossean_db").connection(); + /* + * 由于报内存溢出,所以采用分页查询处理 + */ + int page_count = 0; // 设置页大小和页码 + int page_size = 100000; + int id = 0; // 获得需要插入的数据 + conn.setAutoCommit(false);//设置手动提交事务 + String tags; + + /* + * 增加断点续传功能 + */ + + String break_sql = "select " + + des_column1 // 断点查询 + + " from " + des_table + " order by " + des_column1 + + " desc limit 0,1"; + int breakPoint = breakTest(conn, break_sql); + /* + * 测试程序速度 + */ + System.out.print(new Date()); + System.out.println("\t\tstart" ); + + /* + * 采用子查询提高分页效率,防止效率的衰减 + + String sql = "select " + source_column1 + "," + source_column2 + + " from " + source_table + " where " + source_column1 + + " >= " + breakPoint + " limit ?,?"; // 查询语句 + */ + String sql; + if(breakPoint>0){ + sql = "select " + source_column1 + "," + source_column2 + + " from " + source_table + " where "+ source_column1+ + " >(select id from "+source_table+" order by "+ source_column1+" limit ?,1) and "+ + source_column1+ " >= " + breakPoint + " limit ?"; // 查询语句 + } + + else{ + sql = "select " + source_column1 + "," + source_column2 + + " from " + source_table + " where "+ source_column1+ + " >(select id from "+source_table+" order by "+ source_column1+" limit ?,1) limit ?";// 查询语句 + } + + String sqlInsert = " insert into " + des_table + " (" + des_column1 + + "," + des_column2 + ") values(?,?)"; // 插入语句 + PreparedStatement p_statement = conn.prepareStatement(sql); + PreparedStatement ps = conn.prepareStatement(sqlInsert); + int count = 0; + do { + p_statement.setInt(1, page_count * page_size); + p_statement.setInt(2, page_size); + ResultSet rs = p_statement.executeQuery(); + boolean isEmpty = true; + while (rs.next()) { + isEmpty = false; + id = rs.getInt(source_column1); + tags = rs.getString(source_column2); + if (tags == null) + continue; + + tags = tags.substring(1, tags.length() - 1); + String tag[] = tags.split("><"); + + for (int i = 0; i < tag.length; i++) { + ps.setInt(1, id); + ps.setString(2, tag[i]); + ps.addBatch(); + } + if (count++ > 10000) { + ps.executeBatch(); + ps.clearBatch(); + conn.commit(); + count = 0; + } + } + if (isEmpty) { + break; + } + page_count++; + /* + * 测试程序速度 + */ + System.out.print(new Date()); + System.out.println("\t\t" + page_count); + } while (true); + ps.executeBatch(); + ps.clearBatch(); + p_statement.close(); + conn.commit(); + conn.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/src/StrToDate.java b/src/StrToDate.java new file mode 100644 index 0000000..06787c1 --- /dev/null +++ b/src/StrToDate.java @@ -0,0 +1,14 @@ +import java.text.SimpleDateFormat; +import java.util.*; +public class StrToDate { + public Date strToDate(String str){ + SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + Date date=null; + try{ + date=sdf.parse(str); + }catch (Exception e) { + e.printStackTrace(); + } + return date; + } +} diff --git a/src/Test.java b/src/Test.java new file mode 100644 index 0000000..c9e1fb6 --- /dev/null +++ b/src/Test.java @@ -0,0 +1,26 @@ +import java.util.*; +import java.text.*; + +import support.ReadBigFile; +public class Test { + public static void main(String[] args){ + SplitTags st=new SplitTags("posts","id","tags","posts_split_tags","posts_id","tag");//标签分离 + st.splitTags(); + +// StrToDate std=new StrToDate();//字符串转日期 +// Date date=std.strToDate("1992-12-2 02:1:2"); +//// System.out.print(date); + +// ReadBigFile readBigFile=new ReadBigFile(); +// readBigFile.readFile("C:\\Users\\lili\\Posts.xml"); +// (new SplitHugeFile()).splitFile("C:\\Users\\lili\\Posts.xml",2000000);//大文件分解,以行数为单位 + + +// String[] realSort={"aaa","ac","bc","g","d","ab","e"}; +// String[] ideaSort={"aaa","ab","ac","bc","d","e"}; +// System.out.println(new CalculateNDCG().cal_NDCG(realSort, ideaSort, 3)); + + + } + +} diff --git a/src/specification b/src/specification new file mode 100644 index 0000000..ce1022b --- /dev/null +++ b/src/specification @@ -0,0 +1,16 @@ +StatisticsPostsCount.java:统计每个项目对应的帖子信息(对应的帖子数,帖子的总回复数,获赞数,收藏数,浏览数:(psotsCount,answerCount,score,favoriteCount,viewCount)), + 到并更新原表的对应项中。 + 主要方法:cal_count():从match_simple读出项目和匹配的标签,根据标签在split_posts_tags表找出posts_id,统计每个posts的count。 + +Update_match_result_proj_star.java: 更新match_result中的proj_stars,用于计算项目在托管社区的热度平均值。 + 主要类:Update_match_result_proj_star: + 主要方法: setProj_star() :操作update语句更新结果 + getAVGStar(String[] onlySource, String name):返回onlySource表中,name项目的stars的平均值 + 类:V:定义一些用到的常量 + 类:UpdateMatch_result:定义一些方法 + 方法:sourceFormat(String source):由source还原项目的来源表名,返回一个该项目来源的String[]还原每个项目来源表,并去掉FreeCode,因为该网站项目数据没有可量化项 + getSourceName(int id):由项目id获得从open_source_projects获得项目名称和项目源 +UpdateRank.java:多线程算法,给定一表名和表名一列,通过这一列对记录排序并更新rank列座位名次,rank列名设置更改构造函数即可 + 类:Update_match_result_rank:更新match_result排名,若相同同时存在在多个社区,取排名最小的最为项目排名 +UpdateTagPosts.java:方法更新tag表中每个tag关联的posts个数postsNum。 + 方法:countPostsNum()实现功能 \ No newline at end of file diff --git a/src/support/ConnectionSql.java b/src/support/ConnectionSql.java new file mode 100644 index 0000000..250084d --- /dev/null +++ b/src/support/ConnectionSql.java @@ -0,0 +1,27 @@ +package support; +import java.sql.Connection; +import java.sql.DriverManager; + + +public class ConnectionSql { + String driver="com.mysql.jdbc.Driver"; + String url; + String user="root"; + String password="mysql"; +public ConnectionSql(String sqlName) { + // TODO Auto-generated constructor stub + url="jdbc:mysql://127.0.0.1:3306/"+sqlName+"?useUnicode=true&charsetEncoding=utf8&useCursorFetch=true&defaultFetchSize=100"; +// url="jdbc:mysql://127.0.0.1:3306/"+sqlName; +} +public Connection connection(){ + Connection conn=null; + try{ + Class.forName(driver); + conn=DriverManager.getConnection(url, user, password); + }catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + return conn; +} +} diff --git a/src/support/DataHandle.java b/src/support/DataHandle.java new file mode 100644 index 0000000..3d10168 --- /dev/null +++ b/src/support/DataHandle.java @@ -0,0 +1,29 @@ +package support; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class DataHandle { + /** + * + * @param 对data列表进行Frobenius范数归一化 + * @return + */ +public static List frobenius(List data) { + Double sum_square=0d; + Iterator iterator=data.iterator(); + while(iterator.hasNext()){ + double datai=iterator.next(); + sum_square+=datai*datai; + } + double frob_num= Math.pow(sum_square, 0.5); + List frob_list=new ArrayList(); + iterator=data.listIterator(); + + while(iterator.hasNext()){ + frob_list.add(iterator.next()/frob_num); + } + return frob_list; +} +} diff --git a/src/support/DateHandle.java b/src/support/DateHandle.java new file mode 100644 index 0000000..16f2563 --- /dev/null +++ b/src/support/DateHandle.java @@ -0,0 +1,126 @@ +package support; + +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + + +/** + * 对日期的相关处理 + * @author LiLy + * + */ +public class DateHandle { + static String levDate[]; // + + public DateHandle() { + // TODO Auto-generated constructor stub + } + + public DateHandle(String refDate,int n) { + // TODO Auto-generated constructor stub + this.levDate=new String[n+1]; + Date date=stringToDate(refDate); + Calendar c = Calendar.getInstance(); + c.setTime(date); + for(int i=1;i<=n;i++) + { + date=c.getTime(); + this.levDate[i]=DateHandle.FormatDate(date); + c.add(Calendar.MONTH, -1); + } + } + + /** + * 比较两个日期(date1,date2)的大小,date1>date2大于(即date1在date2之后的时间)返回1,等于返回0,小于返回-1 + * @return + */ +public static int compareDate(Date date1,Date date2) { + + try { + if (date1.getTime() > date2.getTime()) { + return 1; + } else if (date1.getTime() < date2.getTime()) { + return -1; + } else { + return 0; + } + } catch (Exception exception) { + exception.printStackTrace(); + } + return 0; + } + +/** + * 判断Strings是否可以转换为yyyy-mm-dd HH:MM:SS形式的Date型 + * + * @param strings + * @return + */ +public static boolean canFormatToDate(String string) { + String dateFormat="yyyy-MM-dd HH:mm:ss"; + SimpleDateFormat sdf = new SimpleDateFormat(dateFormat); + if (string.isEmpty()) + return false; + + try { + sdf.parse(string); + } catch (ParseException e) { + return false; + } + return true; +} + +/** + * 将date格式转换为yyyy-mm-dd HH:MM:SS形式的Date型 + * + * @param strings + * @return + */ +public static String FormatDate(Date date) { + String dateFormat="yyyy-MM-dd HH:mm:ss"; + SimpleDateFormat sdf = new SimpleDateFormat(dateFormat); + String formatDate=sdf.format(date); + return formatDate; +} + + +/** + * 将string转成Date + * @param string + * @return + */ +public static Date stringToDate(String string){ + + String dateFormat="yyyy-MM-dd HH:mm:ss"; + SimpleDateFormat sdf = new SimpleDateFormat(dateFormat); + Date date=null; + try { + date = sdf.parse(string); + } catch (ParseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return date; +} + +public static int levDate(String date) { + if(date==null) + return 0; + for(int i=DateHandle.levDate.length-1;i>0;i--){ + if(date.compareTo(DateHandle.levDate[i])>0){ + return i; + } + } + return 0; +} + +public static void main(String[] args) { + +// new DateHandle("2014-02-01 00:00:00", 12); +// if ("2014-02-01 00:00:00".compareTo("2014-02-01 00:00:00.23")==1) + System.out.print("2014-02-01 00:01:00".compareTo("2014-02-01 01:00:00.23")); +} +} diff --git a/src/support/ReadBigFile.class b/src/support/ReadBigFile.class new file mode 100644 index 0000000..60292e0 Binary files /dev/null and b/src/support/ReadBigFile.class differ diff --git a/src/support/ReadBigFile.java b/src/support/ReadBigFile.java new file mode 100644 index 0000000..4af989d --- /dev/null +++ b/src/support/ReadBigFile.java @@ -0,0 +1,71 @@ +package support; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.net.URL; +import java.net.URLEncoder; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +public class ReadBigFile extends Thread{ + String filename; + public ReadBigFile(String filename) { + // TODO Auto-generated constructor stub + this.filename=filename; + } + public void insertSql(List sqls) + { + Connection con=new ConnectionSql("ossean_db").connection(); + + try{ + con.setAutoCommit(false); + Statement st=con.createStatement(); + for(String s:sqls) + { + st.addBatch(s); + } + st.executeBatch(); + st.close(); + con.commit(); + con.close(); + }catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + + + } + public void readFile() { + File file = new File(filename); + BufferedReader reader = null; + try { + reader = new BufferedReader(new FileReader(file), 20 * 1024 * 1024); + String tempString = null; + int count = 1; + List sqls=new ArrayList(); + while ((tempString = reader.readLine()) != null) { +// tempString=URLEncoder.encode(tempString, "utf-8"); + sqls.add(tempString); + if(count++%1000==0){ + insertSql(sqls); + sqls.clear(); + System.out.println(count); + } + } + insertSql(sqls); + reader.close(); + } catch (Exception e) { + // TODO: handle exception + } + } + public void run() { + readFile(); + } + public static void main(String[] args) { + String path="C:\\Users\\lili\\Desktop\\sql\\"; + new ReadBigFile(path+"oschina_project.sql").run(); + } +} diff --git a/src/support/Similarity.java b/src/support/Similarity.java new file mode 100644 index 0000000..eca7903 --- /dev/null +++ b/src/support/Similarity.java @@ -0,0 +1,91 @@ +package support; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.*; +import java.lang.Math; +public class Similarity extends Thread{ + public Double cla_EX(List data) { + int len = data.size(); + Double ex = 0d; + Iterator iterator = data.iterator(); + Double sum = 0d; + while (iterator.hasNext()) { + sum += iterator.next(); + } + return ex = sum / len; + } + public Double cla_sim(List data1, List data2) { + int len = data1.size(); + List data3 = new ArrayList(); + List data4 = new ArrayList(); + List data5 = new ArrayList(); + Iterator iterator1 = data1.iterator(); + Iterator iterator2 = data2.iterator(); + while (iterator1.hasNext()) { + Double temp = iterator1.next(); + data3.add(temp * temp); + } + while (iterator2.hasNext()) { + Double temp = iterator2.next(); + data4.add(temp * temp); + } + iterator1 = data1.iterator(); + iterator2 = data2.iterator(); + while (iterator1.hasNext()) { + data5.add(iterator1.next() * iterator2.next()); + } + Double E_data1 = cla_EX(data1); + Double E_data2 = cla_EX(data2); + Double E_data3 = cla_EX(data3); + Double E_data4 = cla_EX(data4); + Double E_data5 = cla_EX(data5); + return (E_data5 - E_data1 * E_data2) + / (Math.pow(E_data3 - E_data1 * E_data1, 0.5) * Math.pow( + E_data4 - E_data2 * E_data2, 0.5)); + } + public void operaterSql(String dataBaseName,String tablename,String column1,String column2) { + Connection conn=new ConnectionSql(dataBaseName).connection(); + try { + Statement st=conn.createStatement(); + String sql="select "+column1+" from "+tablename; + ResultSet rs=st.executeQuery(sql); + List data1=new ArrayList(); + List data2=new ArrayList(); + while (rs.next()) { + double temp=rs.getDouble(1); + data1.add(temp); + } + sql="select "+column2+" from "+tablename; + rs=st.executeQuery(sql); + while (rs.next()) { + double temp=rs.getDouble(1); + data2.add(temp); + } + + Double similarity=cla_sim(data1,data2); + System.out.println(similarity); + tablename="'"+tablename+"'"; + column1="'"+column1+"'"; + column2="'"+column2+"'"; + sql="replace into similary_record(table_name,column1,column2,similarity) values ("+tablename+","+column1+","+column2+","+similarity+")"; + st.execute(sql); + conn.close(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + } + public static void main(String[] args) { + Similarity similarity=new Similarity(); + // similarity.operaterSql("ossean_db","sourceforge_project","stars","downloadCount"); + +// similarity.operaterSql("ossean_db","match_simple","db_score","postsCount"); +// similarity.operaterSql("ossean_db","match_simple","db_score","score"); +// similarity.operaterSql("ossean_db","match_simple","db_score","favoriteCount"); +// similarity.operaterSql("ossean_db","match_simple","db_score","viewCount"); +// similarity.operaterSql("ossean_db","match_simple","db_score","answerCount"); + similarity.operaterSql("ossean_db","match_simple","postsCount","viewCount"); + } + +} \ No newline at end of file diff --git a/src/support/StringHandle.java b/src/support/StringHandle.java new file mode 100644 index 0000000..14ed8d9 --- /dev/null +++ b/src/support/StringHandle.java @@ -0,0 +1,5 @@ +package support; + +import java.util.HashMap; +import java.util.Map; + diff --git a/src/update/StatisticsPostsCount.java b/src/update/StatisticsPostsCount.java new file mode 100644 index 0000000..9cf1a95 --- /dev/null +++ b/src/update/StatisticsPostsCount.java @@ -0,0 +1,101 @@ +package update; +import support.ConnectionSql; + +import java.sql.*; +import java.util.*; +import java.util.Date; + +public class StatisticsPostsCount { +String tablename="match_simple"; +String column1="score"; +String column2="favoriteCount"; +String column3="viewCount"; +String column4="answerCount"; +String column5="postsCount"; +String proj_name="proj_name"; +String tag="tag"; +String posts_split_tags="posts_split_tags"; +String posts_id="posts_id"; +String posts="posts"; +String id="id"; +String postTypeId="postTypeId"; +Connection conn=new ConnectionSql("ossean_db").connection(); +public StatisticsPostsCount() { + // TODO Auto-generated constructor stub +} +public StatisticsPostsCount(String tablename,String coulumn1,String coulumn2,String coulumn3,String coulumn4,String coulumn5) { + // TODO Auto-generated constructor stub +this.tablename=tablename; +this.column1=coulumn1; +this.column2=coulumn2; +this.column3=coulumn3; +this.column4=coulumn4; +this.column5=coulumn5; +} + +/** + * 统计每个项目对应的帖子数,帖子的总回复数,获赞数,收藏数,浏览数:(psotsCount,answerCount,score,favoriteCount,viewCount) + * 从match_simple读出项目和匹配的标签,根据标签在split_posts_tags表找出posts_id,统计每个posts的count + */ +public boolean cal_count() { + try { + conn.setAutoCommit(false); + Statement st1=conn.createStatement(); + Statement st2; + Statement st3; + String sql="select "+proj_name+","+tag+" from "+tablename; + ResultSet rs1=st1.executeQuery(sql); + ResultSet rs2; + ResultSet rs3; + System.out.print("start\t\t\t");System.out.println(new Date()); + + while(rs1.next()){ + String temp_proj_name="'"+rs1.getString(proj_name)+"'"; + String temp_tag="'"+rs1.getString(tag)+"'"; + st2=conn.createStatement(); + sql="select "+posts_id+" from "+posts_split_tags+" where "+tag+" = "+temp_tag; + rs2=st2.executeQuery(sql); //查询post_split_tags得到posts_id + int data1=0,data2=0,data3=0,data4=0,data5=0; + while(rs2.next()){ + int temp_posts_id=rs2.getInt(posts_id); + st3=conn.createStatement(); + sql="select "+column1+","+column2+","+column3+","+column4+" from "+posts+" where "+id+" = "+temp_posts_id; + rs3=st3.executeQuery(sql); + while(rs3.next()){ + data1+=rs3.getInt(column1); + data2+=rs3.getInt(column2); + data3+=rs3.getInt(column3); + data4+=rs3.getInt(column4); + data5++; + + } + rs3.close(); + st3.close(); + } + rs2.close(); + sql="update "+tablename+" set "+column1+"="+data1+","+column2+"="+data2+","+column3+"="+data3+","+column4+"="+data4+","+column5+"="+data5+" where "+proj_name+" = "+temp_proj_name; + st2.execute(sql); + st2.close(); + System.out.print(temp_proj_name+"\t\t"); + System.out.println(new Date()); + + } + rs1.close(); + st1.close(); + conn.commit(); + conn.close(); + System.out.println("down"); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + + return false; + } + return true; +} + +public static void main(String[] args) { + StatisticsPostsCount statisticsPostsCount=new StatisticsPostsCount(); + statisticsPostsCount.cal_count(); +} +} diff --git a/src/update/UpdateRank.java b/src/update/UpdateRank.java new file mode 100644 index 0000000..50a9b9f --- /dev/null +++ b/src/update/UpdateRank.java @@ -0,0 +1,214 @@ +package update; + +import java.sql.Connection; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.Date; +import java.util.List; + +import support.ConnectionSql; +/** + * 给定一列,通过这一列对记录排序并更新rank列座位名次,rank列名设置更改构造函数即可 + */ +public class UpdateRank extends Thread{ + String tablename; + String columnname; + String columnname2; + String id="id"; + String rank="rank"; +// String rank="rank_"; + public UpdateRank(String tablename,String columnname) { + // TODO Auto-generated constructor stub + this.tablename=tablename; + this.columnname=columnname; + this.rank=this.rank+this.columnname; + this.rank=this.columnname; //用本列更新本列排名 + } + public UpdateRank(String tablename,String columnname,String rankString) { + // TODO Auto-generated constructor stub + this.tablename=tablename; + this.columnname=columnname; + this.rank=rankString; + } + public void operateSql() { + Connection conn = new ConnectionSql("ossean_db").connection(); + try { + conn.setAutoCommit(false); + Statement st=conn.createStatement(); + String sql1="select "+id+" from "+tablename+" order by "+columnname+" DESC";//数据越大排名越小 +// String sql1="select "+id+" from "+tablename+" order by "+columnname;//数据越大,排名越大 + ResultSet rs=st.executeQuery(sql1);//查询需要参考的数据排名结果 + String updataSql="update "+tablename+" set "+rank+"=? where "+id+"=?"; +// String updataSql="update "+tablename+" set stars=?"; + PreparedStatement ps=conn.prepareStatement(updataSql); + int rank=1; + System.out.println("start!"); + while (rs.next()) { + int num=rs.getInt(1); + ps.setInt(1, rank); + ps.setInt(2, num); + ps.addBatch();//添加更新rank语句 + if(rank++%10000==0) + { + + System.out.println(rank); + ps.executeBatch(); + ps.clearBatch(); + conn.commit(); + } + } + ps.executeBatch(); + ps.clearBatch(); + conn.commit(); + /*用两个参考stars + if(columnname2!=null) + { + + sql1="select "+columnname2+" from "+tablename+" order by "+columnname2; + rs=st.executeQuery(sql1); + updataSql="update "+tablename+" set stars=? where "+columnname2+"=?"; + ps=conn.prepareStatement(updataSql); + rank=1; + while (rs.next()) { + int num=rs.getInt(1); + sql1="select stars from "+tablename+" where "+columnname2+"="+num; + Statement st2=conn.createStatement(); + ResultSet rs2=st2.executeQuery(sql1); + int star1=0; + if(rs.next()){star1=rs2.getInt(1);} + ps.setInt(1, (rank++/count+star1)/2); + ps.setInt(2, num); + ps.addBatch(); + if(rank%1000==0) + { + ps.execute(); + conn.commit(); + } + } + ps.execute(); + conn.commit(); + } + */ + conn.close(); + System.out.print(tablename+" down!"); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + } + public void run() { + operateSql(); + } + public static void main(String[] args) { + +// UpdateRank updateRank1=new UpdateRank("tag","postsNum"); +// updateRank1.run(); +// UpdateRank updateRank2=new UpdateRank("oschina_project","usedNum"); +// updateRank2.run(); +// UpdateRank updateRank3=new UpdateRank("openhub_project","useCount"); +// updateRank3.run(); +// UpdateRank updateRank4=new UpdateRank("sourceforge_project","downloadCount"); +// updateRank4.run(); + + /* + UpdateRank updatematch_simpleRank1=new UpdateRank("match_simple","postsCount"); + updatematch_simpleRank1.run(); + UpdateRank updatematch_simpleRank2=new UpdateRank("match_simple","score"); + updatematch_simpleRank2.run(); + UpdateRank updatematch_simpleRank3=new UpdateRank("match_simple","favoriteCount"); + updatematch_simpleRank3.run(); + UpdateRank updatematch_simpleRank4=new UpdateRank("match_simple","viewCount"); + updatematch_simpleRank4.run(); + UpdateRank updatematch_simpleRank5=new UpdateRank("match_simple","answerCount"); + updatematch_simpleRank5.run(); + */ + +// UpdateRank updaterank_opensource_DB_engine=new UpdateRank("rank_open_source_DB_engine","score"); +// updaterank_opensource_DB_engine.run(); + +// Update_match_result_rank umr_rank=new Update_match_result_rank(); +// System.out.println(umr_rank.set_rank()); + + new UpdateRank("rank_open_source_db_engine", "rank_last_month").run();; + } +} +class Update_match_result_rank{ //更新match_result中proj_rank,若相同同时存在在多个社区,取排名最小的最为项目排名 + Connection connection; + public Update_match_result_rank() { + // TODO Auto-generated method stub + connection = new ConnectionSql("ossean_db").connection(); + } + int getMinRank(String[] onlySource,String name){ + int rank=0; + name=name.replace('\'', '\"'); + name="'"+name+"'"; + int len=onlySource.length; + for(int i=0;i Source_name=umr.getSourceName(temp_id); + if(Source_name.isEmpty()){ + rank=-1; + } + else if(Source_name.get(0).equals("FreeCode")) + { + rank=-2; + } + else{ + String name=Source_name.get(1); + String[] onlySource=umr.sourceFormat(Source_name.get(0)); + rank=getMinRank(onlySource, name); + } + ps.setInt(1, rank); + ps.addBatch(); + if(count++%1000==0) + { + ps.executeBatch(); + ps.clearBatch(); + System.out.println(count); + } + } + ps.executeBatch(); + ps.clearBatch(); + connection.commit(); + connection.close(); + return true; + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + return false; + } + } +} \ No newline at end of file diff --git a/src/update/UpdateStar.java b/src/update/UpdateStar.java new file mode 100644 index 0000000..92ce1c5 --- /dev/null +++ b/src/update/UpdateStar.java @@ -0,0 +1,78 @@ +package update; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.Date; + +import support.ConnectionSql; +public class UpdateStar extends Thread{ + String tablename; + String columnname="rank"; + int levs=6; + public UpdateStar(String tablename) { + // TODO Auto-generated constructor stub + this.tablename=tablename; + } + public UpdateStar(String tablename,String columnname) { + // TODO Auto-generated constructor stub + this.tablename=tablename; + this.columnname=columnname; + } +// public double cal_star(int count,int maxCount) { +// double star=0d; +// double step=(double)maxCount/6; +// star=count/step; +// return star; +// } + public void operateSql() { + Connection conn = new ConnectionSql("ossean_db").connection(); + try { + conn.setAutoCommit(false); + + String sql1="select max("+columnname+") from "+tablename; + int count=0; + Statement st=conn.createStatement(); + ResultSet rs= st.executeQuery(sql1); //查询记录的数,确定排名的数据量 + if(rs.next()){count=rs.getInt(1)/levs;} //确定每个级别数据量 + sql1="select "+columnname+" from "+tablename+" order by "+columnname; + rs=st.executeQuery(sql1);//查询需要参考的数据排名结果 + String updataSql="update "+tablename+" set stars=? where "+columnname+"=?"; +// String updataSql="update "+tablename+" set stars=?"; + PreparedStatement ps=conn.prepareStatement(updataSql); + System.out.println(tablename+" start!"); + while (rs.next()) { + int num=rs.getInt(1); + ps.setInt(1, num/count); + ps.setInt(2, num); + ps.addBatch();//添加更新stars语句 + if(num%10000==0) + { + ps.executeBatch(); + ps.clearBatch(); + } + } + ps.executeBatch(); + ps.clearBatch(); + conn.commit(); + conn.close(); + System.out.println(tablename+" down!"); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + } + public void run() { + operateSql(); + } + public static void main(String[] args) { + + UpdateStar updateStar1=new UpdateStar("tag"); +// updateStar1.run(); + UpdateStar updateStar2=new UpdateStar("oschina_project"); + updateStar2.run(); + UpdateStar updateStar3=new UpdateStar("openhub_project"); + updateStar3.run(); + + } +} diff --git a/src/update/UpdateTableColumn.java b/src/update/UpdateTableColumn.java new file mode 100644 index 0000000..fdcf2e5 --- /dev/null +++ b/src/update/UpdateTableColumn.java @@ -0,0 +1,53 @@ +package update; + +import java.sql.Connection; +import java.sql.Statement; + +import support.ConnectionSql; + +/** + * 两个表的连接更新程序 + * @author LiLy + * + */ +public class UpdateTableColumn { +String table1; +String table2; +String connColumn1; +String connColumn2; +String updateColumn; +String referColumn; +Connection conn; +public UpdateTableColumn(String table1,String table2,String connColumn1,String connColumn2,String updateColumn,String referColumn,Connection conn) { + // TODO Auto-generated constructor stub +this.table1=table1; +this.table2=table2; +this.connColumn1=connColumn1; +this.connColumn2=connColumn2; +this.updateColumn=updateColumn; +this.referColumn=referColumn; +this.conn=conn; +} + +public boolean updateMethod () { + String sql="update "+table1+","+table2+" set "+ + table1+"."+updateColumn+" = "+table2+"."+referColumn+ + " where "+table1+"."+connColumn1+" = "+table2+"."+connColumn2; + try { + Statement st=conn.createStatement(); + st.execute(sql); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + return false; + } + + return true; +} + +public static void main(String[] args) { + Connection conn=new ConnectionSql("ossean_db").connection(); + UpdateTableColumn up=new UpdateTableColumn("posts_split_tags", "tag", "tag", "tag", "tag_id", "id", conn); + up.updateMethod(); +} +} diff --git a/src/update/UpdateTagPostsNum.java b/src/update/UpdateTagPostsNum.java new file mode 100644 index 0000000..5ddcbbd --- /dev/null +++ b/src/update/UpdateTagPostsNum.java @@ -0,0 +1,53 @@ +package update; + + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +import support.ConnectionSql; +/** + * 统计tag表中tag标签在posts中关联帖子的个数并更新postsNum字段 + * @author LiLy + * + */ +public class UpdateTagPostsNum { + public void countPostsNum() throws SQLException{ + Connection conn = new ConnectionSql("ossean_db").connection(); + try { + conn.setAutoCommit(false); + Statement st=conn.createStatement(); + String sqlSelect="select tag from tag"; + ResultSet rs=st.executeQuery(sqlSelect); + Statement stm=conn.createStatement(); + int flag=0; + while(rs.next()) + { + String tag=rs.getString("tag"); + tag="'"+tag+"'"; + sqlSelect="select count(tag) from posts_split_tags where tag="+tag; + + ResultSet rscount=stm.executeQuery(sqlSelect); + if(rscount.next()) + { + String sql="update tag set tag.postsNum="+rscount.getInt(1)+" where tag.tag= "+tag; + stm.execute(sql); + } + if(flag++%1000==0){System.out.println(flag);} + } + System.out.println("Down!"); + conn.commit(); + conn.close(); + } catch (Exception e) { + // TODO: handle exception + conn.rollback(); + e.printStackTrace(); + } + + } + public static void main(String[] args) throws SQLException{ + UpdateTagPostsNum updateTagPosts=new UpdateTagPostsNum(); + updateTagPosts.countPostsNum(); + } +} diff --git a/src/update/Update_match_result_proj_star.java b/src/update/Update_match_result_proj_star.java new file mode 100644 index 0000000..f84e518 --- /dev/null +++ b/src/update/Update_match_result_proj_star.java @@ -0,0 +1,179 @@ +package update; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import support.ConnectionSql; + + +/** + * 定义用到的表名和列名 + */ +class V { + public static String match_result = "match_result"; + public static String sourceTable = "open_source_projects"; + public static String onlySourceTable1 = "openhub_projects"; + public static String onlySourceTable2 = "oschina_projects"; + public static String onlySourceTable3 = "sourceforge_projects"; + public static String proj_star = "proj_star"; + public static String proj_rank = "proj_rank"; + public static String proj_id = "proj_id"; + public static String rank = "rank"; + public static String stars = "stars"; + public static String source = "source"; + public static Map proj_nameMap = new HashMap() { + { + put("SourceForge_project", "name"); + put("OpenHub_project", "name"); + put("OSChina_project", "projectShortName"); + put("open_source_projects", "name"); + } + }; + +} + + +/** + * 更新match_result,定义计算rank和stars公用的函数 + * sourceFormat(String source):由source还原项目的来源表名,返回一个该项目来源的String[]还原每个项目来源表,并去掉FreeCode,因为该网站项目数据没有可量化项 + * getSourceName(int id):由项目id获得从open_source_projects获得项目名称和项目源 + */ +class UpdateMatch_result { + Connection connection; + + public UpdateMatch_result(Connection connection) { + // TODO Auto-generated constructor stub + this.connection = connection; + } + + String[] sourceFormat(String source) { + String[] sources = source.split("FreeCode")[0].split(","); + List formatSource = new ArrayList(); + ; + for (int i = 0; i < sources.length; i++) { + if (!sources[i].equals("")) { + formatSource.add(sources[i] + "_project"); + } + } + return (String[]) formatSource.toArray(new String[0]); + } + + List getSourceName(int id) { + List source_name = new ArrayList(); + String sql = "select " + V.source + "," + + V.proj_nameMap.get(V.sourceTable) + " from " + V.sourceTable + + " where id=" + id + " and source!='FreeCode'"; + try { + Statement st = connection.createStatement(); + ResultSet rs = st.executeQuery(sql); + if (rs.next()) { + source_name.add(rs.getString(1)); + source_name.add(rs.getString(2)); + } + st.close(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + System.out.println(id); + } + return source_name; + } +} + +public class Update_match_result_proj_star { + Connection connection; + + public Update_match_result_proj_star(String database) { + // TODO Auto-generated constructor stub + connection = new ConnectionSql(database).connection(); + + } + + // List getSource() + + double getAVGStar(String[] onlySource, String name) { + double star = 0; + name = name.replace('\'', '\"'); + name = "'" + name + "'"; + int len = onlySource.length; + for (int i = 0; i < len; i++) { + String sql = "select " + V.stars + " from " + onlySource[i] + + " where " + V.proj_nameMap.get(onlySource[i]) + "=" + + name; + try { + Statement st = connection.createStatement(); + ResultSet rs = st.executeQuery(sql); + if (rs.next()) { + star += rs.getInt(1); + } + + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + } + return star / len; + } + + public void setProj_star() { + try { + connection.setAutoCommit(false); + Statement st1 = connection.createStatement(); + UpdateMatch_result umr = new UpdateMatch_result(connection); + String sql1 = "select distinct(" + V.proj_id + ") from " + + V.match_result; + ResultSet rs = st1.executeQuery(sql1); + String sql2 = "update " + V.match_result + " set " + V.proj_star + + "=? where " + V.proj_id + "=?"; + PreparedStatement ps = connection.prepareStatement(sql2); + int count = 1; + double star = 0; + while (rs.next()) { + int temp_id = rs.getInt(1); + ps.setInt(2, temp_id); + List Source_name = umr.getSourceName(temp_id); + if (Source_name.isEmpty()) { + star = -1; + } else if (Source_name.get(0).equals("FreeCode")) { + star = -2; + } else { + String name = Source_name.get(1); + String[] onlySource = umr.sourceFormat(Source_name.get(0)); + + star = getAVGStar(onlySource, name); + ps.setDouble(1, star); + + } + + ps.addBatch(); + if (count++ % 1000 == 0) { + ps.executeBatch(); + ps.clearBatch(); + System.out.println(count); + } + } + ps.executeBatch(); + ps.clearBatch(); + connection.commit(); + connection.close(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + + } + + } + + public static void main(String[] args) { + Update_match_result_proj_star u_proj_star = new Update_match_result_proj_star( + "ossean_db"); + u_proj_star.setProj_star(); + System.out.print("Down!"); + + } +}