From 32fcafb5f0705cdc1278fdea12f58fd0e5c33103 Mon Sep 17 00:00:00 2001 From: oeljeklaus-you Date: Sat, 23 Jun 2018 21:26:55 +0800 Subject: [PATCH] =?UTF-8?q?=E7=83=AD=E9=97=A8=E5=95=86=E5=93=81Top10?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/workspace.xml | 606 ++++++++++-------- .../cn/edu/hust/dao/SessionAggrStatDao.java | 2 + .../cn/edu/hust/dao/Top10CategoryDao.java | 10 + .../cn/edu/hust/dao/factory/DaoFactory.java | 12 +- .../hust/dao/impl/SessionAggrStatDaoImpl.java | 22 + .../hust/dao/impl/Top10CategoryDaoImpl.java | 36 ++ .../cn/edu/hust/domain/Top10Category.java | 61 ++ .../cn/edu/hust/session/CategorySortKey.java | 11 +- .../cn/edu/hust/session/UserVisitAnalyze.java | 71 +- 9 files changed, 544 insertions(+), 287 deletions(-) create mode 100644 src/main/java/cn/edu/hust/dao/Top10CategoryDao.java create mode 100644 src/main/java/cn/edu/hust/dao/impl/Top10CategoryDaoImpl.java create mode 100644 src/main/java/cn/edu/hust/domain/Top10Category.java diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 34d6455..08e8b76 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,8 +2,15 @@ - + + + + + + + + @@ -16,94 +23,173 @@ - + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + + + + + + + + + + + - - - - - - - - - - - - + - - + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -145,7 +231,6 @@ @@ -272,6 +361,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -594,7 +721,7 @@ - + 1529592741848 @@ -646,7 +773,7 @@ - @@ -662,7 +789,7 @@ - + @@ -750,32 +877,11 @@ - - - - - - - - - - - - - - - - - - - - - - @@ -926,87 +1032,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1154,19 +1179,6 @@ - - - - - - - - - - - - - @@ -1283,24 +1295,6 @@ - - - - - - - - - - - - - - - - - - @@ -1323,10 +1317,10 @@ - + - - + + @@ -1339,74 +1333,170 @@ - + - - + + - + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/java/cn/edu/hust/dao/SessionAggrStatDao.java b/src/main/java/cn/edu/hust/dao/SessionAggrStatDao.java index 00f6823..f35d073 100644 --- a/src/main/java/cn/edu/hust/dao/SessionAggrStatDao.java +++ b/src/main/java/cn/edu/hust/dao/SessionAggrStatDao.java @@ -3,7 +3,9 @@ package cn.edu.hust.dao; import cn.edu.hust.domain.SessionAggrStat; import java.io.Serializable; +import java.util.List; public interface SessionAggrStatDao extends Serializable{ void insert(SessionAggrStat sessionAggrStat); + void batchInsert(List sessionAggrStatList); } diff --git a/src/main/java/cn/edu/hust/dao/Top10CategoryDao.java b/src/main/java/cn/edu/hust/dao/Top10CategoryDao.java new file mode 100644 index 0000000..23ff26e --- /dev/null +++ b/src/main/java/cn/edu/hust/dao/Top10CategoryDao.java @@ -0,0 +1,10 @@ +package cn.edu.hust.dao; + +import cn.edu.hust.domain.Top10Category; + +import java.util.List; + +public interface Top10CategoryDao { + void insert(Top10Category top10Category); + void batchInsert(List top10CategoryList); +} diff --git a/src/main/java/cn/edu/hust/dao/factory/DaoFactory.java b/src/main/java/cn/edu/hust/dao/factory/DaoFactory.java index 08d6a41..ea9cfa1 100644 --- a/src/main/java/cn/edu/hust/dao/factory/DaoFactory.java +++ b/src/main/java/cn/edu/hust/dao/factory/DaoFactory.java @@ -1,13 +1,7 @@ package cn.edu.hust.dao.factory; -import cn.edu.hust.dao.SessionAggrStatDao; -import cn.edu.hust.dao.SessionDetailDao; -import cn.edu.hust.dao.SessionRandomExtractDao; -import cn.edu.hust.dao.TaskDao; -import cn.edu.hust.dao.impl.SessionAggrStatDaoImpl; -import cn.edu.hust.dao.impl.SessionDetailDaoImpl; -import cn.edu.hust.dao.impl.SessionRandomExtractDaoImpl; -import cn.edu.hust.dao.impl.TaskDaoImpl; +import cn.edu.hust.dao.*; +import cn.edu.hust.dao.impl.*; import cn.edu.hust.domain.SessionDetail; import cn.edu.hust.domain.SessionRandomExtract; @@ -34,4 +28,6 @@ public class DaoFactory { { return new SessionDetailDaoImpl(); } + + public static Top10CategoryDao getTop10CategoryDao(){ return new Top10CategoryDaoImpl();} } diff --git a/src/main/java/cn/edu/hust/dao/impl/SessionAggrStatDaoImpl.java b/src/main/java/cn/edu/hust/dao/impl/SessionAggrStatDaoImpl.java index b3f944c..23d21a1 100644 --- a/src/main/java/cn/edu/hust/dao/impl/SessionAggrStatDaoImpl.java +++ b/src/main/java/cn/edu/hust/dao/impl/SessionAggrStatDaoImpl.java @@ -4,6 +4,9 @@ import cn.edu.hust.dao.SessionAggrStatDao; import cn.edu.hust.domain.SessionAggrStat; import cn.edu.hust.jdbc.JDBCHelper; +import java.util.ArrayList; +import java.util.List; + public class SessionAggrStatDaoImpl implements SessionAggrStatDao{ @Override public void insert(SessionAggrStat sessionAggrStat) { @@ -18,4 +21,23 @@ public class SessionAggrStatDaoImpl implements SessionAggrStatDao{ sessionAggrStat.getStep_Length_30_60()}; JDBCHelper.getInstance().excuteUpdate(sql,params); } + + @Override + public void batchInsert(List sessionAggrStatList) { + String sql="insert into session_aggr_stat values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; + List paramList=new ArrayList(); + for (SessionAggrStat sessionAggrStat:sessionAggrStatList) + { + Object[] params={sessionAggrStat.getTaskId(),sessionAggrStat.getSessionCount(),sessionAggrStat.getVisit_Length_1s_3s(), + sessionAggrStat.getVisit_Length_4s_6s(),sessionAggrStat.getVisit_Length_7s_9s(), + sessionAggrStat.getVisit_Length_10s_30s(),sessionAggrStat.getVisit_Length_30s_60s(), + sessionAggrStat.getVisit_Length_1m_3m(),sessionAggrStat.getVisit_Length_3m_10m() + ,sessionAggrStat.getVisit_Length_10m_30m(),sessionAggrStat.getVisit_Length_30m(), + sessionAggrStat.getStep_Length_1_3(),sessionAggrStat.getStep_Length_4_6(),sessionAggrStat.getStep_Length_7_9(), + sessionAggrStat.getStep_Length_7_9(),sessionAggrStat.getStep_Length_10_30(), + sessionAggrStat.getStep_Length_30_60()}; + paramList.add(params); + } + JDBCHelper.getInstance().excuteBatch(sql,paramList); + } } diff --git a/src/main/java/cn/edu/hust/dao/impl/Top10CategoryDaoImpl.java b/src/main/java/cn/edu/hust/dao/impl/Top10CategoryDaoImpl.java new file mode 100644 index 0000000..d87ed43 --- /dev/null +++ b/src/main/java/cn/edu/hust/dao/impl/Top10CategoryDaoImpl.java @@ -0,0 +1,36 @@ +package cn.edu.hust.dao.impl; + +import cn.edu.hust.dao.Top10CategoryDao; +import cn.edu.hust.domain.Top10Category; +import cn.edu.hust.jdbc.JDBCHelper; + +import java.util.ArrayList; +import java.util.List; + +public class Top10CategoryDaoImpl implements Top10CategoryDao { + @Override + public void insert(Top10Category top10Category) { + String sql="insert into top10_category values(?,?,?,?,?)"; + Object[] params=new Object[]{ + top10Category.getTaskId(),top10Category.getCategoryId(),top10Category.getCategoryId(), + top10Category.getClickCount(),top10Category.getOrderCount(),top10Category.getPayCount() + }; + JDBCHelper.getInstance().excuteUpdate(sql,params); + } + + @Override + public void batchInsert(List top10CategoryList) { + String sql="insert into top10_category values(?,?,?,?,?)"; + List paramList=new ArrayList(); + for (Top10Category top10Category:top10CategoryList) + { + Object[] params=new Object[]{ + top10Category.getTaskId(),top10Category.getCategoryId(), + top10Category.getClickCount(),top10Category.getOrderCount(),top10Category.getPayCount() + }; + paramList.add(params); + } + + JDBCHelper.getInstance().excuteBatch(sql,paramList); + } +} diff --git a/src/main/java/cn/edu/hust/domain/Top10Category.java b/src/main/java/cn/edu/hust/domain/Top10Category.java new file mode 100644 index 0000000..8b7eac4 --- /dev/null +++ b/src/main/java/cn/edu/hust/domain/Top10Category.java @@ -0,0 +1,61 @@ +package cn.edu.hust.domain; + +public class Top10Category { + private Long taskId; + private Long categoryId; + private Long clickCount; + private Long orderCount; + private Long payCount; + + + public Top10Category() { + } + + public void set(Long taskId, Long categoryId, Long clickCount, Long orderCount, Long payCount) { + this.taskId = taskId; + this.categoryId = categoryId; + this.clickCount = clickCount; + this.orderCount = orderCount; + this.payCount = payCount; + } + + public Long getTaskId() { + return taskId; + } + + public void setTaskId(Long taskId) { + this.taskId = taskId; + } + + public Long getCategoryId() { + return categoryId; + } + + public void setCategoryId(Long categoryId) { + this.categoryId = categoryId; + } + + public Long getClickCount() { + return clickCount; + } + + public void setClickCount(Long clickCount) { + this.clickCount = clickCount; + } + + public Long getOrderCount() { + return orderCount; + } + + public void setOrderCount(Long orderCount) { + this.orderCount = orderCount; + } + + public Long getPayCount() { + return payCount; + } + + public void setPayCount(Long payCount) { + this.payCount = payCount; + } +} diff --git a/src/main/java/cn/edu/hust/session/CategorySortKey.java b/src/main/java/cn/edu/hust/session/CategorySortKey.java index 3fa8435..cdc07ee 100644 --- a/src/main/java/cn/edu/hust/session/CategorySortKey.java +++ b/src/main/java/cn/edu/hust/session/CategorySortKey.java @@ -1,13 +1,14 @@ package cn.edu.hust.session; + import scala.math.Ordered; -public class CategorySortKey implements Ordered { +public class CategorySortKey implements Ordered, java.io.Serializable { private Long clickCount; private Long orderCount; private Long payCount; - + @Override public int compare(CategorySortKey categorySortKey) { if(clickCount-categorySortKey.getClickCount()!=0) @@ -109,4 +110,10 @@ public class CategorySortKey implements Ordered { public void setPayCount(Long payCount) { this.payCount = payCount; } + + public void set(Long clickCount, Long orderCount, Long payCount) { + this.clickCount = clickCount; + this.orderCount = orderCount; + this.payCount = payCount; + } } diff --git a/src/main/java/cn/edu/hust/session/UserVisitAnalyze.java b/src/main/java/cn/edu/hust/session/UserVisitAnalyze.java index 94672c8..0fc87ef 100644 --- a/src/main/java/cn/edu/hust/session/UserVisitAnalyze.java +++ b/src/main/java/cn/edu/hust/session/UserVisitAnalyze.java @@ -4,10 +4,7 @@ import cn.edu.hust.conf.ConfigurationManager; import cn.edu.hust.constant.Constants; import cn.edu.hust.dao.TaskDao; import cn.edu.hust.dao.factory.DaoFactory; -import cn.edu.hust.domain.SessionAggrStat; -import cn.edu.hust.domain.SessionDetail; -import cn.edu.hust.domain.SessionRandomExtract; -import cn.edu.hust.domain.Task; +import cn.edu.hust.domain.*; import cn.edu.hust.mockData.MockData; import cn.edu.hust.util.*; import com.alibaba.fastjson.JSONObject; @@ -102,7 +99,7 @@ public class UserVisitAnalyze { calculateAndPersist(sessionAggrStatAccumulator.value(),taskId); // - getTop10Category(filteredSessionRDD,sessionInfoPairRDD); + getTop10Category(taskId,filteredSessionRDD,sessionInfoPairRDD); //关闭spark上下文 context.close(); } @@ -594,17 +591,20 @@ public class UserVisitAnalyze { visit_Length_7s_9s_ratio,visit_Length_10s_30s_ratio,visit_Length_30s_60s_ratio, visit_Length_1m_3m_ratio,visit_Length_3m_10m_ratio,visit_Length_10m_30m_ratio,visit_Length_30m_ratio ,step_Length_1_3_ratio,step_Length_4_6_ratio,step_Length_7_9_ratio,step_Length_7_9_ratio,step_Length_30_60_ratio,step_Length_60_ratio); + List sessionAggrStatList=new ArrayList(); + sessionAggrStatList.add(sessionAggrStat); // 插入数据库 - DaoFactory.getSessionAggrStatDao().insert(sessionAggrStat); + DaoFactory.getSessionAggrStatDao().batchInsert(sessionAggrStatList); } /** * 获取top热门品类 + * @param taskId * @param filteredSessionRDD * @param sessionInfoPairRDD */ - private static void getTop10Category(JavaPairRDD filteredSessionRDD, JavaPairRDD sessionInfoPairRDD) { + private static void getTop10Category(Long taskId,JavaPairRDD filteredSessionRDD, JavaPairRDD sessionInfoPairRDD) { //1.获取符合条件的session梵文的所有品类 JavaPairRDD sessionId2DetailRDD=filteredSessionRDD.join(sessionInfoPairRDD).mapToPair(new PairFunction>, String, Row>() { @Override @@ -626,21 +626,27 @@ public class UserVisitAnalyze { if(clickCategoryId!=null) visitCategoryList.add(new Tuple2(clickCategoryId,clickCategoryId)); - String[] orderCategoryIdsSplited=row.getString(8).split(","); - for (String orderCategoryId: - orderCategoryIdsSplited) { - visitCategoryList.add(new Tuple2(Long.valueOf(orderCategoryId),Long.valueOf(orderCategoryId))); + if(row.get(8)!=null){ + String[] orderCategoryIdsSplited=row.getString(8).split(","); + for (String orderCategoryId: + orderCategoryIdsSplited) { + visitCategoryList.add(new Tuple2(Long.valueOf(orderCategoryId),Long.valueOf(orderCategoryId))); + } } - String[] payCategoryIdsSplited=row.getString(10).split(","); - for (String payCategoryId: - payCategoryIdsSplited) { - visitCategoryList.add(new Tuple2(Long.valueOf(payCategoryId),Long.valueOf(payCategoryId))); + if(row.get(10)!=null){ + String[] payCategoryIdsSplited=row.getString(10).split(","); + for (String payCategoryId: + payCategoryIdsSplited) { + visitCategoryList.add(new Tuple2(Long.valueOf(payCategoryId),Long.valueOf(payCategoryId))); + } } return visitCategoryList; } }); + //需要去重 + categoryRDD=categoryRDD.distinct(); //3。计算各个品类的点击,下单和支付次数 // 3.1 计算点击品类的数量 JavaPairRDD clickCategoryRDD = getLClickCategoryRDD(sessionId2DetailRDD); @@ -655,8 +661,36 @@ public class UserVisitAnalyze { JavaPairRDD categoryCountRDD=joinCategoryAndData(categoryRDD,clickCategoryRDD,orderCategoryRDD,payCategoryRDD); //5.自定义二次排序的key + JavaPairRDD sortKeyCountRDD=categoryCountRDD.mapToPair(new PairFunction, CategorySortKey, String>() { + @Override + public Tuple2 call(Tuple2 longStringTuple2) throws Exception { + String countInfo=longStringTuple2._2; + Long clickCount=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_CLICK_CATEGORY)); + Long orderCount=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_ORDER_CATEGORY)); + Long payCount=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_ORDER_CATEGORY)); + CategorySortKey key=new CategorySortKey(); + key.set(clickCount,orderCount,payCount); + return new Tuple2(key,countInfo); + } + }); - + JavaPairRDD sortedCategoryRDD=sortKeyCountRDD.sortByKey(false); + //取出前10个,写入数据库 + List> top10CategoryList=sortedCategoryRDD.take(10); + List top10Categories=new ArrayList(); + for(Tuple2 tuple2:top10CategoryList) + { + String countInfo=tuple2._2; + Long categoryId=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_CATEGORY_ID)); + Long clickCount=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_CLICK_CATEGORY)); + Long orderCount=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_ORDER_CATEGORY)); + Long payCount=Long.valueOf(StringUtils.getFieldFromConcatString(countInfo,"\\|",Constants.FIELD_ORDER_CATEGORY)); + Top10Category top10Category=new Top10Category(); + top10Category.set(taskId,categoryId,clickCount,orderCount,payCount); + top10Categories.add(top10Category); + } + //插入数据库 + DaoFactory.getTop10CategoryDao().batchInsert(top10Categories); } /** @@ -681,7 +715,7 @@ public class UserVisitAnalyze { clickCount=clickIOptional.get(); } - String value=Constants.FIELD_CATEGORY_ID+"="+categoryId+"|"+Constants.FIELD_CLICK_CATEGORYIDS+"="+clickCount; + String value=Constants.FIELD_CATEGORY_ID+"="+categoryId+"|"+Constants.FIELD_CLICK_CATEGORY+"="+clickCount; return new Tuple2(categoryId,value); } }); @@ -799,8 +833,7 @@ public class UserVisitAnalyze { @Override public Boolean call(Tuple2 stringRowTuple2) throws Exception { Row row=stringRowTuple2._2; - Long categoryId=row.getLong(6); - if(categoryId==null) return false; + if(row.get(6)==null) return false; return true; } });