From edba0b7fa86ef9f8dfdaa6ced78449603372e228 Mon Sep 17 00:00:00 2001
From: gaosiao <1761735028@qq.com>
Date: Wed, 9 Aug 2023 11:46:33 +0800
Subject: [PATCH] Upload files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.idea/.name | 1 +
.idea/Flink-pyg.iml | 2 +
.idea/codeStyles/codeStyleConfig.xml | 5 +
.idea/compiler.xml | 17 +
.idea/encodings.xml | 6 +
.idea/hydra.xml | 9 +
.idea/misc.xml | 17 +
.idea/uiDesigner.xml | 124 ++
.idea/vcs.xml | 6 +
.idea/workspace.xml | 1335 ++++++++++++++
README.md | 1536 ++++++++++++++++-
batch-process/pom.xml | 15 +
canal-kafka/pom.xml | 35 +
canal-kafka/src/main/java/CanalClient.java | 212 +++
.../canal_kafka/util/GlobalConfigUtil.java | 30 +
.../henry/canal_kafka/util/KafkaSender.java | 42 +
.../src/main/resources/application.properties | 14 +
.../src/main/resources/log4j.properties | 4 +
pom.xml | 20 +
pyg.iml | 2 +
real-process/pom.xml | 163 ++
.../src/main/resources/application.conf | 26 +
.../src/main/resources/hbase-site.xml | 60 +
.../src/main/resources/log4j.properties | 296 ++++
.../scala/com/henry/realprocess/App.scala | 166 ++
.../com/henry/realprocess/bean/ClickLog.scala | 77 +
.../henry/realprocess/bean/ClickLogWide.scala | 61 +
.../com/henry/realprocess/bean/Message.scala | 12 +
.../com/henry/realprocess/task/BaseTask.scala | 80 +
.../realprocess/task/ChannelAreaTask.scala | 170 ++
.../realprocess/task/ChannelBrowserTask.scala | 130 ++
.../task/ChannelFreshnessTask.scala | 115 ++
.../task/ChannelFreshnessTaskTrait.scala | 134 ++
.../realprocess/task/ChannelNetworkTask.scala | 173 ++
.../realprocess/task/ChannelPvUvTask.scala | 108 ++
.../task/ChannelPvUvTaskMerge.scala | 105 ++
.../realprocess/task/ChannelRealHotTask.scala | 88 +
.../realprocess/task/PreprocessTask.scala | 165 ++
.../realprocess/util/GlobalConfigutil.scala | 33 +
.../henry/realprocess/util/HBaseUtil.scala | 274 +++
real-process/src/test/temp.txt | 7 +
report/pom.xml | 101 ++
.../com/henry/report/ReportApplication.java | 18 +
.../java/com/henry/report/bean/Clicklog.java | 136 ++
.../java/com/henry/report/bean/Message.java | 51 +
.../report/controller/ReportController.java | 53 +
.../report/controller/TestController.java | 22 +
.../henry/report/util/ClickLogGenerator.java | 139 ++
.../report/util/KafkaProducerConfig.java | 98 ++
.../report/util/RoundRobinPartitioner.java | 46 +
.../src/main/resources/application.properties | 18 +
.../test/java/com/henry/report/KafkaTest.java | 29 +
screenshot/036a079d.png | Bin 0 -> 259324 bytes
screenshot/03ef7ace.png | Bin 0 -> 142168 bytes
screenshot/04e25b5a.png | Bin 0 -> 56685 bytes
screenshot/07a78b77.png | Bin 0 -> 116041 bytes
screenshot/0b4d0c1b.png | Bin 0 -> 210051 bytes
screenshot/0b4ea4e1.png | Bin 0 -> 34397 bytes
screenshot/0bd763d1.png | Bin 0 -> 49761 bytes
screenshot/0ced234a.png | Bin 0 -> 92440 bytes
screenshot/0e6080a2.png | Bin 0 -> 195909 bytes
screenshot/0fcd02b7.png | Bin 0 -> 184645 bytes
screenshot/1.png | Bin 0 -> 61831 bytes
screenshot/121bf948.png | Bin 0 -> 140293 bytes
screenshot/12f712f9.png | Bin 0 -> 440704 bytes
screenshot/13c61ea9.png | Bin 0 -> 62231 bytes
screenshot/14679e84.png | Bin 0 -> 75322 bytes
screenshot/1a3addd7.png | Bin 0 -> 95039 bytes
screenshot/1d504cce.png | Bin 0 -> 140216 bytes
screenshot/2.png | Bin 0 -> 53223 bytes
screenshot/201507bb.png | Bin 0 -> 160509 bytes
screenshot/21733492.png | Bin 0 -> 50390 bytes
screenshot/2193cbd1.png | Bin 0 -> 51371 bytes
screenshot/22cd7b3c.png | Bin 0 -> 158858 bytes
screenshot/277372f9.png | Bin 0 -> 185309 bytes
screenshot/2b7f3937.png | Bin 0 -> 3134 bytes
screenshot/2c0ad8e2.png | Bin 0 -> 232947 bytes
screenshot/2d11fecd.png | Bin 0 -> 78623 bytes
screenshot/2f5a312e.png | Bin 0 -> 147458 bytes
screenshot/3.png | Bin 0 -> 71351 bytes
screenshot/3254e2ca.png | Bin 0 -> 163491 bytes
screenshot/32a6daaf.png | Bin 0 -> 292697 bytes
screenshot/342dcc3e.png | Bin 0 -> 21551 bytes
screenshot/34a79ff7.png | Bin 0 -> 49247 bytes
screenshot/34f66a92.png | Bin 0 -> 29069 bytes
screenshot/3754f480.png | Bin 0 -> 122361 bytes
screenshot/3936fce5.png | Bin 0 -> 623514 bytes
screenshot/3ab50051.png | Bin 0 -> 48575 bytes
screenshot/3b6d6d1f.png | Bin 0 -> 57983 bytes
screenshot/3c8d398c.png | Bin 0 -> 1410 bytes
screenshot/3d2cda96.png | Bin 0 -> 37389 bytes
screenshot/3f08b9d0.png | Bin 0 -> 3240 bytes
screenshot/4.png | Bin 0 -> 10211 bytes
screenshot/48cd018e.png | Bin 0 -> 44873 bytes
screenshot/4b18ecbe.png | Bin 0 -> 279165 bytes
screenshot/4cf81224.png | Bin 0 -> 112569 bytes
screenshot/520fd656.png | Bin 0 -> 56790 bytes
screenshot/5326b634.png | Bin 0 -> 22977 bytes
screenshot/54187145.png | Bin 0 -> 68919 bytes
screenshot/544d0e7a.png | Bin 0 -> 19397 bytes
screenshot/565c64ed.png | Bin 0 -> 285521 bytes
screenshot/58926ce0.png | Bin 0 -> 1754 bytes
screenshot/58945558.png | Bin 0 -> 247897 bytes
screenshot/5a321628.png | Bin 0 -> 228163 bytes
screenshot/62c03232.png | Bin 0 -> 72147 bytes
screenshot/64a0b856.png | Bin 0 -> 13792 bytes
screenshot/65e75e0f.png | Bin 0 -> 18855 bytes
screenshot/69907922.png | Bin 0 -> 52364 bytes
screenshot/6ac8e320.png | Bin 0 -> 61832 bytes
screenshot/6c04e485.png | Bin 0 -> 85337 bytes
screenshot/6c99f78b.png | Bin 0 -> 317002 bytes
screenshot/6f5af076.png | Bin 0 -> 47136 bytes
screenshot/6f897038.png | Bin 0 -> 144039 bytes
screenshot/6fcd4a44.png | Bin 0 -> 191363 bytes
screenshot/70a923ce.png | Bin 0 -> 56642 bytes
screenshot/72d64e76.png | Bin 0 -> 80341 bytes
screenshot/74d009f4.png | Bin 0 -> 310042 bytes
screenshot/75fcc253.png | Bin 0 -> 80523 bytes
screenshot/76c4fbf8.png | Bin 0 -> 63588 bytes
screenshot/79c600b1.png | Bin 0 -> 105238 bytes
screenshot/7b5e4836.png | Bin 0 -> 28940 bytes
screenshot/7cba404f.png | Bin 0 -> 1612 bytes
screenshot/7cd00637.png | Bin 0 -> 179244 bytes
screenshot/7cf4425b.png | Bin 0 -> 20789 bytes
screenshot/7fe930e0.png | Bin 0 -> 18462 bytes
screenshot/820fe570.png | Bin 0 -> 253586 bytes
screenshot/831e1859.png | Bin 0 -> 120709 bytes
screenshot/880c750d.png | Bin 0 -> 77164 bytes
screenshot/8c5fa195.png | Bin 0 -> 40126 bytes
screenshot/8cca6196.png | Bin 0 -> 19652 bytes
screenshot/8f89e666.png | Bin 0 -> 40605 bytes
screenshot/8fe964b8.png | Bin 0 -> 52424 bytes
screenshot/908989c5.png | Bin 0 -> 29438 bytes
screenshot/9379b632.png | Bin 0 -> 34724 bytes
screenshot/946fe86f.png | Bin 0 -> 18406 bytes
screenshot/9897be78.png | Bin 0 -> 80852 bytes
screenshot/98ddfe9a.png | Bin 0 -> 104397 bytes
screenshot/9e4179c5.png | Bin 0 -> 201651 bytes
screenshot/9e67979f.png | Bin 0 -> 27165 bytes
screenshot/a13d8808.png | Bin 0 -> 19010 bytes
screenshot/a2ab75e3.png | Bin 0 -> 35746 bytes
screenshot/a35893be.png | Bin 0 -> 141040 bytes
screenshot/a47efd66.png | Bin 0 -> 4642 bytes
screenshot/a560cff6.png | Bin 0 -> 406876 bytes
screenshot/a66b3e6f.png | Bin 0 -> 36212 bytes
screenshot/a8d36972.png | Bin 0 -> 114280 bytes
screenshot/aa3dbfbf.png | Bin 0 -> 55436 bytes
screenshot/abb5e847.png | Bin 0 -> 322073 bytes
screenshot/aef2abe1.png | Bin 0 -> 55575 bytes
screenshot/af73ebaa.png | Bin 0 -> 60833 bytes
screenshot/b35e8d12.png | Bin 0 -> 66020 bytes
screenshot/b77622b6.png | Bin 0 -> 108936 bytes
screenshot/c1186185.png | Bin 0 -> 32298 bytes
screenshot/c33fe1b4.png | Bin 0 -> 35577 bytes
screenshot/c6d0728b.png | Bin 0 -> 66467 bytes
screenshot/c84f6044.png | Bin 0 -> 42372 bytes
screenshot/cba7b53e.png | Bin 0 -> 22594 bytes
screenshot/cdefdf02.png | Bin 0 -> 29526 bytes
screenshot/cf67e612.png | Bin 0 -> 221516 bytes
screenshot/cfd8e121.png | Bin 0 -> 2976 bytes
screenshot/d068b5c0.png | Bin 0 -> 9842 bytes
screenshot/d1a2dc81.png | Bin 0 -> 17532 bytes
screenshot/d42bd3f1.png | Bin 0 -> 270716 bytes
screenshot/d452de1b.png | Bin 0 -> 125427 bytes
screenshot/d457be6b.png | Bin 0 -> 272863 bytes
screenshot/d57e648a.png | Bin 0 -> 118023 bytes
screenshot/d6cc806c.png | Bin 0 -> 117800 bytes
screenshot/d99a61f4.png | Bin 0 -> 97362 bytes
screenshot/d9fcfcf5.png | Bin 0 -> 10033 bytes
screenshot/dc0e0c05.png | Bin 0 -> 21098 bytes
screenshot/dc64a356.png | Bin 0 -> 46905 bytes
screenshot/dedf144c.png | Bin 0 -> 278439 bytes
screenshot/df332a64.png | Bin 0 -> 83254 bytes
screenshot/e219a541.png | Bin 0 -> 288855 bytes
screenshot/e4022013.png | Bin 0 -> 180198 bytes
screenshot/e44c5879.png | Bin 0 -> 97527 bytes
screenshot/e6130b81.png | Bin 0 -> 76215 bytes
screenshot/e61c1e01.png | Bin 0 -> 116383 bytes
screenshot/e751cb2d.png | Bin 0 -> 52290 bytes
screenshot/ea8764de.png | Bin 0 -> 99189 bytes
screenshot/ebf3c65b.png | Bin 0 -> 30601 bytes
screenshot/ec1f3fda.png | Bin 0 -> 84935 bytes
screenshot/fc27880f.png | Bin 0 -> 80164 bytes
screenshot/fe002ea4.png | Bin 0 -> 200056 bytes
screenshot/ff2dcb9b.png | Bin 0 -> 65891 bytes
sync-db/pom.xml | 175 ++
sync-db/src/main/resources/application.conf | 17 +
sync-db/src/main/resources/hbase-site.xml | 56 +
sync-db/src/main/resources/log4j.properties | 4 +
.../src/main/scala/com/henry/syncdb/App.scala | 90 +
.../scala/com/henry/syncdb/bean/Cannal.scala | 38 +
.../henry/syncdb/bean/HBaseOperation.scala | 23 +
.../henry/syncdb/task/PreprocessTask.scala | 62 +
.../com/henry/syncdb/util/FlinkUtils.scala | 76 +
.../henry/syncdb/util/GlobalConfigutil.scala | 33 +
.../com/henry/syncdb/util/HBaseUtil.scala | 274 +++
196 files changed, 7433 insertions(+), 1 deletion(-)
create mode 100644 .idea/.name
create mode 100644 .idea/Flink-pyg.iml
create mode 100644 .idea/codeStyles/codeStyleConfig.xml
create mode 100644 .idea/compiler.xml
create mode 100644 .idea/encodings.xml
create mode 100644 .idea/hydra.xml
create mode 100644 .idea/misc.xml
create mode 100644 .idea/uiDesigner.xml
create mode 100644 .idea/vcs.xml
create mode 100644 .idea/workspace.xml
create mode 100644 batch-process/pom.xml
create mode 100644 canal-kafka/pom.xml
create mode 100644 canal-kafka/src/main/java/CanalClient.java
create mode 100644 canal-kafka/src/main/java/com/henry/canal_kafka/util/GlobalConfigUtil.java
create mode 100644 canal-kafka/src/main/java/com/henry/canal_kafka/util/KafkaSender.java
create mode 100644 canal-kafka/src/main/resources/application.properties
create mode 100644 canal-kafka/src/main/resources/log4j.properties
create mode 100644 pom.xml
create mode 100644 pyg.iml
create mode 100644 real-process/pom.xml
create mode 100644 real-process/src/main/resources/application.conf
create mode 100644 real-process/src/main/resources/hbase-site.xml
create mode 100644 real-process/src/main/resources/log4j.properties
create mode 100644 real-process/src/main/scala/com/henry/realprocess/App.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/bean/ClickLog.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/bean/ClickLogWide.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/bean/Message.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/BaseTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelAreaTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelBrowserTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTaskTrait.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelNetworkTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTaskMerge.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/ChannelRealHotTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/task/PreprocessTask.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/util/GlobalConfigutil.scala
create mode 100644 real-process/src/main/scala/com/henry/realprocess/util/HBaseUtil.scala
create mode 100644 real-process/src/test/temp.txt
create mode 100644 report/pom.xml
create mode 100644 report/src/main/java/com/henry/report/ReportApplication.java
create mode 100644 report/src/main/java/com/henry/report/bean/Clicklog.java
create mode 100644 report/src/main/java/com/henry/report/bean/Message.java
create mode 100644 report/src/main/java/com/henry/report/controller/ReportController.java
create mode 100644 report/src/main/java/com/henry/report/controller/TestController.java
create mode 100644 report/src/main/java/com/henry/report/util/ClickLogGenerator.java
create mode 100644 report/src/main/java/com/henry/report/util/KafkaProducerConfig.java
create mode 100644 report/src/main/java/com/henry/report/util/RoundRobinPartitioner.java
create mode 100644 report/src/main/resources/application.properties
create mode 100644 report/src/test/java/com/henry/report/KafkaTest.java
create mode 100644 screenshot/036a079d.png
create mode 100644 screenshot/03ef7ace.png
create mode 100644 screenshot/04e25b5a.png
create mode 100644 screenshot/07a78b77.png
create mode 100644 screenshot/0b4d0c1b.png
create mode 100644 screenshot/0b4ea4e1.png
create mode 100644 screenshot/0bd763d1.png
create mode 100644 screenshot/0ced234a.png
create mode 100644 screenshot/0e6080a2.png
create mode 100644 screenshot/0fcd02b7.png
create mode 100644 screenshot/1.png
create mode 100644 screenshot/121bf948.png
create mode 100644 screenshot/12f712f9.png
create mode 100644 screenshot/13c61ea9.png
create mode 100644 screenshot/14679e84.png
create mode 100644 screenshot/1a3addd7.png
create mode 100644 screenshot/1d504cce.png
create mode 100644 screenshot/2.png
create mode 100644 screenshot/201507bb.png
create mode 100644 screenshot/21733492.png
create mode 100644 screenshot/2193cbd1.png
create mode 100644 screenshot/22cd7b3c.png
create mode 100644 screenshot/277372f9.png
create mode 100644 screenshot/2b7f3937.png
create mode 100644 screenshot/2c0ad8e2.png
create mode 100644 screenshot/2d11fecd.png
create mode 100644 screenshot/2f5a312e.png
create mode 100644 screenshot/3.png
create mode 100644 screenshot/3254e2ca.png
create mode 100644 screenshot/32a6daaf.png
create mode 100644 screenshot/342dcc3e.png
create mode 100644 screenshot/34a79ff7.png
create mode 100644 screenshot/34f66a92.png
create mode 100644 screenshot/3754f480.png
create mode 100644 screenshot/3936fce5.png
create mode 100644 screenshot/3ab50051.png
create mode 100644 screenshot/3b6d6d1f.png
create mode 100644 screenshot/3c8d398c.png
create mode 100644 screenshot/3d2cda96.png
create mode 100644 screenshot/3f08b9d0.png
create mode 100644 screenshot/4.png
create mode 100644 screenshot/48cd018e.png
create mode 100644 screenshot/4b18ecbe.png
create mode 100644 screenshot/4cf81224.png
create mode 100644 screenshot/520fd656.png
create mode 100644 screenshot/5326b634.png
create mode 100644 screenshot/54187145.png
create mode 100644 screenshot/544d0e7a.png
create mode 100644 screenshot/565c64ed.png
create mode 100644 screenshot/58926ce0.png
create mode 100644 screenshot/58945558.png
create mode 100644 screenshot/5a321628.png
create mode 100644 screenshot/62c03232.png
create mode 100644 screenshot/64a0b856.png
create mode 100644 screenshot/65e75e0f.png
create mode 100644 screenshot/69907922.png
create mode 100644 screenshot/6ac8e320.png
create mode 100644 screenshot/6c04e485.png
create mode 100644 screenshot/6c99f78b.png
create mode 100644 screenshot/6f5af076.png
create mode 100644 screenshot/6f897038.png
create mode 100644 screenshot/6fcd4a44.png
create mode 100644 screenshot/70a923ce.png
create mode 100644 screenshot/72d64e76.png
create mode 100644 screenshot/74d009f4.png
create mode 100644 screenshot/75fcc253.png
create mode 100644 screenshot/76c4fbf8.png
create mode 100644 screenshot/79c600b1.png
create mode 100644 screenshot/7b5e4836.png
create mode 100644 screenshot/7cba404f.png
create mode 100644 screenshot/7cd00637.png
create mode 100644 screenshot/7cf4425b.png
create mode 100644 screenshot/7fe930e0.png
create mode 100644 screenshot/820fe570.png
create mode 100644 screenshot/831e1859.png
create mode 100644 screenshot/880c750d.png
create mode 100644 screenshot/8c5fa195.png
create mode 100644 screenshot/8cca6196.png
create mode 100644 screenshot/8f89e666.png
create mode 100644 screenshot/8fe964b8.png
create mode 100644 screenshot/908989c5.png
create mode 100644 screenshot/9379b632.png
create mode 100644 screenshot/946fe86f.png
create mode 100644 screenshot/9897be78.png
create mode 100644 screenshot/98ddfe9a.png
create mode 100644 screenshot/9e4179c5.png
create mode 100644 screenshot/9e67979f.png
create mode 100644 screenshot/a13d8808.png
create mode 100644 screenshot/a2ab75e3.png
create mode 100644 screenshot/a35893be.png
create mode 100644 screenshot/a47efd66.png
create mode 100644 screenshot/a560cff6.png
create mode 100644 screenshot/a66b3e6f.png
create mode 100644 screenshot/a8d36972.png
create mode 100644 screenshot/aa3dbfbf.png
create mode 100644 screenshot/abb5e847.png
create mode 100644 screenshot/aef2abe1.png
create mode 100644 screenshot/af73ebaa.png
create mode 100644 screenshot/b35e8d12.png
create mode 100644 screenshot/b77622b6.png
create mode 100644 screenshot/c1186185.png
create mode 100644 screenshot/c33fe1b4.png
create mode 100644 screenshot/c6d0728b.png
create mode 100644 screenshot/c84f6044.png
create mode 100644 screenshot/cba7b53e.png
create mode 100644 screenshot/cdefdf02.png
create mode 100644 screenshot/cf67e612.png
create mode 100644 screenshot/cfd8e121.png
create mode 100644 screenshot/d068b5c0.png
create mode 100644 screenshot/d1a2dc81.png
create mode 100644 screenshot/d42bd3f1.png
create mode 100644 screenshot/d452de1b.png
create mode 100644 screenshot/d457be6b.png
create mode 100644 screenshot/d57e648a.png
create mode 100644 screenshot/d6cc806c.png
create mode 100644 screenshot/d99a61f4.png
create mode 100644 screenshot/d9fcfcf5.png
create mode 100644 screenshot/dc0e0c05.png
create mode 100644 screenshot/dc64a356.png
create mode 100644 screenshot/dedf144c.png
create mode 100644 screenshot/df332a64.png
create mode 100644 screenshot/e219a541.png
create mode 100644 screenshot/e4022013.png
create mode 100644 screenshot/e44c5879.png
create mode 100644 screenshot/e6130b81.png
create mode 100644 screenshot/e61c1e01.png
create mode 100644 screenshot/e751cb2d.png
create mode 100644 screenshot/ea8764de.png
create mode 100644 screenshot/ebf3c65b.png
create mode 100644 screenshot/ec1f3fda.png
create mode 100644 screenshot/fc27880f.png
create mode 100644 screenshot/fe002ea4.png
create mode 100644 screenshot/ff2dcb9b.png
create mode 100644 sync-db/pom.xml
create mode 100644 sync-db/src/main/resources/application.conf
create mode 100644 sync-db/src/main/resources/hbase-site.xml
create mode 100644 sync-db/src/main/resources/log4j.properties
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/App.scala
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/bean/Cannal.scala
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/bean/HBaseOperation.scala
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/task/PreprocessTask.scala
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/util/FlinkUtils.scala
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/util/GlobalConfigutil.scala
create mode 100644 sync-db/src/main/scala/com/henry/syncdb/util/HBaseUtil.scala
diff --git a/README.md b/README.md
index 5ab3ff1..84cf909 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,1536 @@
-# Flink-Analysis-of-Electronic-Commerce
+### 4. Project Structure
+![screenshot](screenshot/1.png)
+
+### 5. Reporting Service System
+![screenshot](screenshot/2.png)
+#### 5.1 Spring Boot
+The reporting service system is a Java web project. To build the web project quickly, it uses Spring Boot, the most popular JavaWeb framework.
+
+Spring Boot is a framework built on top of Spring for rapidly bootstrapping applications. It mainly addresses two problems:
+
+- Too many dependencies
+ - Even lightweight JavaEE development requires importing a large number of dependencies
+ - Dependencies can also conflict with each other over versions
+- Too much configuration
+ - Large amounts of XML configuration
+
+**Basic steps for developing a Spring Boot application:**
+ - Import the Spring Boot dependencies (starter dependencies)
+ - Write the `application.properties` configuration file
+ - Write the `Application` entry class
+
+#### 5.2 Configure the Local Maven Repository
+#### 5.3 Import Maven Dependencies
+[pom file](/report/pom.xml)
+[configuration file](/resources/application.properties)
+
+#### 5.4 Create the Project Package Structure
+
+Package | Description
+---|---
+`com.henry.report.controller` | Spring MVC controllers
+`com.henry.report.bean` | related Java Bean entity classes
+`com.henry.report.util` | related utility classes
+
+#### 5.5 Verify That the Spring Boot Project Was Created Successfully
+**Steps:**
+1. Create the Spring Boot entry class Application
+2. Create the `application.properties` configuration file
+3. Write a simple `Spring MVC` Controller/Handler that receives a request parameter from the browser and echoes it back
+4. Open a browser to test
+
+**Implementation:**
+1. Create the Spring Boot entry class `ReportApplication`, which starts the Spring Boot application
+ - Add the annotation to the class
+ ```
+ @SpringBootApplication
+ ```
+ - In the main method, add the code that launches the Spring Boot application
+ ```
+ SpringApplication.run(ReportApplication.class);
+ ```
+2. Create a `TestController`
+ Add the following annotation to the class
+ ```java
+ @RestController
+ public class TestController{
+
+ }
+ ```
+3. Write a `test` handler
+ It receives a parameter named json from the browser, prints it, and echoes it back
+ ```java
+ @RequestMapping("/test")
+ public String test(String json){
+ System.out.println(json);
+ return json;
+ }
+ ```
+
+4. Write the configuration file
+ - Configure the port number
+ ```properties
+ server.port=8888
+ ```
+5. Start the Spring Boot application
+6. Open a browser and test whether the handler receives the data
+
+[Test URL: http://localhost:8888/test?json=666](http://localhost:8888/test?json=666)
+
+Result:
+![echoed response](screenshot/3.png)
+
+---
+
+#### 5.6 Install Kafka-Manager
+
+Kafka-Manager is an open-source Kafka monitoring and management tool from Yahoo.
+
+**Installation steps:**
+
+1. Download the package: [Kafka-Manager download](https://github.com/yahoo/kafka-manager/releases)
+
+2. Extract it under `/usr/local/src/`
+ It only needs to be installed on one machine
+ ```bash
+ tar -zxvf kafka-manager-1.3.3.7.tar.gz
+ ```
+ Then build it
+ ```bash
+ cd kafka-manager-1.3.3.7
+ ./sbt clean dist
+ ```
+3. Edit `conf/application.conf`
+ ```bash
+ kafka-manager.zkhosts="master:2181,slave1:2181,slave2:2181"
+ ```
+
+4. Start ZooKeeper
+ ```bash
+ zkServer.sh start
+ ```
+5. Start Kafka
+ ```bash
+ ./kafka-server-start.sh ../config/server.properties > /dev/null 2>&1 &
+ ```
+ ![screenshot](screenshot/4.png)
+
+
+6. Start kafka-manager
+ ```bash
+ cd /usr/local/src/kafka-manager-1.3.3.17/bin
+ nohup ./kafka-manager 2>&1 & # starts on port 9000 by default
+ nohup ./kafka-manager 2>&1 -Dhttp.port=9900 & # specify a port explicitly
+ ```
+ ![](screenshot/dc0e0c05.png)
+
+ The web UI:
+
+ ![](screenshot/a66b3e6f.png)
+
+
+---
+
+#### 5.7 Write the Kafka Producer Configuration Utility Class
+
+Since the project needs to work with Kafka, a KafkaTemplate has to be built first. It is a Kafka template object through which messages can be sent to Kafka conveniently.
+
+**Development steps**
+
+1. Write the Kafka producer configuration
+2. Write the Kafka producer Spring Boot configuration utility class `KafkaProducerConfig` and build the `KafkaTemplate`
+
+**Implementation**
+
+1. Import the Kafka producer configuration
+ Copy the following into `application.properties`
+
+ ```properties
+ #
+ # Kafka
+ #
+ #============ Kafka configuration (producer) ===============
+ # Kafka broker addresses
+ kafka.bootstrap_servers_config=master:9092,slave1:9092,slave2:9092
+ # Number of retries allowed when a send fails
+ kafka.retries_config=0
+ # How much data to send per batch
+ kafka.batch_size=4096
+ # Linger time: send once 1 ms has elapsed
+ kafka.linger_ms_config=1
+ # Buffer size
+ kafka.buffer_memory_config=40960
+ # Topic name
+ kafka.topic=pyg
+ ```
+
+2. Write the `kafkaTemplate`
+ ```java
+ @Bean // 2. marks this object as a Spring-managed bean
+ public KafkaTemplate<String, String> kafkaTemplate() {
+
+ // build the configuration the factory needs
+ Map<String, Object> configs = new HashMap<>();
+
+ // 3. set the relevant properties
+ // copy the member variables into the Map; they are used when the Kafka producer is created
+ configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers_config);
+ configs.put(ProducerConfig.RETRIES_CONFIG, retries_config);
+ configs.put(ProducerConfig.BATCH_SIZE_CONFIG, batch_size_config);
+ configs.put(ProducerConfig.LINGER_MS_CONFIG, linger_ms_config);
+ configs.put(ProducerConfig.BUFFER_MEMORY_CONFIG, buffer_memory_config);
+
+
+ // 4. create the producer factory
+ ProducerFactory<String, String> producerFactory = new DefaultKafkaProducerFactory<>(configs);
+
+ // 5. pass the factory to the KafkaTemplate constructor
+ // and return the kafkaTemplate object
+ return new KafkaTemplate<>(producerFactory);
+ }
+ ```
+
+3. Create a JUnit test case under the `test` sources
+ - Integrate Spring Boot Test
+ - Inject the `kafkaTemplate`
+ - Send 100 test messages to the `test` topic
+ ```java
+ @RunWith(SpringRunner.class)
+ @SpringBootTest
+ public class KafkaTest {
+
+ @Autowired
+ KafkaTemplate kafkaTemplate;
+
+ @Test
+ public void sendMsg(){
+ for (int i = 0; i < 100; i++)
+ kafkaTemplate.send("test", "key","this is test msg") ;
+ }
+
+ }
+ ```
+
+4. In Kafka Manager, create the `test` topic with three partitions and two replicas
+Create a connection to the Kafka cluster
+![](screenshot/544d0e7a.png)
+![](screenshot/0b4ea4e1.png)
+![](screenshot/cba7b53e.png)
+Connection created successfully
+![](screenshot/7cf4425b.png)
+![](screenshot/a2ab75e3.png)
+Create the topic
+![](screenshot/5326b634.png)
+![](screenshot/8f89e666.png)
+Topic created successfully
+![](screenshot/13c61ea9.png)
+
+
+5. Start `kafka-console-consumer`
+ ```bash
+ /usr/local/src/kafka_2.11-1.1.0/bin
+ ./kafka-console-consumer.sh --zookeeper master:2181 --from-beginning --topic test
+ ```
+ Running the test program fails with the following error:
+ ![](screenshot/abb5e847.png)
+ Add the serializer configuration:
+ ```java
+ // set the key and value serializers
+ configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG , StringSerializer.class);
+ configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG , StringSerializer.class);
+ ```
+
+
+6. Open the consumer monitoring page in kafka-manager and check the `logsize` value to see whether the messages are evenly distributed across the partitions
+ After adding the serializers and re-running, the consumer terminal prints the messages:
+ ![](screenshot/7fe930e0.png)
+ Open the management page:
+ ![](screenshot/2b7f3937.png)
+ All messages landed in a single partition, which hurts Kafka performance
+ ![](screenshot/6ac8e320.png)
+ The simplest fix: drop the "key" argument
+ ```java
+ @Test
+ public void sendMsg(){
+ for (int i = 0; i < 100; i++)
+ kafkaTemplate.send("test","this is test msg") ;
+ // kafkaTemplate.send("test", "key","this is test msg") ;
+ }
+ ```
+
+
+#### 5.8 Implementing a Custom Partitioner
+
+ ```java
+ public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+ // get the number of partitions
+ Integer partitions = cluster.partitionCountForTopic(topic) ;
+ int curpartition = counter.incrementAndGet() % partitions ; // partition number for the current round-robin step
+ if(counter.get() > 65535){
+ counter.set(0);
+ }
+ return curpartition;
+ }
+ ```
+ ![](screenshot/8fe964b8.png)
+
+
+#### 5.9 Developing the Reporting Service
+ The reporting service must accept HTTP requests and write the data they carry into Kafka.
+ ![](screenshot/64a0b856.png)
+
+ Steps:
+ 1. Create the `Message` entity class
+ Every click-stream message is wrapped in a Message entity
+ 2. Design a Controller to receive the HTTP requests
+ 3. Wrap the message carried by the HTTP request into a `Message` entity object
+ 4. Use `FastJSON` to turn the `Message` entity into a JSON string
+ 5. Write the JSON string to `kafka` with the `KafkaTemplate`
+ 6. Return a JSON string describing the write result to the client
+
+ #### 5.10 Producing Simulated Click-Stream Log Messages into Kafka
+ To make debugging easier, a message generator can be used to produce click-stream logs and send them to the reporting service. The generator produces 100
+ Clicklog records in one run, converts them to JSON, and sends them to the ReportController through HttpClient.
+ ![](screenshot/3ab50051.png)
+
+ Steps:
+ 1. Import the ClickLog entity class (ClickLog.java)
+ 2. Import the click-stream log generator (ClickLogGenerator.java)
+ 3. Create the Kafka topic (pyg)
+ 4. Consume the topic with `kafka-console-consumer.sh`
+ 5. Start the reporting service
+ 6. Run the main method of `ClickLogGenerator` to generate 100 user-browsing messages and send them to Kafka
+
+ Implementation:
+ 1. Create the kafka topic
+ ```jshelllanguage
+ ./kafka-topics.sh --create --zookeeper master:2181 --replication-factor 2 --partitions 3 --topic pyg
+```
+ 2. 启动消费者
+ ```jshelllanguage
+ ./kafka-console-consumer.sh --zookeeper master:2181 --from-beginning --topic pyg
+```
+
+ Run the message simulator; the output is shown below (the reporting service is running at the same time):
+ ![](screenshot/565c64ed.png)
+
+
+### 6. Developing the Flink Real-Time Data Analysis System
+ The `reporting service` is finished: click-stream data from the e-commerce pages can now be sent to Kafka. The next step is to develop the
+ `Flink real-time analysis system`, which reads the messages from Kafka as a stream and analyzes the data.
+ ![](screenshot/98ddfe9a.png)
+
+ **Business goals**
+ - Real-time channel hot-spot analysis
+ - Real-time channel PV/UV
+ - Real-time channel freshness
+ - Real-time channel geographic distribution
+ - Real-time carrier/network analysis
+ - Real-time browser type analysis
+
+
+ **Techniques**
+ - Flink real-time processing operators
+ - Use `CheckPoint` and `watermarks` to handle problems seen with Flink in production (network latency, data loss)
+ - Flink integration with Kafka
+ - Flink integration with HBase
+
+#### 6.1 Setting Up the Flink Real-Time Analysis Environment
+
+##### 6.1.1 Import the Maven dependencies
+
+##### 6.1.2 Create the project package structure
+Package | Description
+---|---
+`com.henry.realprocess.util` | related utility classes
+`com.henry.realprocess.bean` | related entity classes
+`com.henry.realprocess.task` | the concrete analysis tasks; each business metric is one task, and its processing logic lives here
+
+
+##### 6.1.3 Import the Kafka/HBase configuration for the real-time system
+
+1. Copy `application.conf` into the `resources` directory
+2. Copy `log4j.properties` into the `resources` directory
+
+> Remember to adjust the host names of the `kafka` and `hbase` servers
+
+##### 6.1.4 API for reading the configuration file
+Introduction to `ConfigFactory.load()`
+![](screenshot/d6cc806c.png)
+
+Commonly used API
+![](screenshot/df332a64.png)
+
+##### 6.1.5 Write the Scala configuration utility class
+`com.henry.realprocess.util.GlobalConfigutil` (a sketch follows below)
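+
+A minimal sketch of what this utility could look like, assuming `application.conf` carries keys such as `bootstrap.servers` and `input.topic` (the key names used here are illustrative assumptions, not necessarily the ones in the repository):
+
+```scala
+import com.typesafe.config.{Config, ConfigFactory}
+
+// Reads application.conf from the classpath once and exposes its entries as fields
+object GlobalConfigutil {
+  // ConfigFactory.load() picks up application.conf from the resources directory
+  val config: Config = ConfigFactory.load()
+
+  val bootstrapServers: String = config.getString("bootstrap.servers")
+  val zookeeperConnect: String = config.getString("zookeeper.connect")
+  val inputTopic: String       = config.getString("input.topic")
+  val groupId: String          = config.getString("group.id")
+
+  def main(args: Array[String]): Unit = {
+    // quick check that the configuration file can be read
+    println(bootstrapServers)
+    println(inputTopic)
+  }
+}
+```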
+
+#### 6.2 Initialize the Flink Streaming Environment
+`com.henry.realprocess.App`
+
+
+#### 6.3 Add CheckPoint Fault Tolerance to Flink
+
+![](screenshot/75fcc253.png)
+Checkpoints are incremental, so taking many state snapshots does not blow up the snapshot data stored in HDFS.
+
+**Implementation**:
+
+1. Add `checkpoint` support to the Flink streaming environment to guarantee fault tolerance and no data loss.
+![](screenshot/2193cbd1.png)
+
+
+#### 6.4 Flink Integration with Kafka
+##### 6.4.1 Reading Kafka data with Flink
+![](screenshot/54187145.png)
+
+**Implementation**:
+
+1. Start the reporting service `ReportApplication`
+2. Start kafka
+3. Start the kafka message generator/simulator
+4. Start App.scala
+
+Messages sent by the generator:
+![](screenshot/831e1859.png)
+Messages received by the App real-time analysis system:
+![](screenshot/5a321628.png)
+Messages received by the consumer:
+![](screenshot/b77622b6.png)
+
+
+##### 6.4.2 Parsing Kafka messages into tuples
+
+Steps:
+- Use the map operator to iterate over the data consumed from Kafka
+- Convert each record into a JSON object with FastJSON
+- Parse the JSON data into a tuple (a sketch follows below)
+
+Code:
+1. Use the map operator and FastJSON to convert the data consumed from Kafka into JSON objects
+2. Parse the JSON data into a tuple
+3. Print the tuples produced by the map to verify that parsing works
+Messages received by the App real-time analysis system (as tuples):
+![](screenshot/6f897038.png)
+![](screenshot/14679e84.png)
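+
+As a rough sketch, this map step could look like the following (assuming the stream variable is called `kafkaDataStream` and the JSON carries `message`, `count` and `timeStamp` fields; the field names are assumptions here):
+
+```scala
+import com.alibaba.fastjson.JSON
+import org.apache.flink.streaming.api.scala._
+
+// kafkaDataStream: DataStream[String] coming from the FlinkKafkaConsumer
+val tupleDataStream = kafkaDataStream.map { msgJson =>
+  // parse the raw Kafka record with FastJSON
+  val jsonObject = JSON.parseObject(msgJson)
+
+  // pull the individual fields out of the JSON object
+  val message   = jsonObject.getString("message")
+  val count     = jsonObject.getLong("count")
+  val timeStamp = jsonObject.getLong("timeStamp")
+
+  // hand the fields downstream as a tuple
+  (message, count, timeStamp)
+}
+
+tupleDataStream.print()
+```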
+
+
+##### 6.4.3 Wrapping click-stream messages in a case class
+
+**Steps**:
+1. Create a `ClickLog` case class to encapsulate the message
+2. Use the map operator to wrap the data in the `ClickLog` case class
+
+
+**Code**:
+1. In the bean package, create the `ClickLog` case class with the following fields (a sketch follows below)
+ - channel ID (channelID)
+ - product category ID (categoryID)
+ - product ID (produceID)
+ - country (country)
+ - province (province)
+ - city (city)
+ - network type (network)
+ - referrer type (source)
+ - browser type (browserType)
+ - site entry time (entryTime)
+ - site leave time (leaveTime)
+ - user ID (userID)
+2. Implement the `apply` method in the `ClickLog` companion object
+3. Use FastJSON's `JSON.parseObject` to build a `ClickLog` instance from the JSON string
+4. Use the map operator to wrap the data in the `ClickLog` case class
+5. Write a main method in the case class and feed it some JSON strings to check the parsing
+6. Re-run the Flink program and verify that the data is wrapped correctly
+
+Messages received by the App real-time analysis system (case class):
+![](screenshot/d452de1b.png)
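+
+A condensed sketch of the case class and its companion `apply` (all twelve fields are kept as Strings here for simplicity; the actual types may differ):
+
+```scala
+import com.alibaba.fastjson.JSON
+
+// Encapsulates one click-stream record; the field names follow the list above
+case class ClickLog(
+  var channelID: String, var categoryID: String, var produceID: String,
+  var country: String, var province: String, var city: String,
+  var network: String, var source: String, var browserType: String,
+  var entryTime: String, var leaveTime: String, var userID: String
+)
+
+object ClickLog {
+  // Build a ClickLog from the JSON string carried inside the Kafka message
+  def apply(json: String): ClickLog = {
+    val obj = JSON.parseObject(json)
+    ClickLog(
+      obj.getString("channelID"), obj.getString("categoryID"), obj.getString("produceID"),
+      obj.getString("country"), obj.getString("province"), obj.getString("city"),
+      obj.getString("network"), obj.getString("source"), obj.getString("browserType"),
+      obj.getString("entryTime"), obj.getString("leaveTime"), obj.getString("userID")
+    )
+  }
+}
+```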
+
+
+##### 6.4.4 Wrapping the Kafka message in a Message case class
+
+
+**Steps**:
+1. Create a `Message` case class that wraps the ClickLog, the timestamp, and the count
+2. Wrap the whole Kafka record in the `Message` class
+3. Run Flink to test
+
+Messages received by the App real-time analysis system (Message case class):
+![](screenshot/0fcd02b7.png)
+
+#### 6.5 Adding Watermark Support to Flink
+![](screenshot/e751cb2d.png)
+![](screenshot/c6d0728b.png)
+![](screenshot/9e4179c5.png)
+![](screenshot/72d64e76.png)
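+
+The screenshots above cover the watermark discussion. As a hedged sketch (assuming event time comes from the `timeStamp` field of the `Message` case class and allowing a couple of seconds of out-of-orderness), the assignment could look like this:
+
+```scala
+import org.apache.flink.streaming.api.TimeCharacteristic
+import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
+import org.apache.flink.streaming.api.windowing.time.Time
+
+// work on event time instead of processing time
+env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
+
+// extract the event timestamp and tolerate events arriving up to 2 seconds late
+val watermarkedStream = messageStream.assignTimestampsAndWatermarks(
+  new BoundedOutOfOrdernessTimestampExtractor[Message](Time.seconds(2)) {
+    override def extractTimestamp(message: Message): Long = message.timeStamp
+  }
+)
+```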
+
+
+
+### 7. Developing the HBaseUtil Utility Class
+#### 7.1 Introduction
+The previous sections integrated Flink with Kafka, so data can be pulled from Kafka and analyzed; the results are then stored in HBase.
+To make this convenient, an HBase utility class is written up front. As a database, HBase needs the usual create/read/update/delete
+operations, so the utility is built around them.
+![](screenshot/c84f6044.png)
+
+##### 7.1.1 API overview
+![](screenshot/d1a2dc81.png)
+![](screenshot/d457be6b.png)
+
+**Basic HBase operation classes**
+![](screenshot/2f5a312e.png)
+
+
+##### 7.1.2 Getting a table
+Add the HBase configuration in code
+```scala
+val conf:Configuration = HBaseConfiguration.create()
+```
+![](screenshot/0ced234a.png)
+
+![](screenshot/e4022013.png)
+
+If the table does not exist, create it
+![](screenshot/908989c5.png)
+After creation:
+![](screenshot/69907922.png)
+![](screenshot/8cca6196.png)
+
+
+##### 7.1.3 Storing data
+Create a `putData` method (see the sketch after the screenshot)
+- Call getTable to obtain the table
+- Build a `put` object
+- Add the column and its value
+- Execute the put against the table
+- Write a main method and test it
+
+![](screenshot/af73ebaa.png)
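+
+A trimmed-down sketch of the `getTable` and `putData` pieces of `HBaseUtil` (the method names follow the text above; the exact signatures are assumptions and most error handling is omitted):
+
+```scala
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
+import org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory, Put, Table}
+import org.apache.hadoop.hbase.util.Bytes
+
+object HBaseUtil {
+  // configuration is read from hbase-site.xml on the classpath
+  val conf: Configuration = HBaseConfiguration.create()
+  val conn: Connection    = ConnectionFactory.createConnection(conf)
+  val admin: Admin        = conn.getAdmin
+
+  // return the table, creating it (with the given column family) if it does not exist yet
+  def getTable(tableNameStr: String, columnFamily: String): Table = {
+    val tableName = TableName.valueOf(tableNameStr)
+    if (!admin.tableExists(tableName)) {
+      val descriptor = new HTableDescriptor(tableName)
+      descriptor.addFamily(new HColumnDescriptor(columnFamily))
+      admin.createTable(descriptor)
+    }
+    conn.getTable(tableName)
+  }
+
+  // write a single column value for the given rowkey
+  def putData(tableNameStr: String, rowkey: String, columnFamily: String,
+              column: String, value: String): Unit = {
+    val table = getTable(tableNameStr, columnFamily)
+    try {
+      val put = new Put(Bytes.toBytes(rowkey))
+      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value))
+      table.put(put)
+    } finally {
+      table.close()
+    }
+  }
+}
+```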
+
+
+##### 7.1.4 Reading data
+1. Use the Connection to get the table
+2. Create a getData method
+ - Call getTable to obtain the table
+ - Build a get object
+ - Execute the get against the table and obtain the result
+ - Use Result.getValue to read the value of the column family/column
+ - Catch exceptions
+ - Close the table
+
+
+##### 7.1.5 Storing data in batches
+Create a putMapData method
+ - Call getTable to obtain the table
+ - Build a put object
+ - Add the columns and values from the Map
+ - Execute the put against the table
+ - Catch exceptions
+ - Close the table
+![](screenshot/ea8764de.png)
+
+
+##### 7.1.6 Reading data in batches
+Create a getMapData method
+ - Call getTable to obtain the table
+ - Build a get object
+ - Query the table with the get object
+ - Build a mutable Map
+ - Iterate over the queried columns and their values
+ - Filter out results that do not match
+ - Convert the result to a Map and return it
+ - Catch exceptions
+ - Close the table
+ - Write a main method and test it
+![](screenshot/a35893be.png)
+
+
+##### 7.1.7 Deleting data
+Create a deleteData method
+ - Call getTable to obtain the table
+ - Build a Delete object
+ - Execute the delete against the table
+ - Catch exceptions
+ - Close the table
+ - Write a main method and test it
+ ![](screenshot/d99a61f4.png)
+ ![](screenshot/d068b5c0.png)
+
+
+
+
+
+ ### 8. Goals of the Real-Time Analysis Business
+ ![](screenshot/520fd656.png)
+
+
+ ### 9. General Workflow for Business Development
+![](screenshot/79c600b1.png)
+
+**General workflow**
+![](screenshot/e6130b81.png)
+
+
+ ### 10. Real-Time Preprocessing of Click-Stream Logs
+ #### 10.1 Business analysis
+ To simplify the later analysis, the click-stream logs are preprocessed in real time with Flink. A few fields are added on top of
+ the original click-stream log to make the subsequent business statistics easier to develop.
+
+ The raw click-stream log fields consumed from kafka:
+![](screenshot/e61c1e01.png)
+
+The following fields are added on top of the original click-stream log fields:
+![](screenshot/201507bb.png)
+
+The values of the last four fields above cannot be computed directly from the click-stream log. A **history table**
+in hbase is needed to keep each user's historical visit state; only then can they be derived.
+**History table** structure:
+![](screenshot/76c4fbf8.png)
+
+
+ #### 10.2 Create the ClickLogWide Case Class
+
+ The ClickLogWide case class holds the widened click-stream log data. Simply **copy** the existing `ClickLog` case class
+ and add the following extra fields to it;
+
+ **Steps**
+![](screenshot/0b4d0c1b.png)
+![](screenshot/0e6080a2.png)
+
+
+#### 10.3 Preprocessing: the isNew Field
+isNew indicates whether a given `user ID` has already visited a given `channel` (a sketch follows below).
+
+**Approach**
+![](screenshot/1d504cce.png)
+
+
+Columns of the user_history table
+- user ID:channel ID (rowkey)
+- user ID (userID)
+- channel ID (channelid)
+- last visit time (timestamp) (lastVisitedTime)
+
+![](screenshot/a560cff6.png)
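+
+A hedged sketch of deriving isNew from the user_history table (`getData`/`putData` are the helpers from section 7, but their exact signatures, the column family name `info`, and the surrounding variable names are assumptions here):
+
+```scala
+// rowkey of the history table: userID:channelID
+val rowkey = s"${clickLog.userID}:${clickLog.channelID}"
+
+// fetch the last visit time recorded for this user/channel pair
+val lastVisited = HBaseUtil.getData("user_history", rowkey, "info", "lastVisitedTime")
+
+// no history record yet -> this is a new user for the channel
+val isNew = if (lastVisited == null || lastVisited.isEmpty) 1 else 0
+
+// refresh the history so the next event for this pair counts as an old user
+HBaseUtil.putData("user_history", rowkey, "info", "lastVisitedTime", message.timeStamp.toString)
+```
+
+The hour/day/month variants (isHourNew and so on) are derived the same way, by comparing the stored lastVisitedTime with the current event time at the corresponding granularity.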
+
+
+
+### 11. Real-Time Channel Hot-Spot Analysis
+#### 11.1 Business description
+Channel hot-spot analysis counts the number of visits (clicks) per channel.
+The analysis produces the following data:
+![](screenshot/cdefdf02.png)
+
+> Historical click counts need to be accumulated
+
+![](screenshot/fc27880f.png)
+
+The first step, preprocessing, is already done (a sketch of the task follows below).
+
+```scala
+ // transform
+ ChannelRealHotTask.process(clickLogWideDateStream).print()
+```
+![](screenshot/b35e8d12.png)
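+
+A compact sketch of what `ChannelRealHotTask.process` might look like, reduced to counting clicks per channel in 3-second windows (the HBase sink described above is left out, and the `visited` field name is an assumption):
+
+```scala
+import org.apache.flink.streaming.api.scala._
+import org.apache.flink.streaming.api.windowing.time.Time
+
+// intermediate count per channel
+case class ChannelRealHot(channelId: String, visited: Long)
+
+object ChannelRealHotTask {
+  def process(clickLogWideStream: DataStream[ClickLogWide]): DataStream[ChannelRealHot] = {
+    clickLogWideStream
+      // one count per wide click-log record
+      .map(log => ChannelRealHot(log.channelID, 1L))
+      // group by channel
+      .keyBy(_.channelId)
+      // 3-second tumbling windows, as in the other tasks
+      .timeWindow(Time.seconds(3))
+      // sum the counts inside each window
+      .reduce((a, b) => ChannelRealHot(a.channelId, a.visited + b.visited))
+  }
+}
+```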
+
+
+
+Sink to HBase
+```scala
+ // sink to HBase
+ ChannelRealHotTask.process(clickLogWideDateStream)
+```
+```
+ hbase shell
+ scan 'channel'
+```
+![](screenshot/3254e2ca.png)
+
+
+
+
+
+### 6. Real-Time Channel PV/UV Analysis
+Channel PV and UV are analyzed along three time dimensions:
+- hour
+- day
+- month
+
+#### 6.1 Business description
+PV (page views)
+Page View: counted once for every page load/refresh
+
+UV (unique visitors)
+Unique Visitor: within a given time range, the same client is counted only once
+
+The analysis produces data like the following:
+![](screenshot/6f5af076.png)
+
+
+#### 6.2 Hourly PV/UV
+```scala
+ // sink to HBase
+ ChannelPvUvTask.process(clickLogWideDateStream)
+```
+```
+ hbase shell
+ scan 'channel_pvuv'
+```
+![](screenshot/cf67e612.png)
+
+
+
+
+#### 6.3 Daily PV/UV
+
+Counting PV and UV by day works the same way as by hour; only the grouping field differs. The hourly
+ PV/UV code can simply be copied and adjusted.
+
+
+
+#### 6.4 Hourly/Daily/Monthly PV/UV
+
+Group the data for the three time dimensions, **hour**, **day**, and **month**, together.
+
+**Approach**
+![](screenshot/aa3dbfbf.png)
+
+![](screenshot/fe002ea4.png)
+
+```scala
+ // sink to HBase
+ ChannelPvUvTaskMerge.process(clickLogWideDateStream)
+```
+```
+ hbase shell
+ scan 'channel_pvuv'
+```
+![](screenshot/12f712f9.png)
+
+
+
+
+### 7. Real-Time Channel User Freshness Analysis
+
+#### 7.1 Business description
+
+User freshness is the ratio of new to returning users among the active users of the site per hour, per day, and per month.
+
+Freshness can be used to:
+- understand, at a macro level, the daily new/returning user ratio and where users come from
+- check whether the day's new users correlate with that day's `promotion activities`
+
+The analysis produces data like the following:
+![](screenshot/0bd763d1.png)
+
+
+![](screenshot/6c99f78b.png)
+
+
+
+## Day 04
+### 1. Extracting a Common Template Class
+
+**Template method:**
+The template method pattern defines the skeleton of an algorithm in a parent class and pushes the concrete implementation down to subclasses,
+so individual steps of the algorithm can be redefined without changing its overall structure.
+
+Three business analyses have already been written, and their code all splits into the same five parts. Such code
+can be refactored by extracting a template class so that every task class executes in the order the template defines (a sketch follows below).
+
+![](screenshot/277372f9.png)
+
+
+Inherit from the parent class:
+```scala
+ChannelFreshnessTaskTrait.scala
+```
+![](screenshot/3d2cda96.png)
+
+
+```scala
+// refactored to the template method
+ ChannelFreshnessTaskTrait.process(clickLogWideDateStream)
+```
+
+![](screenshot/32a6daaf.png)
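+
+As a sketch (method names and exact signatures may differ from the real `BaseTask.scala`), the extracted template could look roughly like this:
+
+```scala
+import org.apache.flink.streaming.api.scala._
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+
+// Template: fixes the order of the five steps; each task fills in the details
+trait BaseTask[T] {
+
+  // template method: every analysis task runs the same pipeline
+  def process(clickLogWideStream: DataStream[ClickLogWide]): Any = {
+    val mapped   = map(clickLogWideStream)
+    val keyed    = keyBy(mapped)
+    val windowed = timeWindow(keyed)
+    val reduced  = reduce(windowed)
+    sink2HBase(reduced)
+  }
+
+  // 1. convert the wide click log into the task-specific case class
+  def map(source: DataStream[ClickLogWide]): DataStream[T]
+
+  // 2. group by the task-specific key
+  def keyBy(mapped: DataStream[T]): KeyedStream[T, String]
+
+  // 3. 3-second tumbling window, shared by all tasks
+  def timeWindow(keyed: KeyedStream[T, String]): WindowedStream[T, String, TimeWindow] =
+    keyed.timeWindow(Time.seconds(3))
+
+  // 4. merge the counts inside one window
+  def reduce(windowed: WindowedStream[T, String, TimeWindow]): DataStream[T]
+
+  // 5. write the merged result to HBase
+  def sink2HBase(reduced: DataStream[T]): Unit
+}
+```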
+
+
+
+### 2. Real-Time Channel Geographic Analysis
+
+#### 2.1 Business description
+Geographic analysis makes it possible to inspect PV/UV and user freshness by region.
+
+
+Metrics to produce
+- PV
+- UV
+- new users
+- returning users
+
+The analysis result looks like:
+![](screenshot/2d11fecd.png)
+
+
+#### 2.2 Business development
+
+**Steps**
+1. Create the channel-area analysis case class (channel, area (country/province/city), time, PV, UV, new users, returning users)
+2. Use flatMap to convert the preprocessed data into the case class
+3. Group (split) by channel, time, and area
+4. Apply a time window (3 seconds per window)
+5. Merge and count
+6. Print to test
+7. Sink the computed data to Hbase
+
+
+**Implementation** (see the sketch after the screenshot)
+1. Create a ChannelAreaTask singleton object
+2. Add a ChannelArea case class wrapping the business fields to count: channel ID (channelID), area (area), date
+(date), pv, uv, new users (newCount), returning users (oldCount)
+3. Write a process method in ChannelAreaTask that takes the preprocessed DataStream
+4. Use the flatMap operator to convert each ClickLog object into ChannelArea records for the three time dimensions
+5. Split by channel ID, time, and area
+6. Apply a time window (3 seconds per window)
+7. Run reduce to merge the counts
+8. Print to test
+9. Sink the merged data to hbase
+ - prepare the hbase table name, column family, rowkey, and column names
+ - check whether a result record already exists in hbase
+ - if it exists, read it and add the new counts to it
+ - if not, write the new record directly
+
+
+`ChannelAreaTask` test
+![](screenshot/e219a541.png)
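+
+A sketch of the flatMap expansion used in this task (field names such as `address`, `yearMonthDayHour` and `isHourNew` on `ClickLogWide` are illustrative assumptions):
+
+```scala
+import org.apache.flink.streaming.api.scala._
+import org.apache.flink.streaming.api.windowing.time.Time
+
+case class ChannelArea(var channelId: String, var area: String, var date: String,
+                       var pv: Long, var uv: Long, var newCount: Long, var oldCount: Long)
+
+// expand each wide click log into three records: hour / day / month dimension
+val areaStream = clickLogWideDateStream.flatMap { log =>
+  List(
+    ChannelArea(log.channelID, log.address, log.yearMonthDayHour, 1, log.isHourNew, log.isNew, 1 - log.isNew),
+    ChannelArea(log.channelID, log.address, log.yearMonthDay, 1, log.isDayNew, log.isNew, 1 - log.isNew),
+    ChannelArea(log.channelID, log.address, log.yearMonth, 1, log.isMonthNew, log.isNew, 1 - log.isNew)
+  )
+}
+
+// group by channel + time + area, apply 3-second windows, then merge the counts
+areaStream
+  .keyBy(area => area.channelId + area.date + area.area)
+  .timeWindow(Time.seconds(3))
+  .reduce((a, b) => ChannelArea(a.channelId, a.area, a.date,
+    a.pv + b.pv, a.uv + b.uv, a.newCount + b.newCount, a.oldCount + b.oldCount))
+```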
+
+
+### 3. Real-Time Carrier Analysis
+#### 3.1 Business description
+Statistics are computed per network carrier. Knowing which carrier most traffic comes from allows better-targeted network promotion.
+
+**Metrics to produce**
+- PV
+- UV
+- new users
+- returning users
+
+**Dimensions to analyze**
+- carrier
+- time dimension (hour, day, month)
+
+The analysis result looks like:
+![](screenshot/ff2dcb9b.png)
+
+
+#### 3.2 Business development
+
+**Steps**
+1. Convert the preprocessed data into the target case class (channel, carrier, time, PV, UV, new users, returning users)
+2. Group (split) by channel, time, and carrier
+3. Apply a time window (3 seconds per window)
+4. Merge and count
+5. Print to test
+6. Sink the computed data to Hbase
+
+
+**Implementation**
+1. Create a ChannelNetworkTask singleton object
+2. Add a ChannelNetwork case class wrapping the business fields to count: channel ID (channelID), carrier
+(network), date (date), pv, uv, new users (newCount), returning users (oldCount)
+3. Write a process method in ChannelNetworkTask that takes the preprocessed DataStream
+4. Use the flatMap operator to convert each ClickLog object into ChannelNetwork records for the three time dimensions
+5. Split by channel ID, time, and carrier
+6. Apply a time window (3 seconds per window)
+7. Run reduce to merge the counts
+8. Print to test
+9. Sink the merged data to hbase
+ - prepare the hbase table name, column family, rowkey, and column names
+ - check whether a result record already exists in hbase
+ - if it exists, read it and add the new counts to it
+ - if not, write the new record directly
+
+ `ChannelNetworkTask` test
+Error:
+![](screenshot/3936fce5.png)
+
+```scala
+// buggy code:
+// totalPv
+if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap(pvColName))) {
+ totalPv = resultMap(pvColName).toLong + network.pv
+}
+else {
+ totalPv = network.pv
+}
+...
+
+// fixed code: when the column is missing, fall back to an empty "" string
+// totalPv
+if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(pvColName,""))) {
+ totalPv = resultMap(pvColName).toLong + network.pv
+}
+else {
+ totalPv = network.pv
+}
+
+```
+![](screenshot/2c0ad8e2.png)
+
+
+
+### 4. Real-Time Channel Browser Analysis
+
+#### 4.1 Business description
+
+Compute the share of each browser (or client)
+**Metrics to produce**
+- PV
+- UV
+- new users
+- returning users
+
+**Dimensions to analyze**
+- browser
+- time dimension (hour, day, month)
+
+The analysis result looks like:
+![](screenshot/3b6d6d1f.png)
+
+
+#### 4.2 Business development
+
+**Steps**
+1. Create the channel-browser analysis case class (channel, browser, time, PV, UV, new users, returning users)
+2. Use flatMap to convert the preprocessed data into the target case class
+3. Group (split) by channel, time, and browser
+4. Apply a time window (3 seconds per window)
+5. Merge and count
+6. Print to test
+7. Sink the computed data to Hbase
+
+**Implementation**
+1. Create a ChannelBrowserTask singleton object
+2. Add a ChannelBrowser case class wrapping the business fields to count: channel ID (channelID), browser
+(browser), date (date), pv, uv, new users (newCount), returning users (oldCount)
+3. Write a process method in ChannelBrowserTask that takes the preprocessed DataStream
+4. Use the flatMap operator to convert each ClickLog object into ChannelBrowser records for the three time dimensions
+5. Split by channel ID, time, and browser
+6. Apply a time window (3 seconds per window)
+7. Run reduce to merge the counts
+8. Print to test
+9. Sink the merged data to hbase
+ - prepare the hbase table name, column family, rowkey, and column names
+ - check whether a result record already exists in hbase
+ - if it exists, read it and add the new counts to it
+ - if not, write the new record directly
+
+
+The code that is reused across tasks is pulled into `BaseTask` during refactoring
+
+// ChannelBrowserTask test
+`ChannelBrowserTask.process(clickLogWideDateStream)`
+
+![](screenshot/58945558.png)
+
+
+
+
+5.2.4 Canal solution 3
+
+![](screenshot/65e75e0f.png)
+
+1. Use canal to parse the mysql binlog and obtain the data
+2. No SQL queries against mysql are needed, so no extra load is put on mysql
+
+> binlog: mysql's log file; must be enabled manually; a binary file recording insert/delete/update commands
+
+
+
+> MySQL master-slave replication is required, because canal disguises itself as a MySQL slave node;
+only then can it obtain the binlog files
+
+
+
+### 6. Canal Data Collection Platform
+
+Next, the Canal data collection platform is built. It drives Canal to obtain MySQL's binlog files, parses them, and writes the data into
+Kafka.
+
+![](screenshot/62c03232.png)
+
+
+**Order of topics:**
+- install MySql
+- enable the binlog
+- install canal
+- build the collection system
+
+
+#### 6.1 Installing MySql
+
+
+#### 6.2 Creating Test Tables in MySql
+
+
+**Steps**
+1. Create the pyg database
+2. Create the database tables
+
+**Implementation**
+sqlyog is recommended for creating the database and tables
+1. Create the pyg database
+2. Paste 创建表.sql from 资料\mysql脚本\ into sqlyog and run it to create the tables
+
+
+#### 6.3 About the binlog
+
+
+- records the insert, delete, and update operations performed in mysql
+- select operations are not written to the binlog
+- the binlog feature must be enabled in mysql before binlog files are produced
+- the binlog is simply a series of binary files
+
+```
+-rw-rw---- 1 mysql mysql 669 Nov 10 21:29 mysql-bin.000001
+-rw-rw---- 1 mysql mysql 126 Nov 10 22:06 mysql-bin.000002
+-rw-rw---- 1 mysql mysql 11799 Nov 15 18:17 mysql-bin.00000
+
+```
+
+#### 6.4 Enabling the binlog
+Steps
+1. Edit the mysql configuration file to add binlog support
+2. Restart mysql and check that the binlog is configured
+Implementation
+1. Open /etc/my.cnf with vi
+2. Add the following configuration
+
+```
+[mysqld]
+log-bin=/var/lib/mysql/mysql-bin
+binlog-format=ROW
+server_id=1
+```
+
+
+> Notes
+> The binlog files are stored under /var/lib/mysql and named with the mysql-bin prefix: log-bin=/var/lib/mysql/mysql-bin
+> Every row-level change in mysql is recorded in detail: binlog-format=ROW
+> Server ID of the current machine (must be unique in a mysql cluster): server_id=1
+
+
+3. Restart mysql
+`service mysqld restart`
+或
+`systemctl restart mysqld.service`
+
+4. Log in to mysql with mysql -u root -p and run the following command
+`show variables like '%log_bin%';`
+
+
+5. If mysql prints the following, the binlog has been enabled successfully
+![](screenshot/48cd018e.png)
+
+
+6. In /var/lib/mysql you can see that the mysql-bin.000001 file has been generated
+![](screenshot/e44c5879.png)
+
+
+
+
+### 6.5. Installing Canal
+
+#### 6.5.1. About Canal
+- canal is an open-source project from Alibaba written in Java
+- it is built specifically for database synchronization
+- it currently supports mysql and MariaDB
+
+> MariaDB is a fork of MySQL maintained mainly by the open-source community under the GPL license. MariaDB aims to be fully
+ compatible with MySQL, including the API and command line, so it can serve as a drop-in replacement for MySQL.
+
+
+#### 6.5.2. How MySQL master-slave replication works
+Uses of mysql master-slave replication
+- real-time disaster recovery, used for failover
+- read/write splitting, serving read queries from replicas
+- backups, without impacting the main workload
+
+Replication topologies
+- one master, one slave
+- one master, many slaves -- scales out read capacity
+> One master with one or more slaves is the most common topology; it is simple, effective, provides HA, and enables read/write
+ splitting, which raises the cluster's concurrency.
+- many masters, one slave -- supported since 5.7
+> Many masters with one slave lets several mysql databases be backed up onto one server with good storage performance.
+- master-master replication
+> Dual-master replication means each server is both master and slave of the other, so changes made on either side are
+ replicated to the other database.
+- cascading replication
+> In cascading replication, some slaves do not sync from the master but from another slave. If the master has too many slaves, it
+ spends part of its capacity on replication; letting 3-5 slaves connect to the master and the remaining slaves connect to those as a second or third tier
+ relieves pressure on the master without hurting data consistency.
+
+
+![](screenshot/a8d36972.png)
+
+
+Prerequisites for a master-slave deployment:
+- the master has the binlog enabled
+- master and slave have different server-ids
+- the slave can reach the master over the network
+
+Replication flow:
+![](screenshot/7cd00637.png)
+
+1. The master records changes in the binary log (these records are called binary log
+events and can be inspected with show binlog events);
+2. the slave's I/O thread requests the master's binlog and copies it into its relay log;
+3. the master spawns a log dump thread to stream the binlog to the slave's I/O thread;
+4. the slave replays the events from the relay log, applying the changes to its own data.
+
+
+#### 6.5.3. How Canal works
+![](screenshot/a13d8808.png)
+
+
+1. Canal speaks the mysql slave protocol and disguises itself as a mysql slave
+2. it sends the dump command to the mysql master
+3. the mysql master receives the dump command and streams the binary log to the slave (canal)
+4. canal parses the binary log byte stream
+
+
+#### 6.5.4. Canal architecture
+![](screenshot/946fe86f.png)
+
+Notes:
+- a server is one running canal instance, i.e. one jvm
+- an instance corresponds to one data queue (one server hosts 1..n instances)
+
+
+Modules inside an instance (one data queue):
+- eventParser (connects to the data source, speaks the slave protocol with the master, parses the protocol)
+- eventSink (links Parser and Store; filters, transforms, and routes the data)
+- eventStore (stores the data)
+- metaManager (incremental subscription [data already read is not read again] & consumption metadata manager)
+
+
+EventParser
+![](screenshot/c33fe1b4.png)
+
+The parser flow breaks down into roughly six steps:
+1. Connection retrieves the last successfully parsed position
+2. Connection connects and sends the BINLOG_DUMP command
+3. Mysql starts pushing the Binary Log
+4. The received Binary Log is decoded by the Binlog parser, which enriches it with some specific information
+5. It is handed to the EventSink module for storage; this call blocks until storage succeeds
+6. After successful storage, the Binary Log position is recorded periodically
+
+
+EventSink design
+![](screenshot/ebf3c65b.png)
+
+Notes:
+- data filtering: wildcard-based filtering on table names, column contents, and so on
+- data routing/distribution: handles 1:n (one parser feeding several stores)
+- data merging: handles n:1 (several parsers feeding one store)
+- data enrichment: extra processing, such as joins, before data enters the store
+
+
+EventStore design
+Currently implemented as in-memory storage, local file storage, and persistence to zookeeper for cluster-wide sharing. The in-memory RingBuffer
+design:
+![](screenshot/9e67979f.png)
+
+
+Three cursors are defined
+- Put: the position of the last write made by the Sink module
+- Get: the position of the last fetch made by a data subscriber
+- Ack: the position of the last successfully consumed record
+
+
+ #### 6.5.5. Installing Canal
+
+**Steps**
+1. Upload the canal package
+2. Extract canal
+3. Configure canal
+4. Start canal
+
+
+**Implementation**
+1. Upload \资料\软件包\canal.deployer-1.0.24.tar.gz to the /export/software directory
+2. Create a canal directory under /export/servers; canal will be extracted into it shortly
+```
+cd /export/servers
+mkdir canal
+```
+3. Extract canal into the /export/servers directory
+```
+tar -xvzf canal.deployer-1.0.24.tar.gz -C ../servers/canal
+
+```
+4. Edit the instance.properties file in the canal/conf/example directory
+```
+## mysql serverId
+canal.instance.mysql.slaveId = 1234
+
+# position info
+canal.instance.master.address = node01:3306
+canal.instance.dbUsername = root
+canal.instance.dbPassword = 123456
+
+```
+> 1. canal.instance.mysql.slaveId must not be the same as the server_id configured earlier
+> 2. canal.instance.master.address is the host name and port where mysql is installed
+
+5. Run startup.sh in /export/servers/canal/bin to start canal
+> cd /export/servers/canal/bin
+> ./startup.sh
+
+6. If the console prints the following, canal has started successfully
+![](screenshot/820fe570.png)
+![](screenshot/342dcc3e.png)
+
+
+
+### 6.6. Canal Data Collection System - Project Initialization
+
+**Steps**
+ 1. Import the Maven dependencies
+ 2. Copy the dependencies from the pom.xml in 资料\工具类\03.Canal数据采集系统 into the canal-kafka project's pom.xml
+ 3. Copy the log4j.properties configuration file from 资料\工具类\03.Canal数据采集系统
+ 4. Copy the application.properties file from 资料\工具类\03.Canal数据采集系统
+
+
+
+### 6.7. Building the Canal Collection Program
+
+Use java to parse the binlog obtained from canal and write it into Kafka
+
+![](screenshot/d9fcfcf5.png)
+
+Create the following package structure in the java directory of the canal-kafka project:
+![](screenshot/dc64a356.png)
+
+
+
+#### 6.7.1. Write the configuration loading code
+**Steps**
+1. Create the GlobalConfigUtil utility class to read the canal and kafka settings from application.properties
+2. Add a main method to verify that the configuration is read correctly
+
+**Implementation**
+1. Create GlobalConfigUtil in the util package to read the settings from application.properties. The following code
+is used to read application.properties
+```
+ResourceBundle bundle = ResourceBundle.getBundle("config file name"
+, Locale.ENGLISH);
+String host = bundle.getString("property key");
+
+```
+Read the canal and kafka settings out of application.properties
+
+2. Write a main method to verify that the configuration is read correctly
+
+
+
+GlobalConfigUtil.java
+![](screenshot/04e25b5a.png)
+
+
+> Note:
+ When reading application.properties with ResourceBundle.getBundle("application", Locale.ENGLISH); the file extension must not be
+ included
+
+
+#### 6.7.2. Import the Kafka utility class
+KafkaSender.java
+
+
+#### 6.7.3. Import the Canal binlog parsing utility class
+- parses the binlog from mysql
+- writes the parsed data into Kafka
+CanalClient.java
+
+
+#### 6.7.4. Test the utility classes
+
+**Steps**
+ 1. Start mysql
+ 2. Start canal
+ 3. Start the zookeeper cluster
+ 4. Start the kafka cluster
+ 5. Create a canal topic in kafka
+```
+bin/kafka-topics.sh --create --zookeeper node01:2181 --replication-factor 2 --partitions 3
+--topic canal
+
+```
+
+ 6. Start the kafka console consumer
+```
+bin/kafka-console-consumer.sh --zookeeper node01:2181 --from-beginning --topic canal
+
+```
+
+
+ 7. Start the canal synchronization program (the utility classes above)
+ 8. Open navicat and insert some rows into mysql
+```sql
+INSERT INTO commodity(commodityId , commodityName , commodityTypeId , originalPrice ,
+activityPrice) VALUES (1 , '耐克' , 1 , 888.00 , 820.00);
+
+INSERT INTO commodity(commodityId , commodityName , commodityTypeId , originalPrice ,
+activityPrice) VALUES (2 , '阿迪达斯' , 1 , 900.00 , 870.00);
+
+INSERT INTO commodity(commodityId , commodityName , commodityTypeId , originalPrice ,
+activityPrice) VALUES (3 , 'MacBook Pro' , 2 , 18000.00 , 17500.00);
+
+INSERT INTO commodity(commodityId , commodityName , commodityTypeId , originalPrice ,
+activityPrice) VALUES (4 , '联想' , 2 , 5500.00 , 5320.00);
+
+INSERT INTO commodity(commodityId , commodityName , commodityTypeId , originalPrice ,
+activityPrice) VALUES (5 , '索菲亚' , 3 , 35000.00 , 30100.00);
+
+INSERT INTO commodity(commodityId , commodityName , commodityTypeId , originalPrice ,
+activityPrice) VALUES (6 , '欧派' , 3 , 43000.00 , 40000.00);
+
+```
+
+ 9. If the following messages appear in kafka, canal is working correctly
+![](screenshot/d42bd3f1.png)
+
+
+
+
+### 1. Developing the Flink Real-Time Data Synchronization System
+
+![](screenshot/3f08b9d0.png)
+
+
+MySQL connects to Canal, and Canal works on MySQL's binlog files.
+The Flink synchronization system reads the JSON data from Kafka, transforms it, and stores it in HBase.
+
+
+#### 1.1. Analysis of the binlog Message Format
+
+Sample log data
+```json
+{
+ "emptyCount": 2,
+ "logFileName": "mysql-bin.000002",
+ "dbName": "pyg",
+ "logFileOffset": 250,
+ "eventType": "INSERT",
+ "columnValueList": [
+ {
+ "columnName": "commodityId",
+ "columnValue": "1",
+ "isValid": "true"
+ },
+ {
+ "columnName": "commodityName",
+ "columnValue": "耐克",
+ "isValid": "true"
+ },
+ {
+ "columnName": "commodityTypeId",
+ "columnValue": "1",
+ "isValid": "true"
+ },
+ {
+ "columnName": "originalPrice",
+ "columnValue": "888.0",
+ "isValid": "true"
+ },
+ {
+ "columnName": "activityPrice",
+ "columnValue": "820.0",
+ "isValid": "true"
+ }
+],
+ "tableName": "commodity",
+ "timestamp": 1553741346000
+}
+
+```
+
+Format analysis
+Fields and their meaning
+![](screenshot/4cf81224.png)
+
+
+
+#### 1.2. Developing the Flink Real-Time Synchronization Application
+Overall architecture
+![](screenshot/3c8d398c.png)
+The data in Kafka comes from the binlog; once the Flink synchronization program receives the binlog it processes and transforms it, then
+writes it into HBase.
+
+
+Detailed architecture
+![](screenshot/cfd8e121.png)
+
+1. Connect Flink to Kafka
+2. Preprocess the data (convert the raw case class into a case class HBase can work with, and store it in HBase)
+3. Sink the data to Hbase
+
+
+Notes on data synchronization
+![](screenshot/7cba404f.png)
+
+> The rowkey in hbase must be unique so that persisted data is not overwritten
+
+
+
+#### 1.3. Initializing the Real-Time Synchronization Project
+Create the following package structure in the scala directory of the sync-db project:
+![](screenshot/c1186185.png)
+
+Steps
+1. Import the dependencies from the pom.xml in 资料\工具类\04.Flink数据同步系统 into the sync-db project's pom.xml
+2. Add scala support to the sync-db module
+3. Create scala folders under main and test and mark them as source and test roots
+4. Copy the application.conf and log4j.properties configuration files from 资料\工具类\04.Flink数据同步系统
+5. Copy GlobalConfigUtil and HBaseUtil from the earlier Flink project
+
+
+#### 1.4. Developing the Flink Program
+
+Steps
+1. Write App.scala and initialize the Flink environment
+2. Run the Flink program and check that it consumes data from the canal topic in kafka
+3. Write FlinkUtils.scala
+
+`App.scala`
+![](screenshot/1a3addd7.png)
+
+Kafka integration
+![](screenshot/036a079d.png)
+
+
+
+#### 1.4.1. Define the Raw Canal Message Case Class
+
+Steps (a sketch follows below)
+1. Create the case class that maps the raw Canal message in the bean package
+2. Write an apply method in the Canal case class that parses the data with FastJSON and builds a Canal instance
+3. Write a main method to verify that the case class object is built correctly
+
+
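+A sketch of the Canal case class and its FastJSON-based `apply` (the field names follow the binlog JSON shown in section 1.1; in the repository the source file is spelled `Cannal.scala`):
+
+```scala
+import com.alibaba.fastjson.JSON
+
+// mirrors one binlog message produced by the canal-kafka collector
+case class Canal(
+  var emptyCount: Long,
+  var logFileName: String,
+  var dbName: String,
+  var logFileOffset: Long,
+  var eventType: String,
+  var columnValueList: String,
+  var tableName: String,
+  var timestamp: Long
+)
+
+object Canal {
+  // parse the JSON string read from the canal topic into a Canal object
+  def apply(json: String): Canal = {
+    val obj = JSON.parseObject(json)
+    Canal(
+      obj.getLong("emptyCount"),
+      obj.getString("logFileName"),
+      obj.getString("dbName"),
+      obj.getLong("logFileOffset"),
+      obj.getString("eventType"),
+      // keep the column list as its raw JSON string; it is expanded later in PreprocessTask
+      obj.getString("columnValueList"),
+      obj.getString("tableName"),
+      obj.getLong("timestamp")
+    )
+  }
+}
+```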
+
+
+#### 1.4.2. Parse the Kafka Stream into the Canal Case Class
+
+Steps
+1. Convert each message into a Canal case class object in a map operator
+2. Print to test; if the output below appears, it works
+![](screenshot/03ef7ace.png)
+
+
+#### 1.4.3. Add Watermark Support
+Steps
+1. Generate watermarks from the timestamp field of the Canal case class
+2. Re-run Flink and print the watermarked data
+![](screenshot/22cd7b3c.png)
+
+
+
+
+#### 1.4.4. Define the HBaseOperation Case Class
+
+The HBaseOperation case class encapsulates one operation against Hbase, with the following fields (a sketch follows below):
+- operation type (opType) = INSERT/DELETE/UPDATE
+- table name (tableName) = mysql.<binlog database name>.<binlog table name>
+- column family (cfName) = fixed to info
+- rowkey = unique key (the first column value in the binlog row)
+- column name (colName) = column name from the binlog
+- column value (colValue) = column value from the binlog
+
+![](screenshot/58926ce0.png)
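+
+A direct sketch of this case class:
+
+```scala
+// one atomic HBase operation derived from a single binlog column change
+case class HBaseOperation(
+  var opType: String,    // INSERT / DELETE / UPDATE
+  var tableName: String, // mysql.<binlog database name>.<binlog table name>
+  var cfName: String,    // fixed column family "info"
+  var rowkey: String,    // value of the first column in the binlog row
+  var colName: String,
+  var colValue: String
+)
+```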
+
+
+
+#### 1.4.5. Convert the Canal Case Class into HBaseOperation Case Classes
+A single binlog message can carry operations on several columns. The mapping is as follows:
+
+The flatMap operator can be used to produce a group of HBaseOperation records (a sketch follows below)
+Steps
+1. Create a preprocessing task object
+2. Use flatMap to convert the watermarked stream into HBaseOperation records
+ - handle the HBaseOperation list differently depending on eventType
+ - the generated table name is mysql.<database name>.<table name>
+ - the rowkey is the value of the first column
+ - INSERT -> convert every column value into an HBaseOperation
+ - UPDATE -> filter out columns whose isValid field is false, then convert to HBaseOperation
+ - DELETE -> produce a List with a single DELETE HBaseOperation
+ - INSERT operation example
+![](screenshot/34f66a92.png)
+
+![](screenshot/a47efd66.png)
+
+Implementation
+1. Create the PreprocessTask singleton object in the task package and add a process method
+2. Use flatMap to expand each Canal case class
+3. Parse the column-value list inside the Canal case class with FastJSON and store it in a Seq
+4. Iterate over the collection and build HBaseOperation case class objects
+5. Print to test
+6. Start Flink and verify the processing is correct
+
+
+> Converting a JSON string to a List
+ List parseArray(String text, Class clazz)
+ classOf[T]: obtain the class object
+ Converting a Java List to a Scala collection
+ Note the required import: import scala.collection.JavaConverters._
+ var scalaList: mutable.Buffer[T] = javaList.asScala
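+
+A hedged sketch of this flatMap conversion (assuming the watermarked stream is called `canalDataStream`, carrying the Canal case class, and that `columnValueList` is kept as its raw JSON string):
+
+```scala
+import com.alibaba.fastjson.JSON
+import org.apache.flink.streaming.api.scala._
+
+val hbaseOperationStream = canalDataStream.flatMap { canal =>
+  // columnValueList is still a JSON array string inside the Canal case class
+  val jsonArray = JSON.parseArray(canal.columnValueList)
+  val columns = (0 until jsonArray.size()).map { i =>
+    val obj = jsonArray.getJSONObject(i)
+    (obj.getString("columnName"), obj.getString("columnValue"), obj.getString("isValid"))
+  }
+
+  val tableName = s"mysql.${canal.dbName}.${canal.tableName}"
+  val cfName    = "info"
+  // the rowkey is taken from the first column of the row
+  val rowkey    = columns.head._2
+
+  canal.eventType match {
+    case "INSERT" =>
+      columns.map { case (name, value, _) => HBaseOperation("INSERT", tableName, cfName, rowkey, name, value) }
+    case "UPDATE" =>
+      // only columns whose value actually changed (isValid == "true") need to be rewritten
+      columns.filter(_._3 == "true")
+             .map { case (name, value, _) => HBaseOperation("UPDATE", tableName, cfName, rowkey, name, value) }
+    case "DELETE" =>
+      List(HBaseOperation("DELETE", tableName, cfName, rowkey, "", ""))
+    case _ => List.empty[HBaseOperation]
+  }
+}
+```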
+
+
+#### 1.4.6. Synchronizing the Data from Flink into hbase
+Steps
+1. Split the sink into two cases, one for delete and one for insert/update (hbase only has a put operation, so both insert and update become puts)
+
+2. Start hbase
+
+3. Start flink and test
+
+
+
+
+
+#### 1.4.7. Verifying the Flink Synchronization
+Steps
+1. Start mysql
+2. Start canal
+3. Start the zookeeper cluster
+4. Start the kafka cluster
+5. Start the hdfs cluster
+6. Start the hbase cluster
+7. Start the Flink data synchronization program
+8. Start the Canal data collection program
+9. Run insert, update, and delete statements in mysql and check whether the data lands in hbase
+
+
+
+Run an insert:
+![](screenshot/4b18ecbe.png)
+Update data:
+![](screenshot/880c750d.png)
+![](screenshot/07a78b77.png)
+
+Delete operation:
+![](screenshot/ec1f3fda.png)
+
+
+Persisted in HBase:
+Delete action:
+![](screenshot/dedf144c.png)
+![](screenshot/8c5fa195.png)
+
+Update action:
+![](screenshot/21733492.png)
+![](screenshot/6c04e485.png)
diff --git a/batch-process/pom.xml b/batch-process/pom.xml
new file mode 100644
index 0000000..7cb2e6c
--- /dev/null
+++ b/batch-process/pom.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>pyg</artifactId>
+        <groupId>com.henry</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>batch-process</artifactId>
+</project>
\ No newline at end of file
diff --git a/canal-kafka/pom.xml b/canal-kafka/pom.xml
new file mode 100644
index 0000000..de32919
--- /dev/null
+++ b/canal-kafka/pom.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>pyg</artifactId>
+        <groupId>com.henry</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>canal-kafka</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.otter</groupId>
+            <artifactId>canal.client</artifactId>
+            <version>1.0.24</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka_2.11</artifactId>
+            <version>0.10.1.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>fastjson</artifactId>
+            <version>1.2.83</version>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/canal-kafka/src/main/java/CanalClient.java b/canal-kafka/src/main/java/CanalClient.java
new file mode 100644
index 0000000..3b0b105
--- /dev/null
+++ b/canal-kafka/src/main/java/CanalClient.java
@@ -0,0 +1,212 @@
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.otter.canal.client.CanalConnector;
+import com.alibaba.otter.canal.client.CanalConnectors;
+import com.alibaba.otter.canal.protocol.CanalEntry;
+import com.alibaba.otter.canal.protocol.Message;
+import com.henry.canal_kafka.util.GlobalConfigUtil;
+import com.henry.canal_kafka.util.KafkaSender;
+
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Utility class for parsing binlog entries delivered by Canal
+ */
+public class CanalClient {
+
+ static class ColumnValuePair {
+ private String columnName;
+ private String columnValue;
+ private Boolean isValid;
+
+ public ColumnValuePair(String columnName, String columnValue, Boolean isValid) {
+ this.columnName = columnName;
+ this.columnValue = columnValue;
+ this.isValid = isValid;
+ }
+
+ public String getColumnName() { return columnName; }
+ public void setColumnName(String columnName) { this.columnName = columnName; }
+ public String getColumnValue() { return columnValue; }
+ public void setColumnValue(String columnValue) { this.columnValue = columnValue; }
+ public Boolean getIsValid() { return isValid; }
+ public void setIsValid(Boolean isValid) { this.isValid = isValid; }
+ }
+
+ /**
+ * Obtain a Canal connection
+ *
+ * @param host host name
+ * @param port port number
+ * @param instance Canal instance name
+ * @param username user name
+ * @param password password
+ * @return Canal connector
+ */
+ public static CanalConnector getConn(String host, int port, String instance, String username, String password) {
+ CanalConnector canalConnector = CanalConnectors.newSingleConnector(new InetSocketAddress(host, port), instance, username, password);
+
+ return canalConnector;
+ }
+
+ /**
+ * 解析Binlog日志
+ *
+ * @param entries Binlog消息实体
+ * @param emptyCount 操作的序号
+ */
+ public static void analysis(List<CanalEntry.Entry> entries, int emptyCount) {
+ for (CanalEntry.Entry entry : entries) {
+ // 只解析mysql事务的操作,其他的不解析
+ if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN ||
+ entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) {
+ continue;
+ }
+
+ // 那么解析binlog
+ CanalEntry.RowChange rowChange = null;
+
+ try {
+ rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ // 获取操作类型字段(增加 删除 修改)
+ CanalEntry.EventType eventType = rowChange.getEventType();
+ // 获取binlog文件名称
+ String logfileName = entry.getHeader().getLogfileName();
+ // 读取当前操作在binlog文件的位置
+ long logfileOffset = entry.getHeader().getLogfileOffset();
+ // 获取当前操作所属的数据库
+ String dbName = entry.getHeader().getSchemaName();
+ // 获取当前操作所属的表
+ String tableName = entry.getHeader().getTableName();//当前操作的是哪一张表
+ long timestamp = entry.getHeader().getExecuteTime();//执行时间
+
+ // 解析操作的行数据
+ for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) {
+ // 删除操作
+ if (eventType == CanalEntry.EventType.DELETE) {
+ // 获取删除之前的所有列数据
+ dataDetails(rowData.getBeforeColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp);
+ }
+ // 新增操作
+ else if (eventType == CanalEntry.EventType.INSERT) {
+ // 获取新增之后的所有列数据
+ dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp);
+ }
+ // 更新操作
+ else {
+ // 获取更新之后的所有列数据
+ dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp);
+ }
+ }
+ }
+ }
+
+ /**
+ * 解析具体一条Binlog消息的数据
+ *
+ * @param columns 当前行所有的列数据
+ * @param logFileName binlog文件名
+ * @param logFileOffset 当前操作在binlog中的位置
+ * @param dbName 当前操作所属数据库名称
+ * @param tableName 当前操作所属表名称
+ * @param eventType 当前操作类型(新增、修改、删除)
+ * @param emptyCount 操作的序号
+ */
+ private static void dataDetails(List<CanalEntry.Column> columns,
+ String logFileName,
+ Long logFileOffset,
+ String dbName,
+ String tableName,
+ CanalEntry.EventType eventType,
+ int emptyCount,
+ long timestamp) {
+
+ // 找到当前那些列发生了改变 以及改变的值
+ List<ColumnValuePair> columnValueList = new ArrayList<ColumnValuePair>();
+
+ for (CanalEntry.Column column : columns) {
+ ColumnValuePair columnValuePair = new ColumnValuePair(column.getName(), column.getValue(), column.getUpdated());
+ columnValueList.add(columnValuePair);
+ }
+
+ String key = UUID.randomUUID().toString();
+
+ JSONObject jsonObject = new JSONObject();
+ jsonObject.put("logFileName", logFileName);
+ jsonObject.put("logFileOffset", logFileOffset);
+ jsonObject.put("dbName", dbName);
+ jsonObject.put("tableName", tableName);
+ jsonObject.put("eventType", eventType);
+ jsonObject.put("columnValueList", columnValueList);
+ jsonObject.put("emptyCount", emptyCount);
+ jsonObject.put("timestamp", timestamp);
+
+
+ // 拼接所有binlog解析的字段
+ String data = JSON.toJSONString(jsonObject);
+
+ System.out.println(data);
+
+ // 解析后的数据发送到kafka
+ KafkaSender.sendMessage(GlobalConfigUtil.kafkaInputTopic, key, data);
+ }
+
+
+ public static void main(String[] args) {
+
+ // 加载配置文件
+ String host = GlobalConfigUtil.canalHost;
+ int port = Integer.parseInt(GlobalConfigUtil.canalPort);
+ String instance = GlobalConfigUtil.canalInstance;
+ String username = GlobalConfigUtil.mysqlUsername;
+ String password = GlobalConfigUtil.mysqlPassword;
+
+ // 获取Canal连接
+ CanalConnector conn = getConn(host, port, instance, username, password);
+
+ // 从binlog中读取数据
+ int batchSize = 100;
+ int emptyCount = 1;
+
+ try {
+ // 连接 canal
+ conn.connect();
+ //订阅实例中所有的数据库和表
+ conn.subscribe(".*\\..*");
+ // 回滚到未进行ack的地方
+ conn.rollback();
+
+ int totalCount = 120; //循环次数
+
+ while (totalCount > emptyCount) {
+ // 获取数据
+ Message message = conn.getWithoutAck(batchSize);
+
+ long id = message.getId();
+ int size = message.getEntries().size();
+ if (id == -1 || size == 0) {
+ //没有读取到任何数据
+ } else {
+ //有数据,那么解析binlog日志
+ analysis(message.getEntries(), emptyCount);
+ emptyCount++;
+ }
+
+ // 确认消息
+ conn.ack(message.getId());
+
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ conn.disconnect();
+ }
+ }
+}
diff --git a/canal-kafka/src/main/java/com/henry/canal_kafka/util/GlobalConfigUtil.java b/canal-kafka/src/main/java/com/henry/canal_kafka/util/GlobalConfigUtil.java
new file mode 100644
index 0000000..ef5ce5b
--- /dev/null
+++ b/canal-kafka/src/main/java/com/henry/canal_kafka/util/GlobalConfigUtil.java
@@ -0,0 +1,30 @@
+package com.henry.canal_kafka.util;
+
+import java.util.ResourceBundle;
+
+public class GlobalConfigUtil {
+ // 获取一个资源加载器
+ // 资源加载器会自动去加载CLASSPATH中的application.properties配置文件
+ private static ResourceBundle resourceBundle = ResourceBundle.getBundle("application");
+
+ // 使用ResourceBundle.getString方法来读取配置
+ public static String canalHost = resourceBundle.getString("canal.host");
+ public static String canalPort = resourceBundle.getString("canal.port");
+ public static String canalInstance = resourceBundle.getString("canal.instance");
+ public static String mysqlUsername = resourceBundle.getString("mysql.username");
+ public static String mysqlPassword = resourceBundle.getString("mysql.password");
+ public static String kafkaBootstrapServers = resourceBundle.getString("kafka.bootstrap.servers");
+ public static String kafkaZookeeperConnect = resourceBundle.getString("kafka.zookeeper.connect");
+ public static String kafkaInputTopic = resourceBundle.getString("kafka.input.topic");
+
+ public static void main(String[] args) {
+ System.out.println(canalHost);
+ System.out.println(canalPort);
+ System.out.println(canalInstance);
+ System.out.println(mysqlUsername);
+ System.out.println(mysqlPassword);
+ System.out.println(kafkaBootstrapServers);
+ System.out.println(kafkaZookeeperConnect);
+ System.out.println(kafkaInputTopic);
+ }
+}
diff --git a/canal-kafka/src/main/java/com/henry/canal_kafka/util/KafkaSender.java b/canal-kafka/src/main/java/com/henry/canal_kafka/util/KafkaSender.java
new file mode 100644
index 0000000..7230c7a
--- /dev/null
+++ b/canal-kafka/src/main/java/com/henry/canal_kafka/util/KafkaSender.java
@@ -0,0 +1,42 @@
+package com.henry.canal_kafka.util;
+
+import kafka.javaapi.producer.Producer;
+import kafka.producer.KeyedMessage;
+import kafka.producer.ProducerConfig;
+import kafka.serializer.StringEncoder;
+
+import java.util.Properties;
+
+/**
+ * Kafka生产消息工具类
+ */
+public class KafkaSender {
+ private String topic;
+
+ public KafkaSender(String topic){
+ super();
+ this.topic = topic;
+ }
+
+ /**
+ * 发送消息到Kafka指定topic
+ *
+ * @param topic topic名字
+ * @param key 键值
+ * @param data 数据
+ */
+ public static void sendMessage(String topic , String key , String data){
+ Producer<String, String> producer = createProducer();
+ producer.send(new KeyedMessage<String, String>(topic , key , data));
+ }
+
+ private static Producer<String, String> createProducer(){
+ Properties properties = new Properties();
+
+ properties.put("metadata.broker.list" , GlobalConfigUtil.kafkaBootstrapServers);
+ properties.put("zookeeper.connect" , GlobalConfigUtil.kafkaZookeeperConnect);
+ properties.put("serializer.class" , StringEncoder.class.getName());
+
+ return new Producer<String, String>(new ProducerConfig(properties));
+ }
+}
\ No newline at end of file
diff --git a/canal-kafka/src/main/resources/application.properties b/canal-kafka/src/main/resources/application.properties
new file mode 100644
index 0000000..0eea56a
--- /dev/null
+++ b/canal-kafka/src/main/resources/application.properties
@@ -0,0 +1,14 @@
+#
+# canal\u914D\u7F6E
+#
+canal.host=master
+canal.port=11111
+canal.instance=example
+mysql.username=root
+mysql.password=123456
+#
+#kafka\u7684\u914D\u7F6E
+#
+kafka.bootstrap.servers=master:9092,slave1:9092,slave2:9092
+kafka.zookeeper.connect=master:2181,slave1:2181,slave2:2181
+kafka.input.topic=canal
diff --git a/canal-kafka/src/main/resources/log4j.properties b/canal-kafka/src/main/resources/log4j.properties
new file mode 100644
index 0000000..cd73ac5
--- /dev/null
+++ b/canal-kafka/src/main/resources/log4j.properties
@@ -0,0 +1,4 @@
+log4j.rootLogger=error,stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..e582ed7
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,20 @@
+
+
+ 4.0.0
+
+ com.henry
+ pyg
+ 1.0-SNAPSHOT
+ pom
+
+
+ report
+ real-process
+ canal-kafka
+ sync-db
+ batch-process
+
+
+
\ No newline at end of file
diff --git a/pyg.iml b/pyg.iml
new file mode 100644
index 0000000..78b2cc5
--- /dev/null
+++ b/pyg.iml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/real-process/pom.xml b/real-process/pom.xml
new file mode 100644
index 0000000..57d5ab9
--- /dev/null
+++ b/real-process/pom.xml
@@ -0,0 +1,163 @@
+
+
+
+ pyg
+ com.henry
+ 1.0-SNAPSHOT
+
+ 4.0.0
+
+ real-process
+
+
+ 2.11
+ 1.6.0
+ 3.2.4
+ 2.0.0
+
+
+
+
+
+ org.apache.kafka
+ kafka_${scala.version}
+ 0.10.1.0
+
+
+
+
+ org.apache.flink
+ flink-connector-kafka-0.10_${scala.version}
+ ${flink.version}
+
+
+
+
+ org.apache.flink
+ flink-table_${scala.version}
+ ${flink.version}
+
+
+
+
+ org.apache.flink
+ flink-scala_${scala.version}
+ ${flink.version}
+
+
+
+
+ org.apache.flink
+ flink-streaming-scala_${scala.version}
+ ${flink.version}
+
+
+ org.apache.flink
+ flink-streaming-java_${scala.version}
+ ${flink.version}
+
+
+
+
+ org.apache.flink
+ flink-hbase_${scala.version}
+ ${flink.version}
+
+
+
+ org.apache.hbase
+ hbase-client
+ ${hbase.version}
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+
+
+
+ xml-apis
+ xml-apis
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+
+
+
+
+ com.alibaba
+ fastjson
+ 1.2.83
+
+
+
+
+
+
+ src/main/scala
+ src/test/scala
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 2.2
+
+
+ package
+
+ shade
+
+
+
+
+ com.google.code.findbugs:jsr305
+ org.slf4j:*
+ log4j:*
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+ com.henry.pyg.App
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/real-process/src/main/resources/application.conf b/real-process/src/main/resources/application.conf
new file mode 100644
index 0000000..9b89327
--- /dev/null
+++ b/real-process/src/main/resources/application.conf
@@ -0,0 +1,26 @@
+#
+#
+# kafka的配置
+#
+# kafka 集群地址
+bootstrap.servers="master:9092,slave1:9092,slave2:9092"
+# zookeeper 集群地址
+zookeeper.connect="master:2181,slave1:2181,slave2:2181"
+# kafka topic
+input.topic="pyg"
+# 消费者组 ID
+gruop.id="pyg"
+# 自动提交拉取到的消费端的消息offset到kafka
+enable.auto.commit="true"
+# 自动提交offset到zookeeper的时间间隔单位(毫秒)
+auto.commit.interval.ms="5000"
+# 每次消费最新的数据
+auto.offset.reset="latest"
+
+#Hbase的配置
+//hbase.zookeeper.quorum="master:2181,slave1:2181,slave2:2181"
+//hbase.master="master:60000"
+//hbase.zookeeper.property.clientPort="2181"
+//hbase.rpc.timeout="600000"
+//hbase.client.operator.timeout="600000"
+//hbase.client.scanner.timeout.period="600000"
\ No newline at end of file
diff --git a/real-process/src/main/resources/hbase-site.xml b/real-process/src/main/resources/hbase-site.xml
new file mode 100644
index 0000000..db207f8
--- /dev/null
+++ b/real-process/src/main/resources/hbase-site.xml
@@ -0,0 +1,60 @@
+
+
+
+
+
+
+
+ hbase.rootdir
+ hdfs://master:9000/hbase2
+
+
+
+
+ hbase.cluster.distributed
+ true
+
+
+
+
+ hbase.master.info.port
+ 16000
+
+
+
+
+ hbase.zookeeper.quorum
+ master:2181,slave1:2181,slave2:2181
+
+
+
+ hbase.zookeeper.property.clientPort
+ 2181
+
+
+
+ hbase.zookeeper.property.dataDir
+ /usr/local/src/zookeeper-3.4.5/hbasedata
+
+
+
+
diff --git a/real-process/src/main/resources/log4j.properties b/real-process/src/main/resources/log4j.properties
new file mode 100644
index 0000000..dbbb7d3
--- /dev/null
+++ b/real-process/src/main/resources/log4j.properties
@@ -0,0 +1,296 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=ERROR,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+# Null Appender
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
+#
+# Rolling File Appender - cap space usage at 5gb.
+#
+hadoop.log.maxfilesize=256MB
+hadoop.log.maxbackupindex=20
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
+log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollver at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to suppress normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=WARN
+
+#
+#Security appender
+#
+hadoop.security.logger=INFO,NullAppender
+hadoop.security.log.maxfilesize=256MB
+hadoop.security.log.maxbackupindex=20
+log4j.category.SecurityLogger=${hadoop.security.logger}
+hadoop.security.log.file=SecurityAuth-${user.name}.audit
+log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
+log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
+
+#
+# Daily Rolling Security appender
+#
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
+
+#
+# hadoop configuration logging
+#
+
+# Uncomment the following line to turn off configuration deprecation warnings.
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
+
+#
+# hdfs audit logging
+#
+hdfs.audit.logger=INFO,NullAppender
+hdfs.audit.log.maxfilesize=256MB
+hdfs.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
+log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
+log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
+log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
+log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
+
+#
+# mapred audit logging
+#
+mapred.audit.logger=INFO,NullAppender
+mapred.audit.log.maxfilesize=256MB
+mapred.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
+log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
+log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
+log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+# AWS SDK & S3A FileSystem
+log4j.logger.com.amazonaws=ERROR
+log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
+log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender
+#
+# Use following logger to send summary to separate file defined by
+# hadoop.mapreduce.jobsummary.log.file :
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+#
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
+hadoop.mapreduce.jobsummary.log.maxbackupindex=20
+log4j.appender.JSA=org.apache.log4j.RollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
+log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# Yarn ResourceManager Application Summary Log
+#
+# Set the ResourceManager summary log filename
+yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
+# Set the ResourceManager summary log level and appender
+yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
+#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
+
+# To enable AppSummaryLogging for the RM,
+# set yarn.server.resourcemanager.appsummary.logger to
+# ,RMSUMMARY in hadoop-env.sh
+
+# Appender for ResourceManager Application Summary Log
+# Requires the following properties to be set
+# - hadoop.log.dir (Hadoop Log directory)
+# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
+# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
+
+log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
+log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
+log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
+log4j.appender.RMSUMMARY.MaxFileSize=256MB
+log4j.appender.RMSUMMARY.MaxBackupIndex=20
+log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
+log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+
+# HS audit log configs
+#mapreduce.hs.audit.logger=INFO,HSAUDIT
+#log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}
+#log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false
+#log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log
+#log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+#log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
+
+# Http Server Request Logs
+#log4j.logger.http.requests.namenode=INFO,namenoderequestlog
+#log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
+#log4j.appender.namenoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.datanode=INFO,datanoderequestlog
+#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
+#log4j.appender.datanoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
+#log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
+#log4j.appender.resourcemanagerrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
+#log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
+#log4j.appender.jobhistoryrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
+#log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
+#log4j.appender.nodemanagerrequestlog.RetainDays=3
+
+
+# WebHdfs request log on datanodes
+# Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to
+# direct the log to a separate file.
+#datanode.webhdfs.logger=INFO,console
+#log4j.logger.datanode.webhdfs=${datanode.webhdfs.logger}
+#log4j.appender.HTTPDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.HTTPDRFA.File=${hadoop.log.dir}/hadoop-datanode-webhdfs.log
+#log4j.appender.HTTPDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.HTTPDRFA.layout.ConversionPattern=%d{ISO8601} %m%n
+#log4j.appender.HTTPDRFA.DatePattern=.yyyy-MM-dd
+
+#
+# Fair scheduler state dump
+#
+# Use following logger to dump the state to a separate file
+
+#log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=DEBUG,FSSTATEDUMP
+#log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=false
+#log4j.appender.FSSTATEDUMP=org.apache.log4j.RollingFileAppender
+#log4j.appender.FSSTATEDUMP.File=${hadoop.log.dir}/fairscheduler-statedump.log
+#log4j.appender.FSSTATEDUMP.layout=org.apache.log4j.PatternLayout
+#log4j.appender.FSSTATEDUMP.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+#log4j.appender.FSSTATEDUMP.MaxFileSize=${hadoop.log.maxfilesize}
+#log4j.appender.FSSTATEDUMP.MaxBackupIndex=${hadoop.log.maxbackupindex}
diff --git a/real-process/src/main/scala/com/henry/realprocess/App.scala b/real-process/src/main/scala/com/henry/realprocess/App.scala
new file mode 100644
index 0000000..72b4b55
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/App.scala
@@ -0,0 +1,166 @@
+package com.henry.realprocess
+
+
+import java.util.Properties
+
+import com.alibaba.fastjson.JSON
+import com.henry.realprocess.bean.{ClickLog, ClickLogWide, Message}
+import com.henry.realprocess.task._
+import com.henry.realprocess.util.GlobalConfigutil
+import org.apache.flink.api.common.serialization.SimpleStringSchema
+import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
+import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
+import org.apache.flink.api.scala._
+import org.apache.flink.runtime.state.filesystem.FsStateBackend
+import org.apache.flink.streaming.api.environment.CheckpointConfig
+import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
+import org.apache.flink.streaming.api.watermark.Watermark
+import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
+
+
+/**
+ * @Author: Henry
+ * @Description: 入口类
+ * @Date: Create in 2019/10/16 22:42
+ **/
+object App {
+
+ def main(args: Array[String]): Unit = {
+
+ //------------ 初始化Flink流式环境,ctrl+alt+v --------------------
+ val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
+
+ // 设置处理时间为EventTime
+ env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
+
+ // 设置并行度
+ env.setParallelism(1)
+
+ // 本地测试 加载本地集合 成为一个 Datastream 打印输出
+ // val localDataStream:DataStream[String] = env.fromCollection(
+ // List("hadoop", "hive", "hbase", "flink")
+ // )
+ // localDataStream.print()
+
+
+ //------------ 添加 checkpoint 的支持 -------------------------------
+ env.enableCheckpointing(5000) // 5秒启动一次checkpoint
+
+ // 设置 checkpoint 的处理语义为 EXACTLY_ONCE(精确一次)
+ env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
+ // 设置两次 checkpoint 的最小时间间隔 1s
+ env.getCheckpointConfig.setMinPauseBetweenCheckpoints(1000)
+ // 设置checkpoint的超时时长, 60s
+ env.getCheckpointConfig.setCheckpointTimeout(60000)
+ // 同一时间只允许一个 checkpoint 进行
+ env.getCheckpointConfig.setMaxConcurrentCheckpoints(1)
+ // 取消作业时保留外部化的 checkpoint,便于之后从该 checkpoint 恢复
+ env.getCheckpointConfig.enableExternalizedCheckpoints(
+ CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
+
+
+ // 设置checkpoint的地址
+ env.setStateBackend(new FsStateBackend("hdfs://master:9000/flink-checkpoint/"))
+
+
+ //--------------- 整合kafka --------------------------
+ val properties = new Properties()
+ // kafka 集群地址
+ properties.setProperty("bootstrap.servers", GlobalConfigutil.bootstrapServers)
+ // zookeeper 集群地址
+ properties.setProperty("zookeeper.connect", GlobalConfigutil.zookeeperConnect)
+ // kafka topic
+ properties.setProperty("input.topic", GlobalConfigutil.inputTopic)
+ // 消费者组 ID
+ properties.setProperty("group.id", GlobalConfigutil.gruopId)  // Kafka 识别的键名必须是 group.id
+ // 自动提交拉取到的消费端的消息offset到kafka
+ properties.setProperty("enable.auto.commit", GlobalConfigutil.enableAutoCommit)
+ // 自动提交offset到zookeeper的时间间隔单位(毫秒)
+ properties.setProperty("auto.commit.interval.ms", GlobalConfigutil.autoCommitIntervalMs)
+ // 每次消费最新的数据
+ properties.setProperty("auto.offset.reset", GlobalConfigutil.autoOffsetReset)
+
+
+ // topic 、反序列化器、 属性集合
+ val consumer = new FlinkKafkaConsumer010[String](
+ GlobalConfigutil.inputTopic,
+ new SimpleStringSchema(),
+ properties)
+
+ val kafkaDataStream: DataStream[String] = env.addSource(consumer)
+
+ // kafkaDataStream.print()
+
+ // JSON -> 元组
+ val tupleDataStream = kafkaDataStream.map {
+ msgJson =>
+ val jsonObject = JSON.parseObject(msgJson)
+
+ val message = jsonObject.getString("message")
+ val count = jsonObject.getLong("count")
+ val timeStamp = jsonObject.getLong("timestamp")
+
+// (message, count, timeStamp)
+ // 改造成样例类
+// (ClickLog(message), count, timeStamp)
+ Message(ClickLog(message), count, timeStamp)
+
+ }
+
+// tupleDataStream.print()
+
+ //----------------- 添加水印支持 -----------------------
+
+ var watermarkDataStream = tupleDataStream.assignTimestampsAndWatermarks(
+ new AssignerWithPeriodicWatermarks[Message] {
+
+ var currentTimestamp = 0L
+
+ // 延迟时间
+ var maxDelayTime = 2000L
+
+ // 获取当前时间戳
+ override def getCurrentWatermark: Watermark = {
+ // 设置水印时间比事件时间小 2s
+ new Watermark(currentTimestamp - maxDelayTime)
+ }
+
+ // 获取当前事件时间
+ override def extractTimestamp(
+ element: Message,
+ previousElementTimestamp: Long): Long = {
+ currentTimestamp = Math.max(element.timeStamp, previousElementTimestamp)
+ currentTimestamp
+ }
+ })
+
+ // 数据的预处理
+ val clickLogWideDateStream : DataStream[ClickLogWide] = PreprocessTask.process(watermarkDataStream)
+// clickLogWideDateStream.print()
+
+ // 转换
+// ChannelRealHotTask.process(clickLogWideDateStream).print()
+// ChannelRealHotTask.process(clickLogWideDateStream)
+
+ // 转换 PV、UV
+ ChannelPvUvTask.process(clickLogWideDateStream)
+// ChannelPvUvTaskMerge.process(clickLogWideDateStream)
+// ChannelFreshnessTask.process(clickLogWideDateStream)
+
+ // 重构模板方法
+ ChannelFreshnessTaskTrait.process(clickLogWideDateStream)
+
+ // ChannelAreaTask 测试
+ ChannelAreaTask.process(clickLogWideDateStream)
+
+ // ChannelNetworkTask 测试
+ ChannelNetworkTask.process(clickLogWideDateStream)
+
+ // ChannelBrowserTask 测试
+ ChannelBrowserTask.process(clickLogWideDateStream)
+
+
+ // 执行任务
+ env.execute("real-process")
+ }
+}
diff --git a/real-process/src/main/scala/com/henry/realprocess/bean/ClickLog.scala b/real-process/src/main/scala/com/henry/realprocess/bean/ClickLog.scala
new file mode 100644
index 0000000..e6ee672
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/bean/ClickLog.scala
@@ -0,0 +1,77 @@
+package com.henry.realprocess.bean
+
+import com.alibaba.fastjson.JSON
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/20 14:45
+ **/
+
+//频道ID(channelID)
+//产品类别ID(categoryID)
+//产品ID(produceID)
+//国家(country)
+//省份(province)
+//城市(city)
+//网络方式(network)
+//来源方式(source)
+//浏览器类型(browserType)
+//进入网站时间(entryTime)
+//离开网站时间(leaveTime)
+//用户ID(userID)
+
+case class ClickLog (
+ // 1、alt + 下拉
+ // 2、ctrl + shift + →,选中各个变量
+ var channelID:String,
+ var categoryID:String,
+ var produceID:String,
+ var country:String,
+ var province:String,
+ var city:String,
+ var network:String,
+ var source:String,
+ var browserType:String,
+ var entryTime:String,
+ var leaveTime:String,
+ var userID:String
+ )
+
+object ClickLog{
+
+ def apply(json: String): ClickLog = {
+
+ // 先把json转换为JSONObject
+ val jsonObject = JSON.parseObject(json)
+
+ // 提取jsonObject中的各个属性,赋值给样例类
+ var channelID = jsonObject.getString("channelID")
+ var categoryID = jsonObject.getString("categoryID")
+ var produceID = jsonObject.getString("produceID")
+ var country = jsonObject.getString("country")
+ var province = jsonObject.getString("province")
+ var city = jsonObject.getString("city")
+ var network = jsonObject.getString("network")
+ var source = jsonObject.getString("source")
+ var browserType = jsonObject.getString("browserType")
+ var entryTime = jsonObject.getString("entryTime")
+ var leaveTime = jsonObject.getString("leaveTime")
+ var userID = jsonObject.getString("userID")
+
+ ClickLog(
+ channelID,
+ categoryID,
+ produceID,
+ country,
+ province,
+ city,
+ network,
+ source,
+ browserType,
+ entryTime,
+ leaveTime,
+ userID
+ )
+ }
+}
\ No newline at end of file
diff --git a/real-process/src/main/scala/com/henry/realprocess/bean/ClickLogWide.scala b/real-process/src/main/scala/com/henry/realprocess/bean/ClickLogWide.scala
new file mode 100644
index 0000000..06cf89c
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/bean/ClickLogWide.scala
@@ -0,0 +1,61 @@
+package com.henry.realprocess.bean
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/27 14:19
+ **/
+
+// 频道ID(channelID)
+// 产品类别ID(categoryID)
+// 产品ID(produceID)
+// 国家(country)
+// 省份(province)
+// 城市(city)
+// 网络方式(network)
+// 来源方式(source)
+// 浏览器类型(browserType)
+// 进入网站时间(entryTime)
+// 离开网站时间(leaveTime)
+// 用户ID(userID)
+// ---- 添加以下字段 ---------------
+// 用户访问次数(count)
+// 用户访问的时间(timestamp)
+// 国家省份城市(拼接)(address)
+// 年月(yearMonth)
+// 年月日(yearMonthDay)
+// 年月日时(yearMonthDayHour)
+// 是否为访问某个频道的新用户(isNew)—— 0:表示否; 1:表示是
+// 在某一小时内是否为某个频道的新用户(isHourNew)—— 0:表示否; 1:表示是
+// 在某一天内是否为某个频道的新用户(isDayNew)—— 0:表示否; 1:表示是
+// 在某一天月是否为某个频道的新用户(isMonthNew)—— 0:表示否; 1:表示是
+
+case class ClickLogWide (
+ // 1、alt + 下拉
+ // 2、ctrl + shift + →,选中各个变量
+ var channelID:String,
+ var categoryID:String,
+ var produceID:String,
+ var country:String,
+ var province:String,
+ var city:String,
+ var network:String,
+ var source:String,
+ var browserType:String,
+ var entryTime:String,
+ var leaveTime:String,
+ var userID:String,
+ //--- 新增 ---------------------------
+ var count:Long,
+ var timestamp:Long,
+ var address:String,
+ var yearMonth:String,
+ var yearMonthDay:String,
+ var yearMonthDayHour:String,
+ var isNew:Int,
+ var isHourNew:Int,
+ var isDayNew:Int,
+ var isMonthNew:Int
+
+)
+
diff --git a/real-process/src/main/scala/com/henry/realprocess/bean/Message.scala b/real-process/src/main/scala/com/henry/realprocess/bean/Message.scala
new file mode 100644
index 0000000..ef14077
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/bean/Message.scala
@@ -0,0 +1,12 @@
+package com.henry.realprocess.bean
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/20 15:58
+ **/
+case class Message (
+ var clickLog:ClickLog,
+ var count:Long,
+ var timeStamp:Long
+ )
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/BaseTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/BaseTask.scala
new file mode 100644
index 0000000..0233e0b
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/BaseTask.scala
@@ -0,0 +1,80 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.task.ChannelBrowserTask.pvColName
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/11/3 10:42
+ **/
+
+trait BaseTask[T] {
+
+
+ // 1、 转换
+ def map(clickLogWideDataStream : DataStream[ClickLogWide]): DataStream[T]
+
+ // 2、 分组
+ def keyBy(mapDataStream : DataStream[T]): KeyedStream[T, String]
+
+ // 3、 时间窗口
+ def timeWindow(keyedStream: KeyedStream[T, String]) : WindowedStream[T, String, TimeWindow] = {
+ // 因为所有子类都使用 3 秒的时间窗口
+ keyedStream.timeWindow(Time.seconds(3))
+ }
+
+ // 4、 聚合
+ def reduce(windowedStream : WindowedStream[T, String, TimeWindow]) : DataStream[T]
+
+ // 5、 落地 HBase
+ def sink2HBase(reduceDataStream: DataStream[T])
+
+
+ // 定义模板执行顺序
+ def process(clickLogWideDataStream : DataStream[ClickLogWide]): Unit = {
+ val mapDataStream: DataStream[T] = map(clickLogWideDataStream)
+ val keyedStream: KeyedStream[T, String] = keyBy(mapDataStream)
+ val windowedStream: WindowedStream[T, String, TimeWindow] = timeWindow(keyedStream)
+ val reduceStream: DataStream[T] = reduce(windowedStream)
+ sink2HBase(reduceStream)
+ }
+
+ // 检测老用户是否第一次访问
+ val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
+
+ // 创建 HBase 相关列
+ var tableName = ""
+ var clfName = "info"
+ var rowkey = ""
+ var channelIdColName = "channelID"
+ var browserColName = "browser"
+ var dateColName = "date"
+ var pvColName = "pv"
+ var uvColName = "uv"
+ var newCountColName = "newCount"
+ var oldCountColName = "oldCount"
+
+
+ /* 累加相关列的值
+ * @param resultMap map集合
+ * @param column 待查询的列
+ * @param currentValue 当前值
+ * @return 累加后的值
+ */
+ def getTotal(resultMap: Map[String, String],column:String,currentValue:Long):Long={
+
+ var total = currentValue
+ // 如果resultMap不为空,并且可以取到相关列的值,那么就进行累加
+ if (resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(column,""))) {
+ total = resultMap(column).toLong + currentValue
+ }
+ total
+ }
+
+
+}
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelAreaTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelAreaTask.scala
new file mode 100644
index 0000000..c003234
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelAreaTask.scala
@@ -0,0 +1,170 @@
+package com.henry.realprocess.task
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.windowing.time.Time
+
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/11/3 14:06
+ **/
+
+// 样例类
+case class ChannelArea(
+ var channelId: String,
+ var area: String,
+ var date: String,
+ var pv: Long,
+ var uv: Long,
+ var newCount: Long,
+ var oldCount: Long
+ )
+
+object ChannelAreaTask extends BaseTask [ChannelArea]{
+
+ // 1、 转换
+ override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelArea] = {
+
+ clickLogWideDataStream.flatMap{
+
+ clickLogWide =>{
+
+ // 如果是老用户,并且在该时间段内第一次来,就计数 1. 否则计 0
+ val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
+
+ List(
+ ChannelArea( // 月维度
+ clickLogWide.channelID,
+ clickLogWide.address,
+ clickLogWide.yearMonth,
+ clickLogWide.count, // pv, 每来一个数据进行累加
+ clickLogWide.isMonthNew, // uv, 第一次来的时候只计数一次
+ clickLogWide.isNew, // 当是 New 的时候进行累加
+ isOld(clickLogWide.isNew, clickLogWide.isMonthNew)
+ ),
+ ChannelArea( // 日维度
+ clickLogWide.channelID,
+ clickLogWide.address,
+ clickLogWide.yearMonthDay,
+ clickLogWide.count,
+ clickLogWide.isDayNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isDayNew)
+ ),
+ ChannelArea( // 小时维度
+ clickLogWide.channelID,
+ clickLogWide.address,
+ clickLogWide.yearMonthDayHour,
+ clickLogWide.count,
+ clickLogWide.isHourNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isHourNew)
+ )
+ )
+ }
+ }
+ }
+
+ // 2、 分组 根据 频道ID+地域+时间
+ override def keyBy(mapDataStream: DataStream[ChannelArea]): KeyedStream[ChannelArea, String] = {
+ mapDataStream.keyBy{
+ area =>
+ area.channelId + " : " + area.area + " : " + area.date
+ }
+ }
+
+ // 3、 时间窗口, 这段代码每个子类都是一样的,可以写到父类中
+// override def timeWindow(keyedStream: KeyedStream[ChannelArea, String]): WindowedStream[ChannelArea, String, TimeWindow] = {}
+
+
+ // 4、 聚合 累加4个字段
+ override def reduce(windowedStream: WindowedStream[ChannelArea, String, TimeWindow]) = {
+ windowedStream.reduce {
+ (t1, t2) =>
+ ChannelArea(t1.channelId, t1.area,
+ t1.date,
+ t1.pv + t2.pv,
+ t1.uv + t2.uv,
+ t1.newCount + t2.newCount,
+ t1.oldCount + t2.oldCount)
+ }
+ }
+
+
+ // 5、 落地HBase
+ override def sink2HBase(reduceDataStream: DataStream[ChannelArea]): Unit = {
+ reduceDataStream.addSink{
+ area => {
+ // HBase 相关字段
+ val tableName = "channel_area"
+ val clfName = "info"
+ val rowkey = area.channelId + ":" + area.area + ":" + area.date
+
+ val channelIdColumn = "channelId"
+ val areaColumn = "area"
+ val dateColumn = "date"
+ val pvColumn = "pv"
+ val uvColumn = "uv"
+ val newCountColumn = "newCount"
+ val oldCountColumn = "oldCount"
+
+ // 查询 HBase
+ val pvInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,pvColumn)
+ val uvInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,uvColumn)
+ val newCountInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,newCountColumn)
+ val oldCountInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,oldCountColumn)
+
+ // 累加
+ var totalPv = 0L
+ var totalUv = 0L
+ var totalNewCount = 0L
+ var totalOldCount = 0L
+
+ // PV
+ if(StringUtils.isNotBlank(pvInHbase)){
+ totalPv = pvInHbase.toLong+area.pv
+ }else{
+ totalPv = area.pv
+ }
+
+ // UV
+ if(StringUtils.isNotBlank(uvInHbase)){
+ totalUv = uvInHbase.toLong+area.uv
+ }else{
+ totalUv = area.uv
+ }
+
+ // totalNewCount
+ if(StringUtils.isNotBlank(newCountInHbase)){
+ totalNewCount = newCountInHbase.toLong+area.newCount
+ }else{
+ totalNewCount = area.newCount
+ }
+
+ // totalOldCount
+ if(StringUtils.isNotBlank(oldCountInHbase)){
+ totalOldCount = oldCountInHbase.toLong+area.oldCount
+ }else{
+ totalOldCount = area.oldCount
+ }
+
+ // 保存数据
+ HBaseUtil.putMapData(tableName,rowkey,clfName,Map(
+ channelIdColumn->area.channelId,
+ areaColumn->area.area,
+ dateColumn->area.date,
+ pvColumn->totalPv,
+ uvColumn->totalUv,
+ newCountColumn->totalNewCount,
+ oldCountColumn->totalOldCount
+ ))
+
+ }
+ }
+ }
+}
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelBrowserTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelBrowserTask.scala
new file mode 100644
index 0000000..0954465
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelBrowserTask.scala
@@ -0,0 +1,130 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.windowing.time.Time
+
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/11/3 15:52
+ **/
+
+// 2. 添加一个`ChannelBrowser`样例类,它封装要统计的业务字段:频道ID(channelID)、浏览器类型
+// (browser)、日期(date)、pv、uv、新用户(newCount)、老用户(oldCount)
+case class ChannelBrowser(
+ var channelId: String,
+ var browser: String,
+ var date: String,
+ var pv: Long,
+ var uv: Long,
+ var newCount: Long,
+ var oldCount: Long
+ )
+
+
+object ChannelBrowserTask extends BaseTask[ChannelBrowser]{
+
+ override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelBrowser] = {
+
+ clickLogWideDataStream.flatMap{
+ clickLogWide => {
+ List(
+ ChannelBrowser( // 月维度
+ clickLogWide.channelID,
+ clickLogWide.browserType,
+ clickLogWide.yearMonth,
+ clickLogWide.count,
+ clickLogWide.isMonthNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isMonthNew)
+ ),
+ ChannelBrowser( // 天维度
+ clickLogWide.channelID,
+ clickLogWide.browserType,
+ clickLogWide.yearMonthDay,
+ clickLogWide.count,
+ clickLogWide.isDayNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isDayNew)
+ ),
+ ChannelBrowser( // 小时维度
+ clickLogWide.channelID,
+ clickLogWide.browserType,
+ clickLogWide.yearMonthDayHour,
+ clickLogWide.count,
+ clickLogWide.isHourNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isHourNew)
+ )
+ )
+ }
+ }
+ }
+
+ override def keyBy(mapDataStream: DataStream[ChannelBrowser]): KeyedStream[ChannelBrowser, String] = {
+
+ mapDataStream.keyBy {
+ browser =>
+ browser.channelId +" : "+ browser.browser +" : "+ browser.date
+ }
+ }
+
+ override def reduce(windowedStream: WindowedStream[ChannelBrowser, String, TimeWindow]): DataStream[ChannelBrowser] = {
+ windowedStream.reduce {
+ (t1, t2) => {
+ ChannelBrowser(
+ t1.channelId,
+ t1.browser,
+ t1.date,
+ t1.pv + t2.pv,
+ t1.uv + t2.uv,
+ t1.newCount + t2.newCount,
+ t1.oldCount + t2.oldCount
+ )
+ }
+ }
+ }
+
+
+ override def sink2HBase(reduceDataStream: DataStream[ChannelBrowser]): Unit = {
+
+ reduceDataStream.addSink(
+ browser => {
+
+ // 创建 HBase 相关列 - 准备hbase的表名、列族名、rowkey名、列名
+ // 不需要加 val 或者 var ,因为引用的是父类的变量
+ tableName = "channel_browser"
+ rowkey = s"${browser.channelId} : ${browser.date} : ${browser.browser}" // 引用变量的方式
+ browserColName = "browser"
+
+
+ // 查询 HBase
+ // - 判断hbase中是否已经存在结果记录
+ val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName,
+ List( pvColName, uvColName, newCountColName, oldCountColName )
+ )
+
+ // 数据累加
+ // 保存数据
+ HBaseUtil.putMapData(
+ tableName, rowkey, clfName, Map(
+ channelIdColName -> browser.channelId,
+ browserColName -> browser.browser,
+ dateColName -> browser.date,
+ pvColName -> getTotal(resultMap, pvColName , browser.pv),
+ uvColName -> getTotal(resultMap, uvColName , browser.uv),
+ newCountColName -> getTotal(resultMap, newCountColName , browser.newCount),
+ oldCountColName -> getTotal(resultMap, oldCountColName , browser.oldCount)
+ )
+ )
+ }
+ )
+ }
+}
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTask.scala
new file mode 100644
index 0000000..11db371
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTask.scala
@@ -0,0 +1,115 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/31 21:38
+ **/
+
+case class ChannelFreshness(
+ var channelId : String ,
+ var date : String ,
+ var newCount: Long ,
+ val oldCount: Long
+
+ )
+
+/**
+ * 1、 转换
+ * 2、 分组
+ * 3、 时间窗口
+ * 4、 聚合
+ * 5、 落地 HBase
+ */
+object ChannelFreshnessTask {
+
+ def process(clickLogWideDataStream: DataStream[ClickLogWide])= {
+
+ // 1、 转换
+ val mapDataStream: DataStream[ChannelFreshness] = clickLogWideDataStream.flatMap {
+ clickLog =>
+
+ // 如果是老用户,只有在第一次来的时候,计数为 1
+ val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
+ // 统计新用户、老用户数量
+ List(
+ ChannelFreshness(clickLog.channelID, clickLog.yearMonthDayHour, clickLog.isNew, isOld(clickLog.isNew, clickLog.isHourNew)),
+ ChannelFreshness(clickLog.channelID, clickLog.yearMonthDay, clickLog.isNew, isOld(clickLog.isNew, clickLog.isDayNew)),
+ ChannelFreshness(clickLog.channelID, clickLog.yearMonth, clickLog.isNew, isOld(clickLog.isNew, clickLog.isMonthNew))
+ )
+ }
+
+// 2、 分组
+ val keyedStream: KeyedStream[ChannelFreshness, String] = mapDataStream.keyBy {
+ freshness => (freshness.channelId + freshness.date)
+ }
+
+
+ // 3、 时间窗口
+ val windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow] = keyedStream.timeWindow(Time.seconds(3))
+
+
+ // 4、 聚合
+ val reduceDataStream: DataStream[ChannelFreshness] = windowedStream.reduce {
+ (t1, t2) =>
+ ChannelFreshness(t1.channelId, t1.date, t1.newCount + t2.newCount, t1.oldCount + t2.oldCount)
+ }
+
+ // 5、 落地 HBase
+ reduceDataStream.addSink(new SinkFunction[ChannelFreshness] {
+ override def invoke(value: ChannelFreshness): Unit = {
+ // 创建 HBase 相关变量
+ val tableName = "channel_freshness"
+ val clfName = "info"
+ val channelIdColumn = "channelId"
+ val dateColumn = "date"
+ val newCountColumn = "newCount"
+ val oldCountColumn = "oldCount"
+
+ val rowkey = value.channelId + ":" + value.date
+
+ // 查询历史数据
+ val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, List(newCountColumn, oldCountColumn))
+
+ // 累加
+ var totalNewCount = 0L
+ var totalOldCount = 0L
+
+ if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn,""))){
+ totalNewCount = resultMap(newCountColumn).toLong + value.newCount
+ }
+ else {
+ totalNewCount = value.newCount
+ }
+
+ if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColumn,""))){
+ totalOldCount = resultMap(oldCountColumn).toLong + value.oldCount
+ }
+ else {
+ totalOldCount = value.oldCount
+ }
+
+
+ // 保存数据
+ HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
+ // 向如下列插入数据
+ channelIdColumn -> value.channelId ,
+ dateColumn -> value.date ,
+ newCountColumn -> totalNewCount ,
+ oldCountColumn -> totalOldCount
+ ))
+
+ }
+ })
+ }
+
+}
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTaskTrait.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTaskTrait.scala
new file mode 100644
index 0000000..30c68d3
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTaskTrait.scala
@@ -0,0 +1,134 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/31 21:38
+ **/
+
+// 复用 ChannelFreshnessTask.scala 中定义的 ChannelFreshness 样例类,
+// 同一个包下重复定义会导致编译冲突,这里不再重复声明
+
+/**
+ * 1、 转换
+ * 2、 分组
+ * 3、 时间窗口
+ * 4、 聚合
+ * 5、 落地 HBase
+ */
+object ChannelFreshnessTaskTrait extends BaseTask[ChannelFreshness] {
+ /* Alt + Enter */
+
+ // 1、 转换
+ override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelFreshness] = {
+
+ val mapDataStream: DataStream[ChannelFreshness] = clickLogWideDataStream.flatMap {
+ clickLog =>
+
+ // 如果是老用户,只有在第一次来的时候,计数为 1
+ val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
+ // 统计新用户、老用户数量
+ List(
+ ChannelFreshness(clickLog.channelID, clickLog.yearMonthDayHour, clickLog.isNew, isOld(clickLog.isNew, clickLog.isHourNew)),
+ ChannelFreshness(clickLog.channelID, clickLog.yearMonthDay, clickLog.isNew, isOld(clickLog.isNew, clickLog.isDayNew)),
+ ChannelFreshness(clickLog.channelID, clickLog.yearMonth, clickLog.isNew, isOld(clickLog.isNew, clickLog.isMonthNew))
+ )
+ }
+ mapDataStream
+ }
+
+ // 2、 分组
+ override def keyBy(mapDataStream: DataStream[ChannelFreshness]): KeyedStream[ChannelFreshness, String] = {
+
+ // 或者:mapDataStream.keyBy {freshness => (freshness.channelId + freshness.date)
+ val keyedStream: KeyedStream[ChannelFreshness, String] = mapDataStream.keyBy {
+ freshness => (freshness.channelId + freshness.date)
+ }
+ keyedStream
+
+ }
+
+ // 3、 时间窗口
+ override def timeWindow(keyedStream: KeyedStream[ChannelFreshness, String]): WindowedStream[ChannelFreshness, String, TimeWindow] = {
+
+ val windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow] = keyedStream.timeWindow(Time.seconds(3))
+ windowedStream
+
+ }
+
+ // 4、 聚合
+ override def reduce(windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow]): DataStream[ChannelFreshness] = {
+
+ val reduceDataStream: DataStream[ChannelFreshness] = windowedStream.reduce {
+ (t1, t2) =>
+ ChannelFreshness(t1.channelId, t1.date, t1.newCount + t2.newCount, t1.oldCount + t2.oldCount)
+ }
+ reduceDataStream
+
+ }
+
+ // 5、 落地 HBase
+ override def sink2HBase(reduceDataStream: DataStream[ChannelFreshness]): Unit = {
+
+ reduceDataStream.addSink(new SinkFunction[ChannelFreshness] {
+ override def invoke(value: ChannelFreshness): Unit = {
+ // 创建 HBase 相关变量
+ val tableName = "channel_freshness"
+ val clfName = "info"
+ val channelIdColumn = "channelId"
+ val dateColumn = "date"
+ val newCountColumn = "newCount"
+ val oldCountColumn = "oldCount"
+
+ val rowkey = value.channelId + ":" + value.date
+
+ // 查询历史数据
+ val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, List(newCountColumn, oldCountColumn))
+
+ // 累加
+ var totalNewCount = 0L
+ var totalOldCount = 0L
+
+ if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn,""))){
+ totalNewCount = resultMap(newCountColumn).toLong + value.newCount
+ }
+ else {
+ totalNewCount = value.newCount
+ }
+
+ if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColumn,""))){
+ totalOldCount = resultMap(oldCountColumn).toLong + value.oldCount
+ }
+ else {
+ totalOldCount = value.oldCount
+ }
+
+ // 保存数据
+ HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
+ // 向如下列插入数据
+ channelIdColumn -> value.channelId ,
+ dateColumn -> value.date ,
+ newCountColumn -> totalNewCount ,
+ oldCountColumn -> totalOldCount
+ ))
+ }
+ })
+ }
+
+}
+
+
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelNetworkTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelNetworkTask.scala
new file mode 100644
index 0000000..9497c07
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelNetworkTask.scala
@@ -0,0 +1,173 @@
+package com.henry.realprocess.task
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.windowing.time.Time
+
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/11/3 15:52
+ **/
+
+// 2. 添加一个`ChannelNetwork`样例类,它封装要统计的业务字段:频道ID(channelID)、网络方式
+// (network)、日期(date)、pv、uv、新用户(newCount)、老用户(oldCount)
+case class ChannelNetWork(
+ var channelId: String,
+ var network: String,
+ var date: String,
+ var pv: Long,
+ var uv: Long,
+ var newCount: Long,
+ var oldCount: Long
+ )
+
+
+object ChannelNetworkTask extends BaseTask[ChannelNetWork]{
+
+ override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelNetWork] = {
+
+ val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
+
+ clickLogWideDataStream.flatMap{
+ clickLogWide => {
+ List(
+ ChannelNetWork( // 月维度
+ clickLogWide.channelID,
+ clickLogWide.network,
+ clickLogWide.yearMonth,
+ clickLogWide.count,
+ clickLogWide.isMonthNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isMonthNew)
+ ),
+ ChannelNetWork( // 天维度
+ clickLogWide.channelID,
+ clickLogWide.network,
+ clickLogWide.yearMonthDay,
+ clickLogWide.count,
+ clickLogWide.isDayNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isDayNew)
+ ),
+ ChannelNetWork( // 小时维度
+ clickLogWide.channelID,
+ clickLogWide.network,
+ clickLogWide.yearMonthDayHour,
+ clickLogWide.count,
+ clickLogWide.isHourNew,
+ clickLogWide.isNew,
+ isOld(clickLogWide.isNew, clickLogWide.isHourNew)
+ )
+ )
+ }
+ }
+ }
+
+ override def keyBy(mapDataStream: DataStream[ChannelNetWork]): KeyedStream[ChannelNetWork, String] = {
+
+ mapDataStream.keyBy {
+ network =>
+ network.channelId +" : "+ network.network +" : "+ network.date
+ }
+ }
+
+ override def reduce(windowedStream: WindowedStream[ChannelNetWork, String, TimeWindow]): DataStream[ChannelNetWork] = {
+ windowedStream.reduce {
+ (t1, t2) => {
+ ChannelNetWork(
+ t1.channelId,
+ t1.network,
+ t1.date,
+ t1.pv + t2.pv,
+ t1.uv + t2.uv,
+ t1.newCount + t2.newCount,
+ t1.oldCount + t2.oldCount
+ )
+ }
+ }
+ }
+
+
+ override def sink2HBase(reduceDataStream: DataStream[ChannelNetWork]): Unit = {
+
+ reduceDataStream.addSink(
+ network => {
+ // 创建 HBase 相关列 - 准备hbase的表名、列族名、rowkey名、列名
+ val tableName = "channel_network"
+ val clfName = "info"
+ // 频道ID(channelID)、运营商(network)、日期(date)pv、uv、新用户(newCount)、老用户(oldCount)
+ val rowkey = s"${network.channelId} : ${network.date} : ${network.network}" // 引用变量的方式
+ val channelIdColName = "channelID"
+ val networkColName = "network"
+ val dateColName = "date"
+ val pvColName = "pv"
+ val uvColName = "uv"
+ val newCountColName = "newCount"
+ val oldCountColName = "oldCount"
+
+ // 查询 HBase
+ // - 判断hbase中是否已经存在结果记录
+ val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName,
+ List( pvColName, uvColName, newCountColName, oldCountColName )
+ )
+
+ // 数据累加
+ var totalPv = 0L
+ var totalUv = 0L
+ var totalNewCount = 0L
+ var totalOldCount = 0L
+
+ // totalPv
+ if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(pvColName,""))) {
+ totalPv = resultMap(pvColName).toLong + network.pv
+ }
+ else {
+ totalPv = network.pv
+ }
+
+ // totalUv
+ if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(uvColName,""))) {
+ totalUv = resultMap(uvColName).toLong + network.uv
+ }
+ else {
+ totalUv = network.uv
+ }
+
+ // totalNewCount
+ if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(newCountColName,""))) {
+ totalNewCount = resultMap(newCountColName).toLong + network.newCount
+ }
+ else {
+ totalNewCount = network.newCount
+ }
+
+ // totalOldCount
+ if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColName,""))) {
+ totalOldCount = resultMap(oldCountColName).toLong + network.oldCount
+ }
+ else {
+ totalOldCount = network.oldCount
+ }
+
+ // 保存数据
+ HBaseUtil.putMapData(
+ tableName, rowkey, clfName, Map(
+ channelIdColName -> network.channelId,
+ networkColName -> network.network,
+ dateColName -> network.date,
+ pvColName -> totalPv,
+ uvColName -> totalUv,
+ newCountColName -> totalNewCount,
+ oldCountColName -> totalOldCount
+ )
+ )
+ }
+ )
+ }
+}
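
ChannelNetworkTask only fills in the four hooks above; the chaining itself lives in BaseTask.scala, added earlier in this patch. As a rough orientation only, a template of that shape could look like the sketch below. The trait name, the generic signature and the 3-second window are assumptions inferred from the other tasks in this module, not a copy of the actual BaseTask.

// Hedged sketch of a map -> keyBy -> window -> reduce -> sink template.
// Not the repository's BaseTask.scala; names and the 3s window are assumed.
import com.henry.realprocess.bean.ClickLogWide
import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

trait BaseTaskSketch[T] {
  def map(source: DataStream[ClickLogWide]): DataStream[T]
  def keyBy(mapped: DataStream[T]): KeyedStream[T, String]
  def reduce(windowed: WindowedStream[T, String, TimeWindow]): DataStream[T]
  def sink2HBase(reduced: DataStream[T]): Unit

  // Template method: subclasses such as ChannelNetworkTask only override the hooks above.
  def process(source: DataStream[ClickLogWide]): Unit = {
    val mapped   = map(source)
    val keyed    = keyBy(mapped)
    val windowed = keyed.timeWindow(Time.seconds(3))
    val reduced  = reduce(windowed)
    sink2HBase(reduced)
  }
}
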
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTask.scala
new file mode 100644
index 0000000..0d59e5a
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTask.scala
@@ -0,0 +1,108 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.flink.streaming.api.scala.{DataStream, WindowedStream}
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.scala.KeyedStream
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+import org.apache.commons.lang.StringUtils
+
+/**
+ * @Author: Henry
+ * @Description: 渠道 PV/UV
+ * 1、字段转换;
+ * 2、分组;
+ * 3、时间窗口;
+ * 4、聚合;
+ * 5、落地HBase
+ * @Date: Create in 2019/10/30 20:15
+ **/
+
+case class ChannelPvUv(
+ val channelId: String,
+ val yearDayMonthHour: String,
+ val pv: Long,
+ val uv: Long
+ )
+
+object ChannelPvUvTask {
+
+ def process(clickLogWideDateStream : DataStream[ClickLogWide])= {
+
+ // 1、转换
+ val channelPvUvDS: DataStream[ChannelPvUv] = clickLogWideDateStream.map{
+ clickLogWide => {
+ ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDayHour,
+ clickLogWide.count, clickLogWide.isHourNew)
+ }
+ }
+
+ // 2、分组
+ val keyedStream: KeyedStream[ChannelPvUv, String] = channelPvUvDS.keyBy{
+ channelPvUv => channelPvUv.channelId + channelPvUv.yearDayMonthHour
+ }
+
+ // 3、窗口
+ val windowedStream: WindowedStream[ChannelPvUv, String, TimeWindow] =
+ keyedStream.timeWindow(Time.seconds(3))
+
+
+ // 4、聚合
+ val reduceDataStream: DataStream[ChannelPvUv] = windowedStream.reduce{
+ (t1, t2) => ChannelPvUv(t1.channelId, t1.yearDayMonthHour, t1.pv + t2.pv, t1.uv + t2.uv)
+ }
+
+ // 5、HBase 落地
+ reduceDataStream.addSink(new SinkFunction[ChannelPvUv] {
+
+ override def invoke(value: ChannelPvUv): Unit = {
+
+ // HBase 相关字段
+ val tableName = "channel_pvuv"
+ val clfName = "info"
+ val channelIdColumn = "channelId"
+ val yearMonthDayHourColumn = "yearMonthDayHour"
+ val pvColumn = "pv"
+ val uvColumn = "uv"
+
+ val rowkey = value.channelId + ":" + value.yearDayMonthHour
+
+ // 查询 HBase ,并且获取相关记录
+ val pvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, pvColumn)
+ val uvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, uvColumn)
+
+ var totalPv = 0L
+ var totalUv = 0L
+
+ // 如果 HBase 中没有 PV 值,就把当前值保存;如果有值就进行累加
+ if(StringUtils.isBlank(pvInHBase)){
+ totalPv = value.pv
+ }
+ else {
+ totalPv = pvInHBase.toLong + value.pv
+ }
+
+ // 如果 HBase 中没有 UV 值,就把当前值保存;如果有值就进行累加
+ if(StringUtils.isBlank(uvInHBase)){
+ totalUv = value.uv
+ }
+ else {
+ totalUv = uvInHBase.toLong + value.uv
+ }
+
+      // Save the accumulated totals back to HBase (not the raw window values)
+      HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
+        channelIdColumn -> value.channelId ,
+        yearMonthDayHourColumn -> value.yearDayMonthHour ,
+        pvColumn -> totalPv.toString ,
+        uvColumn -> totalUv.toString
+      ))
+
+ }
+ })
+ }
+}
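
The PV/UV sink above, and the ones that follow, all repeat the same round trip: read the old value from HBase, add the window's increment, write the total back. Purely as an illustration, that pattern can be factored into a tiny helper like this hypothetical one (not part of the patch):

// Hypothetical helper: fold an existing HBase cell value (possibly blank) with an increment.
import org.apache.commons.lang.StringUtils

def accumulate(oldValue: String, increment: Long): Long =
  if (StringUtils.isBlank(oldValue)) increment else oldValue.toLong + increment

// e.g. totalPv = accumulate(HBaseUtil.getData(tableName, rowkey, clfName, pvColumn), value.pv)
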
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTaskMerge.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTaskMerge.scala
new file mode 100644
index 0000000..2843fbb
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTaskMerge.scala
@@ -0,0 +1,105 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.flink.api.scala._
+import org.apache.commons.lang.StringUtils
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/30 22:42
+ **/
+
+// Reuses the ChannelPvUv case class already defined in ChannelPvUvTask.scala (same
+// package); redefining it here would cause a duplicate-definition compile error.
+
+object ChannelPvUvTaskMerge {
+
+ def process(clickLogWideDateStream : DataStream[ClickLogWide])= {
+
+ // 1、转换
+ val channelPvUvDS: DataStream[ChannelPvUv] = clickLogWideDateStream.flatMap{
+ clickLogWide => {
+ List(
+ ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDayHour, clickLogWide.count, clickLogWide.isHourNew) ,
+ ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDay, clickLogWide.count, clickLogWide.isDayNew) ,
+ ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonth, clickLogWide.count, clickLogWide.isMonthNew)
+ )
+ }
+ }
+
+ // 2、分组
+ val keyedStream: KeyedStream[ChannelPvUv, String] = channelPvUvDS.keyBy{
+ channelPvUv => channelPvUv.channelId + channelPvUv.yearDayMonthHour
+ }
+
+ // 3、窗口
+ val windowedStream: WindowedStream[ChannelPvUv, String, TimeWindow] =
+ keyedStream.timeWindow(Time.seconds(3))
+
+
+ // 4、聚合
+ val reduceDataStream: DataStream[ChannelPvUv] = windowedStream.reduce{
+ (t1, t2) => ChannelPvUv(t1.channelId, t1.yearDayMonthHour, t1.pv + t2.pv, t1.uv + t2.uv)
+ }
+
+ // 5、HBase 落地
+ reduceDataStream.addSink(new SinkFunction[ChannelPvUv] {
+
+ override def invoke(value: ChannelPvUv): Unit = {
+
+ // HBase 相关字段
+ val tableName = "channel_pvuv"
+ val clfName = "info"
+ val channelIdColumn = "channelId"
+ val yearMonthDayHourColumn = "yearMonthDayHour"
+ val pvColumn = "pv"
+ val uvColumn = "uv"
+
+ val rowkey = value.channelId + ":" + value.yearDayMonthHour
+
+ // 查询 HBase ,并且获取相关记录
+ val pvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, pvColumn)
+ val uvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, uvColumn)
+
+ var totalPv = 0L
+ var totalUv = 0L
+
+ // 如果 HBase 中没有 PV 值,就把当前值保存;如果有值就进行累加
+ if(StringUtils.isBlank(pvInHBase)){
+ totalPv = value.pv
+ }
+ else {
+ totalPv = pvInHBase.toLong + value.pv
+ }
+
+ // 如果 HBase 中没有 UV 值,就把当前值保存;如果有值就进行累加
+ if(StringUtils.isBlank(uvInHBase)){
+ totalUv = value.uv
+ }
+ else {
+ totalUv = uvInHBase.toLong + value.uv
+ }
+
+      // Save the accumulated totals back to HBase (not the raw window values)
+      HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
+        channelIdColumn -> value.channelId ,
+        yearMonthDayHourColumn -> value.yearDayMonthHour ,
+        pvColumn -> totalPv.toString ,
+        uvColumn -> totalUv.toString
+      ))
+
+ }
+ })
+ }
+}
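
For orientation: each wide click record fans out into three ChannelPvUv rows (hour, day and month), and because the key concatenates channelId with the date string, each dimension accumulates into its own HBase row. A tiny illustration with made-up values:

// Made-up values, only to show how the key separates the time dimensions.
val hourRow = ChannelPvUv("channel-1", "2019103021", 1, 1) // hour dimension
val dayRow  = ChannelPvUv("channel-1", "20191030",   1, 1) // day dimension
println(hourRow.channelId + ":" + hourRow.yearDayMonthHour) // channel-1:2019103021
println(dayRow.channelId  + ":" + dayRow.yearDayMonthHour)  // channel-1:20191030
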
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/ChannelRealHotTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/ChannelRealHotTask.scala
new file mode 100644
index 0000000..42a72f9
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/ChannelRealHotTask.scala
@@ -0,0 +1,88 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.ClickLogWide
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.flink.streaming.api.scala.{DataStream, WindowedStream}
+import org.apache.flink.api.scala._
+import org.apache.flink.streaming.api.functions.sink.SinkFunction
+import org.apache.flink.streaming.api.scala.KeyedStream
+import org.apache.flink.streaming.api.windowing.time.Time
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow
+import org.apache.commons.lang.StringUtils
+
+
+/**
+ * @Author: Henry
+ * @Description: 频道热点分析业务开发
+ * 1、字段转换;
+ * 2、分组;
+ * 3、时间窗口;
+ * 4、聚合;
+ * 5、落地HBase
+ * @Date: Create in 2019/10/29 20:22
+ **/
+
+case class ChannelRealHot(var channelid:String, var visited:Long)
+
+
+object ChannelRealHotTask {
+
+ def process(clickLogWideDateStream : DataStream[ClickLogWide])= {
+
+ // 1、字段转换 channelid、visited
+ val realHotDataStream: DataStream[ChannelRealHot] = clickLogWideDateStream.map{
+ clickLogWide: ClickLogWide =>
+ ChannelRealHot(clickLogWide.channelID, clickLogWide.count)
+ }
+
+ // 2、分组
+ val keyedStream: KeyedStream[ChannelRealHot, String] = realHotDataStream.keyBy(_.channelid)
+
+
+ // 3、时间窗口
+ val windowedStream: WindowedStream[ChannelRealHot, String, TimeWindow] = keyedStream.timeWindow(
+ Time.seconds(3))
+
+ // 4、聚合
+ val reduceDataStream: DataStream[ChannelRealHot] = windowedStream.reduce{
+ (t1: ChannelRealHot, t2: ChannelRealHot) =>
+ ChannelRealHot(t1.channelid, t1.visited + t2.visited)
+ }
+    // For debugging, the aggregated stream can be printed here: reduceDataStream.print()
+
+ // 5、落地 HBase
+ reduceDataStream.addSink(new SinkFunction[ChannelRealHot] {
+
+ override def invoke(value: ChannelRealHot): Unit = {
+
+ // HBase 相关字段
+ val tableName = "channel"
+ val clfName = "info"
+ val channelIdColumn = "channelId"
+ val visitedColumn = "visited"
+ val rowkey = value.channelid
+
+
+ // 查询 HBase ,并且获取相关记录
+ val visitedValue: String = HBaseUtil.getData(tableName, rowkey, clfName, visitedColumn)
+ // 创建总数的临时变量
+ var totalCount: Long = 0
+
+ if(StringUtils.isBlank(visitedValue)){
+ totalCount = value.visited
+ }
+ else {
+ totalCount = visitedValue.toLong + value.visited
+ }
+
+ // 保存数据
+ HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
+ channelIdColumn -> value.channelid ,
+ visitedColumn -> totalCount.toString
+ ))
+ }
+ })
+ }
+
+}
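
How these task objects are invoked is defined in App.scala elsewhere in this patch. As a hedged sketch only (the stream names are assumptions, not copied from that file), the wiring amounts to:

// Assumed names for illustration; App.scala is the authoritative wiring.
val clickLogWideDataStream = PreprocessTask.process(watermarkDataStream)
ChannelRealHotTask.process(clickLogWideDataStream)
ChannelPvUvTask.process(clickLogWideDataStream)
ChannelPvUvTaskMerge.process(clickLogWideDataStream)
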
diff --git a/real-process/src/main/scala/com/henry/realprocess/task/PreprocessTask.scala b/real-process/src/main/scala/com/henry/realprocess/task/PreprocessTask.scala
new file mode 100644
index 0000000..d5453be
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/task/PreprocessTask.scala
@@ -0,0 +1,165 @@
+package com.henry.realprocess.task
+
+import com.henry.realprocess.bean.{ClickLogWide, Message}
+import com.henry.realprocess.util.HBaseUtil
+import org.apache.commons.lang.StringUtils
+import org.apache.commons.lang.time.FastDateFormat
+import org.apache.flink.streaming.api.scala.DataStream
+import org.apache.flink.api.scala._
+
+/**
+ * @Author: Henry
+ * @Description: 预处理任务
+ * @Date: Create in 2019/10/27 14:31
+ **/
+object PreprocessTask {
+
+
+ def process(watermarkDataStream:DataStream[Message])= {
+
+ /**
+ * 大括号{}用于代码块,计算结果是代码最后一行;
+ * 大括号{}用于拥有代码块的函数;
+ * 大括号{}在只有一行代码时可以省略,除了case语句(Scala适用);
+ * 小括号()在函数只有一个参数时可以省略(Scala适用);
+ * 几乎没有二者都省略的情况。
+ */
+ watermarkDataStream.map {
+
+ msg =>
+ // 转换时间
+ val yearMonth: String = FastDateFormat.getInstance("yyyyMM").format(msg.timeStamp)
+ val yearMonthDay: String = FastDateFormat.getInstance("yyyyMMdd").format(msg.timeStamp)
+ val yearMonthDayHour: String = FastDateFormat.getInstance("yyyyMMddHH").format(msg.timeStamp)
+
+ // 转换地区
+ val address = msg.clickLog.country + msg.clickLog.province + msg.clickLog.city
+
+ val isNewtuple = isNewProcess(msg)
+
+ ClickLogWide(
+ msg.clickLog.channelID,
+ msg.clickLog.categoryID,
+ msg.clickLog.produceID,
+ msg.clickLog.country,
+ msg.clickLog.province,
+ msg.clickLog.city,
+ msg.clickLog.network,
+ msg.clickLog.source,
+ msg.clickLog.browserType,
+ msg.clickLog.entryTime,
+ msg.clickLog.leaveTime,
+ msg.clickLog.userID,
+ msg.count,
+ msg.timeStamp,
+ address,
+ yearMonth,
+ yearMonthDay,
+ yearMonthDayHour,
+ isNewtuple._1,
+ isNewtuple._2,
+ isNewtuple._3,
+ isNewtuple._4
+ )
+ }
+
+ }
+
+ /**
+ * 判断用户是否为新用户
+ * @param msg
+ */
+ private def isNewProcess(msg:Message)={
+
+ // 1、定义4个变量,初始化为0
+ var isNew = 0
+ var isHourNew = 0
+ var isDayNew = 0
+ var isMonthNew = 0
+
+
+ // 2、从HBase中查询用户记录,如果有记录,再去判断其他时间;如果没有记录,则证明是新用户
+ val tableName = "user_history"
+ var clfName = "info"
+ var rowkey = msg.clickLog.userID + ":" + msg.clickLog.channelID
+
+ // - 用户ID(userID)
+ var userIdColumn = "userid"
+ // - 频道ID(channelid)
+ var channelidColumn = "channelid"
+ // - 最后访问时间(时间戳)(lastVisitedTime)
+ var lastVisitedTimeColumn = "lastVisitedTime"
+
+
+ var userId: String = HBaseUtil.getData(tableName, rowkey, clfName, userIdColumn)
+ var channelid: String = HBaseUtil.getData(tableName, rowkey, clfName, channelidColumn)
+ var lastVisitedTime: String = HBaseUtil.getData(tableName, rowkey, clfName, lastVisitedTimeColumn)
+
+
+ // 如果 userid 为空,则该用户一定是新用户
+ if(StringUtils.isBlank(userId)){
+ isNew = 1
+ isHourNew = 1
+ isDayNew = 1
+ isMonthNew = 1
+
+ // 保存用户的访问记录到 "user_history"
+ HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
+ userIdColumn -> msg.clickLog.userID ,
+ channelidColumn -> msg.clickLog.channelID ,
+ lastVisitedTimeColumn -> msg.timeStamp
+ ))
+ }
+    else{
+      isNew = 0
+      // For the remaining flags, compare the current timestamp with the stored last-visit timestamp
+      isHourNew = compareDate(msg.timeStamp, lastVisitedTime.toLong, "yyyyMMddHH")
+      isDayNew = compareDate(msg.timeStamp, lastVisitedTime.toLong, "yyyyMMdd")
+      isMonthNew = compareDate(msg.timeStamp, lastVisitedTime.toLong, "yyyyMM")
+
+      // Update the user's last-visit timestamp in "user_history"
+      HBaseUtil.putData(tableName, rowkey, clfName, lastVisitedTimeColumn , msg.timeStamp.toString)
+
+    }
+
+    (isNew, isHourNew, isDayNew, isMonthNew)
+ }
+
+
+ /**
+ * 比对时间: 201912 > 201911
+ * @param currentTime 当前时间
+ * @param historyTime 历史时间
+ * @param format 时间格式: yyyyMM yyyyMMdd
+ * @return 1 或者 0
+ */
+ def compareDate(currentTime:Long, historyTime:Long, format:String):Int={
+
+ val currentTimeStr:String = timestamp2Str(currentTime, format)
+ val historyTimeStr:String = timestamp2Str(historyTime, format)
+
+ // 比对字符串大小,如果当前时间 > 历史时间,返回1
+ var result:Int = currentTimeStr.compareTo(historyTimeStr)
+
+ if(result > 0){
+ result = 1
+ }
+ else {
+ result = 0
+ }
+ result
+ }
+
+  /**
+    * Format a timestamp as a date string
+    * @param timestamp timestamp in milliseconds
+    * @param format date pattern, e.g. yyyyMM / yyyyMMdd
+    * @return the formatted date string
+    */
+  def timestamp2Str(timestamp:Long, format:String):String={
+    FastDateFormat.getInstance(format).format(timestamp)
+ }
+
+
+
+}
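
A quick sanity check of the comparison helpers (illustrative only, assuming the corrected timestamp2Str above):

// compareDate returns 1 only when the formatted current time is strictly
// greater than the formatted history time, otherwise 0.
val now      = System.currentTimeMillis()
val lastWeek = now - 7L * 24 * 3600 * 1000
println(PreprocessTask.compareDate(now, lastWeek, "yyyyMMdd")) // 1 - a later day counts as new
println(PreprocessTask.compareDate(now, now, "yyyyMMdd"))      // 0 - same day is not new
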
diff --git a/real-process/src/main/scala/com/henry/realprocess/util/GlobalConfigutil.scala b/real-process/src/main/scala/com/henry/realprocess/util/GlobalConfigutil.scala
new file mode 100644
index 0000000..a42e4bc
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/util/GlobalConfigutil.scala
@@ -0,0 +1,33 @@
+package com.henry.realprocess.util
+
+import com.typesafe.config.{Config, ConfigFactory}
+
+/**
+ * @Author: Henry
+ * @Description: 配置文件加载类
+ * @Date: Create in 2019/10/15 23:42
+ **/
+object GlobalConfigutil {
+
+  // Load the configuration via the factory; ConfigFactory automatically reads application.conf (the file name must not change)
+ val config:Config = ConfigFactory.load()
+
+ val bootstrapServers = config.getString("bootstrap.servers")
+ val zookeeperConnect = config.getString("zookeeper.connect")
+ val inputTopic = config.getString("input.topic")
+ val gruopId = config.getString("gruop.id")
+ val enableAutoCommit = config.getString("enable.auto.commit")
+ val autoCommitIntervalMs = config.getString("auto.commit.interval.ms")
+ val autoOffsetReset = config.getString("auto.offset.reset")
+
+ def main(args: Array[String]): Unit = {
+    // Editor tip: Alt + left-mouse drag down to the last line, then Ctrl+Shift+Right to column-select when generating these printlns
+ println(bootstrapServers)
+ println(zookeeperConnect)
+ println(inputTopic)
+ println(gruopId)
+ println(enableAutoCommit)
+ println(autoCommitIntervalMs)
+ println(autoOffsetReset)
+ }
+}
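
The same lookup pattern can be exercised in isolation. The snippet below parses an inline config with a made-up value instead of the project's application.conf, just to show the getString call:

// Inline HOCON for illustration only - not the project's application.conf
import com.typesafe.config.{Config, ConfigFactory}

val demo: Config = ConfigFactory.parseString("input.topic = pyg")
println(demo.getString("input.topic")) // pyg
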
diff --git a/real-process/src/main/scala/com/henry/realprocess/util/HBaseUtil.scala b/real-process/src/main/scala/com/henry/realprocess/util/HBaseUtil.scala
new file mode 100644
index 0000000..f3c9d9d
--- /dev/null
+++ b/real-process/src/main/scala/com/henry/realprocess/util/HBaseUtil.scala
@@ -0,0 +1,274 @@
+package com.henry.realprocess.util
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
+import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptor, _}
+import org.apache.hadoop.hbase.util.Bytes
+
+/**
+ * @Author: Henry
+ * @Description: HBase 工具类
+ * 1、获取Table对象
+ * 2、保存单列数据
+ * 3、查询单列数据
+ * 4、保存多列数据
+ * 5、查询多列数据
+ * 6、删除数据
+ * @Date: Create in 2019/10/21 22:53
+ **/
+object HBaseUtil {
+
+ // HBase 配置类,不需要指定配置文件名,文件名要求是 hbase-site.xml
+ val conf:Configuration = HBaseConfiguration.create()
+
+ // HBase 的连接
+ val conn:Connection = ConnectionFactory.createConnection(conf)
+
+ // HBase 的操作 API
+ val admin:Admin = conn.getAdmin
+
+ /**
+ * 返回Table,如果不存在,则创建表
+ *
+ * @param tableName 表名
+ * @param columnFamilyName 列族名
+ * @return
+ */
+ def getTable(tableNameStr:String, columnFamilyName:String):Table={
+
+
+ // 获取 TableName
+ val tableName:TableName = TableName.valueOf(tableNameStr)
+
+ // 如果表不存在,则创建表
+
+ if(!admin.tableExists(tableName)){
+
+ // 构建出表的描述的建造者
+ val descBuilder: TableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tableName)
+
+ val familyDescriptor:ColumnFamilyDescriptor = ColumnFamilyDescriptorBuilder
+ .newBuilder(columnFamilyName.getBytes).build()
+
+ // 给表添加列族
+ descBuilder.setColumnFamily(familyDescriptor)
+
+ // 创建表
+ admin.createTable(descBuilder.build())
+ }
+
+ conn.getTable(tableName)
+
+ }
+
+ /**
+ * 存储单列数据
+ *
+ * @param tableNameStr 表名
+ * @param rowkey 主键
+ * @param columnFamilyName 列族名
+ * @param columnName 列名
+ * @param columnValue 列值
+ */
+ def putData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String, columnValue:String)={
+
+ // 获取表
+ val table:Table = getTable(tableNameStr, columnFamilyName)
+
+ try{
+ // Put
+ val put:Put = new Put(rowkey.getBytes)
+ put.addColumn(columnFamilyName.getBytes, columnName.getBytes, columnValue.getBytes)
+
+ // 保存数据
+ table.put(put)
+ }catch {
+ case ex:Exception=>{
+ ex.printStackTrace()
+ }
+ }finally {
+ table.close()
+ }
+ }
+
+
+ /**
+ * 通过单列名获取列值
+ * @param tableNameStr 表名
+ * @param rowkey 主键
+ * @param columnFamilyName 列族名
+ * @param columnName 列名
+ * @param columnValue 列值
+ * @return
+ */
+ def getData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String):String={
+
+ // 1. 获取 Table 对象
+ val table = getTable(tableNameStr, columnFamilyName)
+
+ try {
+ // 2. 构建 get 对象
+ val get = new Get(rowkey.getBytes)
+
+ // 3. 进行查询
+ val result:Result = table.get(get)
+
+ // 4. 判断查询结果是否为空,并且包含要查询的列
+ if (result != null && result.containsColumn(columnFamilyName.getBytes, columnName.getBytes)){
+ val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), columnName.getBytes)
+
+ Bytes.toString(bytes)
+ }else{
+ ""
+ }
+
+ }catch{
+ case ex:Exception => {
+ ex.printStackTrace()
+ ""
+ }
+ }finally {
+ // 5、关闭表
+ table.close()
+ }
+
+ }
+
+
+ /**
+ * 存储多列数据
+ * @param tableNameStr 表名
+ * @param rowkey 主键
+ * @param columnFamilyName 列族名
+ * @param map 多个列名和列族集合
+ */
+ def putMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, map:Map[String,Any])={
+
+ // 1、获取 table 对象
+ val table = getTable(tableNameStr, columnFamilyName)
+
+ try{
+ // 2、创建 put
+ val put = new Put(rowkey.getBytes)
+
+ // 3、在 put 中添加多个列名和列值
+ for ((colName, colValue) <- map){
+ put.addColumn(columnFamilyName.getBytes, colName.getBytes, colValue.toString.getBytes)
+ }
+
+ // 4、保存 put
+ table.put(put)
+
+ }catch{
+ case ex:Exception => {
+ ex.printStackTrace()
+
+ }
+    }finally {
+      // 5、关闭表
+      table.close()
+    }
+  }
+
+
+ /**
+    * 获取多列数据的值
+ * @param tableNameStr 表名
+ * @param rowkey 主键
+ * @param columnFamilyName 列族名
+ * @param columnNameList 多个列名和列值集合
+ * @return
+ */
+ def getMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnNameList:List[String]):Map[String,String]= {
+
+ // 1、获取 Table
+ val table = getTable(tableNameStr, columnFamilyName)
+
+ try{
+ // 2、构建 get
+ val get = new Get(rowkey.getBytes)
+
+ // 3、执行查询
+ val result: Result = table.get(get)
+
+ // 4、遍历列名集合,取出列值,构建成 Map 返回
+ columnNameList.map {
+ col =>
+ val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), col.getBytes)
+
+ if (bytes != null && bytes.size > 0) {
+ col -> Bytes.toString(bytes)
+ }
+ else { // 如果取不到值,则赋一个空串
+ "" -> ""
+ }
+ }.filter(_._1 != "").toMap // 把不是空串的过滤出来,再转换成 Map
+
+ }catch {
+ case ex:Exception => {
+ ex.printStackTrace()
+ Map[String, String]() // 返回一个空的 Map
+ }
+ }finally {
+ // 5、关闭 Table
+ table.close()
+ }
+ }
+
+
+ /**
+ * 删除数据
+ * @param tableNameStr 表名
+ * @param rowkey 主键
+ * @param columnFamilyName 列族名
+ */
+ def delete(tableNameStr:String, rowkey:String, columnFamilyName:String)={
+
+ // 1、获取 Table
+ val table:Table = getTable(tableNameStr, columnFamilyName)
+
+ try {
+ // 2、构建 delete 对象
+ val delete: Delete = new Delete(rowkey.getBytes)
+
+ // 3、执行删除
+ table.delete(delete)
+
+ }
+ catch {
+ case ex:Exception =>
+ ex.printStackTrace()
+ }
+ finally {
+ // 4、关闭 table
+ table.close()
+ }
+
+ }
+
+
+ def main(args: Array[String]): Unit = {
+
+// println(getTable("test","info"))
+// putData("test", "1", "info", "t1", "hello world")
+// println(getData("test", "1", "info", "t1"))
+
+// val map = Map(
+// "t2" -> "scala" ,
+// "t3" -> "hive" ,
+// "t4" -> "flink"
+// )
+// putMapData("test", "1", "info", map)
+
+// println(getMapData("test", "1", "info", List("t1", "t2")))
+
+ delete("test", "1", "info")
+ println(getMapData("test", "1", "info", List("t1", "t2")))
+
+ }
+
+}
diff --git a/real-process/src/test/temp.txt b/real-process/src/test/temp.txt
new file mode 100644
index 0000000..98af405
--- /dev/null
+++ b/real-process/src/test/temp.txt
@@ -0,0 +1,7 @@
+val bootstrap.servers = config.getString("bootstrap.servers")
+val zookeeper.connect = config.getString("zookeeper.connect")
+val input.topic = config.getString("input.topic")
+val gruop.id = config.getString("gruop.id")
+val enable.auto.commit = config.getString("enable.auto.commit")
+val auto.commit.interval.ms = config.getString("auto.commit.interval.ms")
+val auto.offset.reset = config.getString("auto.offset.reset")
\ No newline at end of file
diff --git a/report/pom.xml b/report/pom.xml
new file mode 100644
index 0000000..f9eafd6
--- /dev/null
+++ b/report/pom.xml
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.henry</groupId>
+    <artifactId>report</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
+    <name>report</name>
+    <description>Spring Boot reporting service</description>
+
+    <parent>
+        <groupId>org.springframework.boot</groupId>
+        <artifactId>spring-boot-starter-parent</artifactId>
+        <version>1.5.13.RELEASE</version>
+    </parent>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+        <java.version>1.8</java.version>
+        <spring-cloud.version>Greenwich.M3</spring-cloud.version>
+    </properties>
+
+    <repositories>
+        <repository>
+            <id>alimaven</id>
+            <name>alimaven</name>
+            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
+        </repository>
+    </repositories>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter</artifactId>
+            <version>1.5.13.RELEASE</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-test</artifactId>
+            <version>1.5.13.RELEASE</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-web</artifactId>
+            <version>2.5.12</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-tomcat</artifactId>
+            <version>1.5.13.RELEASE</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tomcat</groupId>
+            <artifactId>tomcat-catalina</artifactId>
+            <version>8.5.35</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>fastjson</artifactId>
+            <version>1.2.83</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.kafka</groupId>
+            <artifactId>spring-kafka</artifactId>
+            <version>1.0.6.RELEASE</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.5.13</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.springframework.boot</groupId>
+                <artifactId>spring-boot-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/report/src/main/java/com/henry/report/ReportApplication.java b/report/src/main/java/com/henry/report/ReportApplication.java
new file mode 100644
index 0000000..e7212dc
--- /dev/null
+++ b/report/src/main/java/com/henry/report/ReportApplication.java
@@ -0,0 +1,18 @@
+package com.henry.report;
+
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.boot.SpringApplication;
+/**
+ * @Author: HongZhen
+ * @Description:
+ * @Date: Create in 2019/9/20 11:10
+ **/
+
+// 添加注解 @SpringBootApplication ,表示该类是一个启动类
+@SpringBootApplication
+public class ReportApplication {
+
+ public static void main(String[] args) {
+ SpringApplication.run(ReportApplication.class, args);
+ }
+}
diff --git a/report/src/main/java/com/henry/report/bean/Clicklog.java b/report/src/main/java/com/henry/report/bean/Clicklog.java
new file mode 100644
index 0000000..a3a8fda
--- /dev/null
+++ b/report/src/main/java/com/henry/report/bean/Clicklog.java
@@ -0,0 +1,136 @@
+package com.henry.report.bean;
+
+/**
+ * @Author: Henry
+ * @Description: 点击流日志
+ * @Date: Create in 2019/10/13 19:33
+ **/
+
+public class Clicklog {
+
+ // 频道 ID
+ private long channelID;
+ // 产品的类别 ID
+ private long categoryID ;
+ // 产品 ID
+ private long produceID ;
+ // 用户 ID
+ private long userID ;
+
+ // 国家
+ private String country;
+ // 省份
+ private String province;
+ // 城市
+ private String city;
+
+ // 网络方式
+ private String network;
+ // 来源方式
+ private String source;
+
+ // 浏览器类型
+ private String browserType;
+
+ // 进入网站时间
+ private Long entryTime ;
+    // 离开网站时间
+ private long leaveTime;
+
+ public long getChannelID() {
+ return channelID;
+ }
+
+ public void setChannelID(long channelID) {
+ this.channelID = channelID;
+ }
+
+ public long getCategoryID() {
+ return categoryID;
+ }
+
+ public void setCategoryID(long categoryID) {
+ this.categoryID = categoryID;
+ }
+
+ public long getProduceID() {
+ return produceID;
+ }
+
+ public void setProduceID(long produceID) {
+ this.produceID = produceID;
+ }
+
+ public long getUserID() {
+ return userID;
+ }
+
+ public void setUserID(long userID) {
+ this.userID = userID;
+ }
+
+ public String getCountry() {
+ return country;
+ }
+
+ public void setCountry(String country) {
+ this.country = country;
+ }
+
+ public String getProvince() {
+ return province;
+ }
+
+ public void setProvince(String province) {
+ this.province = province;
+ }
+
+ public String getCity() {
+ return city;
+ }
+
+ public void setCity(String city) {
+ this.city = city;
+ }
+
+ public String getNetwork() {
+ return network;
+ }
+
+ public void setNetwork(String network) {
+ this.network = network;
+ }
+
+ public String getSource() {
+ return source;
+ }
+
+ public void setSource(String source) {
+ this.source = source;
+ }
+
+ public String getBrowserType() {
+ return browserType;
+ }
+
+ public void setBrowserType(String browserType) {
+ this.browserType = browserType;
+ }
+
+ public Long getEntryTime() {
+ return entryTime;
+ }
+
+ public void setEntryTime(Long entryTime) {
+ this.entryTime = entryTime;
+ }
+
+ public long getLeaveTime() {
+ return leaveTime;
+ }
+
+ public void setLeaveTime(long leaveTime) {
+ this.leaveTime = leaveTime;
+ }
+
+}
diff --git a/report/src/main/java/com/henry/report/bean/Message.java b/report/src/main/java/com/henry/report/bean/Message.java
new file mode 100644
index 0000000..c1cad29
--- /dev/null
+++ b/report/src/main/java/com/henry/report/bean/Message.java
@@ -0,0 +1,51 @@
+package com.henry.report.bean;
+
+/**
+ * @Author: Henry
+ * @Description: 消息实体类
+ * @Date: Create in 2019/10/11 23:40
+ **/
+public class Message {
+
+ // 消息次数
+ private int count;
+
+ // 消息的时间戳
+ private long timestamp;
+
+ // 消息体
+ private String message;
+
+ public int getCount() {
+ return count;
+ }
+
+ public void setCount(int count) {
+ this.count = count;
+ }
+
+ public long getTimestamp() {
+ return timestamp;
+ }
+
+ public void setTimestamp(long timestamp) {
+ this.timestamp = timestamp;
+ }
+
+ public String getMessage() {
+ return message;
+ }
+
+ public void setMessage(String message) {
+ this.message = message;
+ }
+
+ @Override
+ public String toString() {
+ return "Message{" +
+ "count=" + count +
+ ", timestamp=" + timestamp +
+ ", message='" + message + '\'' +
+ '}';
+ }
+}
diff --git a/report/src/main/java/com/henry/report/controller/ReportController.java b/report/src/main/java/com/henry/report/controller/ReportController.java
new file mode 100644
index 0000000..a06b0b8
--- /dev/null
+++ b/report/src/main/java/com/henry/report/controller/ReportController.java
@@ -0,0 +1,53 @@
+package com.henry.report.controller;
+
+import com.alibaba.fastjson.JSON;
+import com.henry.report.bean.Message;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.kafka.core.KafkaTemplate;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @Author: Henry
+ * @Description:
+ * @Date: Create in 2019/10/11 23:43
+ **/
+
+// 表示这是一个 Controller,并且其中所有的方法都是带有 @ResponseBody 的注解
+@RestController
+public class ReportController {
+
+ @Autowired
+    KafkaTemplate<String, String> kafkaTemplate;
+
+ @RequestMapping("/receive")
+    public Map<String, String> receive(@RequestBody String json) {
+
+        Map<String, String> map = new HashMap<>(); // 记录是否发送成功
+
+ try {
+ // 构建 Message
+ Message msg = new Message();
+ msg.setMessage(json);
+ msg.setCount(1);
+ msg.setTimestamp(System.currentTimeMillis());
+
+ String msgJSON = JSON.toJSONString(msg);
+
+ // 发送 Message 到 Kafka
+ kafkaTemplate.send("pyg", msgJSON);
+ map.put("success", "ture");
+
+ }catch (Exception ex){
+ ex.printStackTrace();
+ map.put("success", "false");
+ }
+
+ return map;
+ }
+
+}
diff --git a/report/src/main/java/com/henry/report/controller/TestController.java b/report/src/main/java/com/henry/report/controller/TestController.java
new file mode 100644
index 0000000..d97799a
--- /dev/null
+++ b/report/src/main/java/com/henry/report/controller/TestController.java
@@ -0,0 +1,22 @@
+package com.henry.report.controller;
+
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+/**
+ * @Author: HongZhen
+ * @Description: Spring Boot 测试
+ * @Date: Create in 2019/9/20 11:19
+ **/
+
+// 表示这是一个 Controller,并且其中所有的方法都是带有 @ResponseBody 的注解
+@RestController
+public class TestController{
+
+// 为了能访问到该方法,需要添加如下注解,参数是代表如何来请求
+ @RequestMapping("/test")
+ public String test(String json){
+ System.out.println(json);
+ return json;
+ }
+}
diff --git a/report/src/main/java/com/henry/report/util/ClickLogGenerator.java b/report/src/main/java/com/henry/report/util/ClickLogGenerator.java
new file mode 100644
index 0000000..d7c33ed
--- /dev/null
+++ b/report/src/main/java/com/henry/report/util/ClickLogGenerator.java
@@ -0,0 +1,139 @@
+package com.henry.report.util;
+
+import com.alibaba.fastjson.JSONObject;
+import com.henry.report.bean.Clicklog;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.util.EntityUtils;
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * @Author: Henry
+ * @Description: 点击流日志模拟器
+ * @Date: Create in 2019/10/13 20:00
+ **/
+public class ClickLogGenerator {
+
+ // ID 信息
+ private static Long[] channelID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
+ private static Long[] categoryID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
+ private static Long[] produceID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
+ private static Long[] userID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
+
+ // 地区
+ private static String[] contrys = new String[]{"china"}; // 地区-国家集合
+ private static String[] provinces = new String[]{"HeNan", "HeBeijing"}; // 地区-省集合
+ private static String[] citys = new String[]{"ShiJiaZhuang", "ZhengZhou", "LuoyYang"}; // 地区-市集合
+
+ // 网络方式
+ private static String[] networks = new String[]{"电信", "移动", "联通"};
+
+ // 来源方式
+ private static String[] sources = new String[]{"直接输入", "百度跳转", "360搜索跳转", "必应跳转"};
+
+ // 浏览器
+ private static String[] browser = new String[]{"火狐", "QQ浏览器", "360浏览器", "谷歌浏览器"};
+
+ // 打开方式,离开时间
+    private static List<Long[]> usertimeLog = producetimes();
+
+    // 获取时间
+    private static List<Long[]> producetimes() {
+        List<Long[]> usertimelog = new ArrayList<>();
+ for (int i = 0; i < 100; i++) {
+ Long[] timearray = gettimes("2019-10-10 24:60:60:000");
+ usertimelog.add(timearray);
+ }
+ return usertimelog;
+ }
+
+ private static Long[] gettimes(String time) {
+ DateFormat dataFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss:SSS");
+ try {
+ Date date = dataFormat.parse(time);
+ long timetemp = date.getTime();
+ Random random = new Random();
+ int randomint = random.nextInt(10);
+ long starttime = timetemp - randomint*3600*1000;
+ long endtime = starttime + randomint*3600*1000;
+ return new Long[]{starttime,endtime};
+ }catch (ParseException e){
+ e.printStackTrace();
+ }
+ return new Long[]{0L, 0L};
+ }
+
+ // 模拟发送 Http 请求到上报服务系统
+ public static void send(String url, String json){
+ try {
+ CloseableHttpClient httpClient = HttpClientBuilder.create().build();
+ HttpPost post = new HttpPost(url);
+ JSONObject response = null ;
+ try {
+ StringEntity s = new StringEntity(json.toString(), "utf-8");
+ s.setContentEncoding("utf-8");
+ // 发送 json 数据需要设置 contentType
+ s.setContentType("application/json");
+ post.setEntity(s);
+
+ HttpResponse res = httpClient.execute(post);
+ if(res.getStatusLine().getStatusCode() == HttpStatus.SC_OK){
+ // 返回 json 格式
+ String result = EntityUtils.toString(res.getEntity());
+ System.out.println(result);
+ }
+ }catch (Exception e){
+ throw new RuntimeException();
+
+ }
+
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ public static void main(String[] args) {
+ Random random = new Random();
+ for (int i = 0; i < 100; i++) {
+ // 频道id、类别id、产品id、用户id、打开时间、离开时间、地区、网络方式、来源方式、浏览器
+ Clicklog clicklog = new Clicklog();
+
+ clicklog.setChannelID(channelID[random.nextInt(channelID.length)]);
+            clicklog.setCategoryID(categoryID[random.nextInt(categoryID.length)]);
+ clicklog.setProduceID(produceID[random.nextInt(produceID.length)]);
+ clicklog.setUserID(userID[random.nextInt(userID.length)]);
+ clicklog.setCountry(contrys[random.nextInt(contrys.length)]);
+ clicklog.setProvince(provinces[random.nextInt(provinces.length)]);
+ clicklog.setCity(citys[random.nextInt(citys.length)]);
+ clicklog.setNetwork(networks[random.nextInt(networks.length)]);
+ clicklog.setSource(sources[random.nextInt(sources.length)]);
+ clicklog.setBrowserType(browser[random.nextInt(browser.length)]);
+
+ Long[] times = usertimeLog.get(random.nextInt(usertimeLog.size()));
+ clicklog.setEntryTime(times[0]);
+ clicklog.setLeaveTime(times[1]);
+
+ // 将点击流日志转成字符串,发送到前端地址
+ String jsonstr = JSONObject.toJSONString(clicklog);
+ System.out.println(jsonstr);
+ try {
+ Thread.sleep(100);
+ }catch (InterruptedException e){
+ e.printStackTrace();
+ }
+
+ send("http://localhost:1234/receive", jsonstr);
+ }
+ }
+}
diff --git a/report/src/main/java/com/henry/report/util/KafkaProducerConfig.java b/report/src/main/java/com/henry/report/util/KafkaProducerConfig.java
new file mode 100644
index 0000000..aec7c27
--- /dev/null
+++ b/report/src/main/java/com/henry/report/util/KafkaProducerConfig.java
@@ -0,0 +1,98 @@
+package com.henry.report.util;
+
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.kafka.core.DefaultKafkaProducerFactory;
+import org.springframework.kafka.core.KafkaTemplate;
+import org.springframework.kafka.core.ProducerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @Author: Henry
+ * @Description: KafkaProducerConfig
+ * @Date: Create in 2019/10/6 21:56
+ **/
+
+@Configuration // 1、表示该类是一个配置类,这样在下面才能创建 Bean
+public class KafkaProducerConfig {
+
+    // Inject the value of kafka.bootstrap_servers_config from the properties file
+    @Value("${kafka.bootstrap_servers_config}")
+    private String bootstrap_servers_config;
+    // Number of retries allowed when a send fails
+    @Value("${kafka.retries_config}")
+    private String retries_config;
+    // Batch size per send, in bytes
+    @Value("${kafka.batch_size_config}")
+    private String batch_size_config;
+    // Linger time: send after at most 1 ms
+    @Value("${kafka.linger_ms_config}")
+    private String linger_ms_config;
+    // Producer buffer size, in bytes
+    @Value("${kafka.buffer_memory_config}")
+    private String buffer_memory_config;
+    // TOPIC name
+    @Value("${kafka.topic}")
+    private String topic;
+
+
+    @Bean // 2. This object is a Spring-managed bean
+    public KafkaTemplate<String, String> kafkaTemplate() {
+
+        // Configuration needed to build the producer factory
+        Map<String, Object> configs = new HashMap<>();
+
+        // 3. Apply the settings
+        // Copy the injected fields into the map used to create the Kafka producer
+        configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers_config);
+        configs.put(ProducerConfig.RETRIES_CONFIG, retries_config);
+        configs.put(ProducerConfig.BATCH_SIZE_CONFIG, batch_size_config);
+        configs.put(ProducerConfig.LINGER_MS_CONFIG, linger_ms_config);
+        configs.put(ProducerConfig.BUFFER_MEMORY_CONFIG, buffer_memory_config);
+
+        // Key and value serializers
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG , StringSerializer.class);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG , StringSerializer.class);
+
+        // Custom partitioner
+        configs.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, RoundRobinPartitioner.class);
+
+
+        // 4. Create the producer factory
+        ProducerFactory<String, String> producerFactory = new DefaultKafkaProducerFactory<>(configs);
+
+        // 5. Pass the factory to the KafkaTemplate constructor
+        // and return the kafkaTemplate bean
+        return new KafkaTemplate<>(producerFactory);
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/report/src/main/java/com/henry/report/util/RoundRobinPartitioner.java b/report/src/main/java/com/henry/report/util/RoundRobinPartitioner.java
new file mode 100644
index 0000000..aefd85d
--- /dev/null
+++ b/report/src/main/java/com/henry/report/util/RoundRobinPartitioner.java
@@ -0,0 +1,46 @@
+package com.henry.report.util;
+
+import org.apache.kafka.clients.producer.Partitioner;
+import org.apache.kafka.common.Cluster;
+
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * @Author: Henry
+ * @Description: 自定义分区
+ * @Date: Create in 2019/10/9 23:00
+ **/
+
+public class RoundRobinPartitioner implements Partitioner {
+
+ // AtomicInteger 并发包下的多线程安全的整型类
+ AtomicInteger counter = new AtomicInteger(0) ;
+
+
+ // 返回值为分区号: 0、1、2
+ @Override
+ public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+
+ // 获取分区的数量
+ Integer partitions = cluster.partitionCountForTopic(topic) ;
+
+ int curpartition = counter.incrementAndGet() % partitions ; // 当前轮询的 partition 号
+
+ if(counter.get() > 65535){
+ counter.set(0);
+ }
+
+ return curpartition;
+ }
+
+ @Override
+ public void close() {
+
+ }
+
+ @Override
+    public void configure(Map<String, ?> map) {
+
+ }
+}
diff --git a/report/src/main/resources/application.properties b/report/src/main/resources/application.properties
new file mode 100644
index 0000000..a196e2b
--- /dev/null
+++ b/report/src/main/resources/application.properties
@@ -0,0 +1,18 @@
+# Tomcat port
+server.port=1234
+
+#
+# Kafka
+#
+# Kafka cluster address
+kafka.bootstrap_servers_config=master:9092,slave1:9092,slave2:9092
+# Number of retries allowed when a send fails
+kafka.retries_config=0
+# Batch size per send, in bytes
+kafka.batch_size_config=4096
+# Linger time: send after at most 1 ms
+kafka.linger_ms_config=1
+# Producer buffer size, in bytes
+kafka.buffer_memory_config=40960
+# TOPIC name
+kafka.topic=pyg
\ No newline at end of file
diff --git a/report/src/test/java/com/henry/report/KafkaTest.java b/report/src/test/java/com/henry/report/KafkaTest.java
new file mode 100644
index 0000000..6e21e36
--- /dev/null
+++ b/report/src/test/java/com/henry/report/KafkaTest.java
@@ -0,0 +1,29 @@
+package com.henry.report;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.kafka.core.KafkaTemplate;
+import org.springframework.test.context.junit4.SpringRunner;
+
+/**
+ * @Author: Henry
+ * @Description: 测试Kafka
+ * @Date: Create in 2019/10/8 23:26
+ **/
+
+@RunWith(SpringRunner.class)
+@SpringBootTest
+public class KafkaTest {
+
+ @Autowired
+    KafkaTemplate<String, String> kafkaTemplate;
+
+ @Test
+ public void sendMsg(){
+ for (int i = 0; i < 100; i++)
+ kafkaTemplate.send("test", "key","this is test msg") ;
+ }
+
+}
diff --git a/screenshot/036a079d.png b/screenshot/036a079d.png
new file mode 100644
index 0000000000000000000000000000000000000000..99d8a37e7b9d9115a8c016e2f51c4ad3b569e3f0
GIT binary patch
literal 259324
zcmZs?Rajh67bF@2!6is=clY4#?!lq);O;aK9D=*M2Pe2R?(Qy)1b3&A%m2@OGjr$O
zm-C&6(+|D(TD5Cc)s9e6l14$qNBs2Z6N;>igxaT1Uw|L)WALya&y7$$wojjaeUgE_sF)>Nue66-zTKhLrq|u8MWE({zT59J}ycw
z84trD`J4A3i{ew@Cxl<&9BElD=Q-iSMeh}V&hf@SK_lkLFj3*4`Iq{%*|b$uRQRxV
zJv~>5+Sd@|BE*JmzhVhodUiUt={vU4P5SNLx4`v_H2HGd=QgQh)JzS@Gy&WX`I>{L
z`V5;Jo58Dh1i>~f-n)o=HGr^*UFoL*@2slIlOA(1rb&Y`$)gbbBu(maDLYe+YA~ig
zWDmc3qvbb&S!S+&yVzm>!bxtN6^>)oH?8683Nye`H9!)Ak{8(l^6Nx0;I7_XZ9FMj
z|L&Ub;1S=Z-?b^*G$%UhkzlSR^GbR(@tQ_)nN&)hTLes|a(`d5aTfq}$BJ^N
z$anTxgOXU3hnierH4-EL4Ey3lV6&RG(j2H>B0psSP*;#yquxIw4uq9$efI8Vn&&@dq)Osht3=a
zjQmqHRv407T+@ePZ)`WN=QnOzL(ojK!`2>Ys>G(S#l~dBu8%UR&zO4-$nI!LI9oNokI|#=a+RGpI)+SP6@sU$UTv)OJgq*!{H_FgfvGTa(!+cCSbK
zwYKo~{sXwUo4h)ib+`+oD^^ihk1r%F$*`o$hg$*_Ur6vU`VbSN)KOC
z{(*0`q=YyPaw^_jc=o*7q(j|`X1Z!K_UV|T_Sz!QN<`)%CTFc=OaV&|ZO;vR5-0rh
zbsg;5Ljn(kI&W@1@a*ErWg(nQ4RfR%tp%ydj}22x#aMf)M87Vc53_rJoDU(gym$=P@$z`dKICsuvJA1f3s`PY`23j(W4}>TSud6
zwg+to_wA1aW^RvmpF4I-)$$)cIrW}9_sWgx82E|B0pssEPL)Xdr2g4We(IC^$K;B~
z#Gq_vCamdPU1i%&y?lzjY^TKT)*}{D+efxABXw_llVENih-1-CA{CaoZIZS;KT@0U
zz-Noa?Ilbn{~VW`)ELjs)%Hd`eRnTZJHn&gL2;WScENN@%yHS3%S$-vKXI2iIWfkch9eus
z?6MByBOE_kx@0~D>O37eK4s_5_yId)uGYJBh6GmHy@kpDuAQfGg3tz)Hx+m;A00?&
zijlmD97gjA$4)#~i1D5^vs>TLW;N+XBArnG(bI8iabTIo92OWjCDjh3Ur?
zfD#p=5%UlpU(IQ5R`TGe7HVnqb8SCf$OR?5dFQn?0U%COgCzG3#7}QfgDkGnvEr`G+
zXM?H>9-bZQ$K;GgR_w!*A}5w)kZ+eAaKH-E=SKh5j1)!#6Ws$FvYJN5DNH1eop9o+
zGr}Cv>g4nfEkdOx6t308C7-
zbECpP1R_2I#rWL)l_^?)7E$9<*3
z8%O;CH}~6)Jk126A1!tvMmTv8Z6gw{6ST;lt;-n=FJOpns5?kxq}<8!S7*V%Iruq7;W#T
zcMEp`;1s;L%R5PLc)dCtZcImCq^+roSO+co-uuamlYd>)*97|_AuM4Z)e9a2JGNZz
zJMs`R19`JpMz^5L%eJw06oVbGi0jwo$6A7FSF?50EEg
zY=-#0FuTN6M&hgTt=^;5Y`;-a7%i4h@>6ARa|GdD00oU|y(cer5+fO7Z{Qb<8-yD<
zN+Cv_sMcV;Zy>e)+W2w#hNAq#8$qe^7<|D1r6ZeN12SOwp`cq62F;YlcK?wxJCl52
z78Ab|kJJ6G7zm
zxa7CgzNei($?a)P=;7(;fM@>kMVA7K8c3mv9%TE5d#aEo={?+1^y$QKk}{7wfy}2k
zAl<&^(oT~=t+&|>jKQeif*UD^9e+fA?TwBdw{#m~3zhbBM{`UG6#AHpRVar>_By2g%BW@}@zKgJ(_)q<2uwsJo%sRUIHQL#
zxajl}O}ILR5iY$tZLaLS{9^TfM0%TD0GF)cJxj**{d-47woQRdYNBb1qJHKit3Iurs8=ihdx31#&|4o(t;-Zt7Oz%l{VZbd
zm!y>3q1J0eF{;U=oT1`XZ`H<_N^~LB1s7jj%)wz_qh6mDzE
zBx@IZ%`y#(vL7RshD=ykjq#HAT$XpIf(fT*lf0El`;LF*mKeUk2r_@6Q;xk-LfHBv
zkD`*`TNceV`%m+5Si`1P)U+Yd9JPL2_ShnH0xwWF?bnh#UnSv!%|n
z*$)~Ug;~p>yLHq%Y4OmgDe^LLU!#(`xN9nr^fds4+5;~2l{!Y_>S{2ccRr#GLapEY
z7dy~MtoVUFWi@PdI~DGCF$0P=i?Lle$L
zOB{=6#oX$j%4Lzrrhp>wMYD;u)C6Z@jiUnUg^Zm~Ro$bQ{TZopH$da=?w$i0?~NQ+
zjNnW8<#$ox{tx(Z|s{ZR*aXa$qAceO^H(ty^^i8s7lN(-n=GW^68I;RN1^Y@An|X>cR-g
z`$ctd#rL`gdT&Xn2bzP6?5`jpjUU|g&vOm9Sl&^0d)mil_-<3H
z0VDQ0A{DG*1^nhylF4ZNkwc?!L+UU~jC+e7>g<-Bw6hYl?`k=b$rC&69j-g%kFW^K
z1zKm(zPRhal=^>t?o@gtcmQAZOquARyrPo@|NAGUkbXtH3H0R|jLi*=-=#J_pG|fo
zsHpQ?)qA|U`7+%CL%3dt@3KS^xVkndQNO`b?Cj`e^{!Ziu+<`v?3d;ESAdDkIiAmP
zZ2`FjmGbQ_CzcHAS|E0{8?6|36^ie;l&^trU;uORBP4cZD1_xLs<+
zzJ4nXRUT(#V@|0Uys$u%<4$Um%Kxj?u?t+NomWmM&EG`+lNL3)P=IPfimGJSB*P=8cU
z)SdVmCjGQ{CQqm)^2gx0P0-vLz3}rE(Y{wjuSYH;FgAxLWaB4&-OKbBplfZQ!j$Pz
zzH{X=AZ9`OPpug=1YT3Ol1z|mB_D9uX_p(Y%R9QZ!>>P=0C`B=ix*V7wg>mZr>OXpqnudSG6%AZM
zFYQ#URdsbHN<5f*n^pN>XX9D8tW?XV3l)rwk{F_B9k_P%gNT#q8JPdrmXQrrdcdZ4
zD^OQ8`2gR>R3M2fBTL1ksr
zDD{3u@Dj3s_@*=S+3&;s^ko?w>}N%O2x*_|G**9`brsJ=x*_788tOdD&2-{NBH>tPegF;$`Nt{$-vj4=
z(YN*D-X(By3C*M?S&b(lqExi_OqvrehcSlx>1HHHvAK4GEneg;6R}`S#CC|hX1AsL
z@Mlf@^$7y_v73a;DrH2#Uj(Vd_$2Q8;fXa54F_sXTD^LtEp36{p86uvnZLpTVaL!&
zwLcegowkF**zP-Pc8+x30|Ck}G#%OR{>GOr{sR;wg25zJuU`yx-mzg{@4#@ZrW;Uc
z`l!fn6TWGz{z%Ag1eb|}UI!Chri&d%(_W08*LA{1{c(gZ*Qv4F`3@VA+%AI!n?#|Y
zV^Y7T%q>%&WE@REDdoGjgbONXm;*Gg3N`S7xPNA@Slhb@wx(JjHZ>(>=VEMXr`5*-mq7zLM{JjkGkV>eXF)vKVm4y6IJ~m;{a77
zl3er`f+b*ZuF$0-P^JF!7Ua$MadqjPD7GUha;{OgOgCxR>xu>U{(0*vOjs}ga`6Ov
z{(4nY3smOKaKol^s9hv2wwA%{T1d@wygZcE&|H#_Z(`!hqVN2bctF4W@_}pE?Xd`3
zQptFC7Et`78Fj^#2q$A$FjQP$xe}ffizk*9mR*|=^rpV&kE|e82$Prc=Xtp?X*W0`
zNX2A*N$rBJ7Rkh#If?B(i)C8@xctL?yFR?Q+<^E+L5Te@ekkH=e*nc(=}=r^Vr1_7
zv8ur3gwN&ib
zW7lq$(4g}(pR9p?&R6-)xXZFS$Lo{F6(mVk{#R{QI-D*~D?-jqR8K~4Mx8qBhs}4^
z^Vc5b%dpb0o%&wH(z%Gzw?~>iWP$)d!t=`>|3jUh&&apImoJHz4)4#xSIPbmbicQD
z9LGzu@278;Qq~wa(fHIut9kRgkF7}4y84c=cJX&sejT89Hz^AbPAI$U$X7TmC
zT*kwS&i?hHYU;Rs$=1^07l_7}h~R>gl$YnpS<=N#fSAlh4m{FxB7dBWTMJVW=
zkcCwL)_Ck>%!QUD)lrAzhxnLFS=F`L5orM6$1f1q9r_S
zDeBsQ#N;g)VEh>6u#?L0{$)$(@*NDhc_TPCc%JgK9DBGL8u0I4z5b5NX)K3;f2R6)
z$|vwJ=3sjILNxshMPijj<^T%UxhWOd{SKbldP8;S6h!te{E{NS|9D5CY@!zeWl@C@
znNlTHteI3u-`8nx4^7gbI2C*`=^N%)aw;Gsq#5*}q|17cMMRR-XmqZ_u^WU%tsqz_
z(euZ&Y))6F-(D)XMlOu%ZwWYAYfCDmH@Mh^8a(CyN`ShwqFPgDyqmG``0xGsr$SCM
z%))PM4J|;wsXu-c+nkUHlh-@Jlu*c!-#cy04~;s@IbIi>9Urte*knX#W*aLZ0F(Sv
zp3D?p<@}C`B6Lg^Pvk7p0*{Vv0LT81mi3~xT{@zS@0#-&;yk(zy}XSCr9TX_)F8N}
z2tS8U{swa(a-(V-O)K1NKNs5S2?0JCN^>ComNK2}72I>fOa7}YNx_MI~y^6@f+t@p3uNQOpDiQU3CeQ5p#Pe{#
zB+WpHG3OSW*xML+UJ2diK)Lr)fZf5hes2YHs4I2`b2Du9-@pZl7qUbA^Wg+n8cWK~
zQtaEeHD-L{8*5gpS+fpdA3>{K_gQI7#stAmyYf@F;UngomxgsR;hl{vCTLb@
zHpGFESSm`f&@%Ej{7N#9zcm&0xIccEfg20ReMk5*Tl}41YwOiv3Ai=wsyCFpUxDaPE}y)+GeAiD(>?|ZIloI^hSCb_?8;FFNZx_JDsg#Blim}
zKvyEa%Wi1xjxPKKUH(PC4q{>dZL%z@**qzS0L_Yl-$#j;*A))i4YN^8hThPoG!r6t
zEB1+#G1qg&Ef<+vy-jmCs>RI(h7}gj?Zrr2X5L}P&lc1*u~`CR3tHm+$rK?k8+5_=z_qb$=p
z-~`_6OwpksP4xL4)pl_?I?!HQ&-&`&AgAXQ?S*jbdDIt|&HeKqS>dNMK+a%qEO#&Z
zC0Qk3SZS^wdPNNWFa(3g{qmFFLq*u-w%~i<`0cJp1z93SD2@_^+>rFSrS7wc
z(V%H6wIf3I;F_$i<~G%6)1M5EU}ug>&SXi_YczV`?*E=|{+GbuvV;Uk?b3TB=JUgr
zG^NgSyyZDnj=cP?TlH9LM)q4Oc3}KL*vK$G<~^zZ3^S04whokAt%g;N)0n;
z2@r=8wSOlUy`7ZLYrFt!^A5*Y$m?ixoK7j|cZ{q_A`r=>NNhVxRokK3Umr@V$vIg2
zM<{e5weYcpEXMo?ag?Di42pN8fM-U$lqOT$)I_5+r6b$(@7R~7IP?$+#50SlmGmkxg@=&~WG(uMoYabT-s9I2A1A;{S-3l(y}1%i_F@As;1K5
z`mv~7!7JpH%8k!mThVdPFmHU1l9hrMV*B|UkCV;*zczmAZ7&)0M%JM9NQV(@+w#N=
z<$A-m|H+z8I`7VXf8mMwvud1&KESANC|C9MrorgVO!hHjgNrf!7V!hQ39TI9a)6g7
zv@5O{tKC1!-$0*uVYw?&8T{@Zw`!Aa^?DAZG+Z!(-lhD@YP;oPoJQxZ(%5`?A)A)3o&OEsc!S
zw;MZqt#VjV__2XAyQ(s{H|J)yg}lHqhFZrJ#FV9G&r?tA*AG2Q%~BEjA1@uVf4IoI
zJ3N~$GYfuZGApi?9IQM^&|e*GKSMbyFW!F>fM3d=qJ}3Vt#o(=40Ag+W3ZE(c|FH~c~QX$mCv!AEo0kK*NBby((C0zrxEuTTq#DrhvQlqJrY0}rB`Vvjx
z{BmgW1e-#6bzepMBKWFxSG7=8th~g_ZNgH;DzB1h;BTxzxW}M^_>ais;z*T08p|Lr
zr1P!|(bFo+{y%O6myP~!7inEUl;dyKSuBEKVg65WjQWju(M3WvLO0Sc`%|%CTQ>^p
zlVdjH9R-1hYj_QY5uq||O&j&zR9QeYy4!?x${_eL-I>~tWi|^mq*S=0MwBuC%ufzf
zr{f&vigHF~*y$Ya$pzJbkK;B(qpHqPR0!~whyKB#&}G&Ifx^?cBP!l5YX#I=B2
zp({r0nzg`RQ(r`V36)X(plo9CYVD3(UtWvf`^{bWttW{F1<~F@8MXRm90~pXtXZ23
zo%^iprDnoCE}y5#^{v@@PfKuA;R%|F0*4&&sMG;N%DF7T8aiW
zD1$D!tTmLV^H>sY)SSw(-P=QM*M6FTPsB7_J_jA4eWS6hee6u*^@XL5N$-L6bAiEd
zHuL5_|20qST>)48BUN>yEHI==F_O5Hq3rhV1$vXQ@G=tNKip*UfD%~v|3>tK6sNzX
zf4`L7Vk@Nc!kJPl4YaHOtfvKJjHN0eeL(pk0_Mo`ha_SD+5KD?K65P-1B5}w)6oM3
z53peY`|M(pQij{+$N}&-Y?^rTRF1vjw>sfz3hFPTKctehq|Jcw$Sc})HWvDEoZ2EF
z-TlH@5XYfNQpqpU6WAYFt1&geceLe>8&c%oaxXhLT|*w{uWUIG2!ZEy2sf+Urr2R8
z@yqj7kHVw(i(l~kKJLpc(z18Kmo|Cns|so2zC}51#y9sCAbQFw_%A$wUZMQdj-1ny
zaoM#RiXh|ja^6Y*bttWnb-1yP0SrTCL7HlLzeh@Es1G;V;L^bh@=2^is8|)*8e4&C
z)j&0q%~j|7t8ncDP>|U=+bkXGP6K{PYd1cOq}#^ypV2^@;^RJASA^EqWRKtjkYwxa
zn48u8-dlrnRa+_A!|;VD$Lsjv@oLIH+6Q!h;dwfOQ(KyK-q+vTVKr~BY;5qMS-@Hq
z#)A;6l12;KL<9P0rV5(s_G1tLbgv><6O-_AKa}Q|3u!qJdB%zy(iA9o%u!7Zd^rZQ
z4I_36UDiLj_K^=~^h7TZlO+kh(EfWhf?ec+AH)==kzaJV!HK~(*3`m{@}%zN)#K{U$6w|V{k-*pX4!3=gWtZ8UzSxsF
zHNj$q(nleY(>kY&f4B|qXHWF5UDA8G+fUE*9;u5j!q)||1ho(=7DmW{nH<3vIiL^+
z0eZ}z*W8=flAPF5S(ULvBb>y>M#0L^haS-KZ5t4b6QvLSx@w#)uZB7)d*z*S~UYKe$V0>oPSiLZ{F&YzH%P0Dx&2N*s#2zTqvan%bO8!nnM${M53ZD
zm9Zr*NvRpi?Xynrwwt1Z_6QqVOnFKhcIzoZ<^KLcvOxcq!aAy;O322;dN+@XT=DwJ
zVNp@Sc!4vkk|kvBB#7aw$-!bDd84=$uY7O=%jT?zMZ?y#2E3s|TDA+r5e2q0)QWbR
zPn@e7$q5Ix@EHv!x_ew_Snas`LM{GXHl~;DpF%>bhbpq_HD?FjbQgX&Ym=FWS?&k>1ql
zw^t)+NQoge2G}6)!aD!nW(5q;k^`_;=J!^|%jcwIgI~I)Cg>pM<0XNeQg}cF55;60
zxxZEQ77v9Ku0$eK`)pM6ko+9GOl&=W0^C4RT6a;#JZm7Q3T~o}>>QEBY!m{Q!($UX
z0G1buKuN#9Xrh$O%ZInbo<9~(Z;Z{w29Mf7Fc6@C7x>=#O8jCo(0t_p2Z}^zkS@^O
zh?1iBS>WSD$6sl>RroXqzs1K_!&WG1PL)=hgSZvuABYr9uWvzzAEoj~(N^y4t%9Xf
zg0F{DgTumsUb_jMfoi}Wz+~x@lj9CxE=*((-LjbsF=@zMlQnpa-T+jWS3c>m&(v1F9
zn^blte4V8Yf|*?35Igf`N;4!?Xg3kSS}AlS{iV&WnY#y8&1HCqX+hfT!o|mY##*>nZVOFC325LR8$Vs_6+Fce(D=;U^jQqOg;J%wWZij{Pi+gT(d
zQEBc*DJ2(B|IC~|Bo+gEj@^^1_&D2u%K0rYL8Y1%Y!vhja#F>!ZZw3@E6eU*ITPNP
zDtC2&GPT=|MAGjdk|
zV+585fA?O?YY3S!i4{4L!3ga|7fWOaFFck~VAhM8p%Lo7&M>fa(jJCWX2UbQOvq`o^1q`)mdpAdz6D#m~75K
zX^bIM@rNrs5Vzp<
z-{4P)14JcR3@-!FrA2pm^WNn#&Ra}#&fHGZVEWL`rup?KGMu
z#9ILkilQZWKhWr*EG4Vzr0h|D2huA;mM
zVCT%A)sBq}h2^wf*z@J1wm#anL-Ycr?|
zpy_NHc4KAj$y9R5?bTvgvNED|gI!)1;-$Oo7Gfprl(0=g(8dp7kem~JqU
z@0scJf5ZR|G-af2+0!$d7-g@0!pBHLhDd%sxn*yX+A7gxB+I2SVW*8#hw^6M!UHN$
z_750pjYJXLI_NCr;M)6SM^{y}wxVzhy`x7AQ?(AUK~7e`nmh%B474h
zx2%!RvH{l8O=|FnqgY#|K27qWO<#zR#}g*oVCiy5SSUs|vDEZDjOx+zjJn=8z)o2F
zvHljtub%?-);DGhmHe`gFFO)87T0`9>TfmpOW{mMGPjf^sQk6&Ig!
zR6m>aO)x_vv~qou5Y)N9HX&rmE{nJ|{Oj;U{z+tPONiW;wf5Q_Kw^@Ls+5)tQsaYv
zH=+!2c%V)Gn1fv)@O6+#N?=G-sl+2m5h3cqJ>u5%r}w6Rjyzd!FuEcC;b5R5IxR@o;(4^5@U~!8d^kbi?GAWBc)+s8<{W*W2
z*G;NOMRT{w4QrQNkoCv#8!JN|124ZdJs9l3c-#$mzxH?(M3*Qevt4Ngup;wtq0h&(l^w7N0E#i5aWKD>(-Eq#*-0*SL7{+9b{qWow
zDOHIBM2#YSPwR4|_>gH3lq%AGca@4=;YWClj(Q~a
zIzY04wLI(7>2}k4Sm%aqQHQ(IR8Z!w>|s^d9Sw9FszI
zd27R;91j6M2FWQp7ZbgH_t3`!tD0wDmzOys$WUHCeWV5V9DC32dXCU;MdL_sMOhi#
zuZ7yTwj2m`5Ngk+JGzdh&1bya+TjlCYE-16WXm
zF}q?!u$Q@lh2Za376DR+-_O5|kZ}3c%)`x12F)vV9zfdPEN&Tdf!Wb(uj1l>wF?`6
zk9s9&!GEApV2Zane=K*!*iWl4J*%;R(-Vu=!^rWUXKU|`Fx!Za+n6B@BPv|WD
zU`HNk%5W7%>MgCu#Bcv;f}VbsCp~Jc=&Mq)kH{RwCMjmeyi*$o>6URvXMBMhRq3A@
zy{pZvFSLUG2udFCxKC);Ira_I3LxGp+4QUUX
zoC-PY#9?T}VqR8w77Do$Pmx-EJLsehO?9Fh7)39nY_q=u$b&I!+ks_ee?)mQM0lvl
zzV>VL>nw^IHkYJG%kCd)L{TsAqCf|LXv9i%I}x(Astd(SZjMoU?-=#PalnX~UB5l0
z;b5F=^R;1mJk!3C-GJwTztB_A&3owr*#jEtZ@+IyJ-8nNIVFE_+Qxk)hkVm3gl|?Q
z|BCQA9cR@9nGNO0t)5ZRaEX2b$qauK(E#TnKm)`g22GxmY(z5seCxkfC)Ava;%#e!
zFBa?Ny>2qdHhV^cTWL!kZ(VrnEpvu3X4$M_QzY$UUSVS^Vn-Ke_nUp=W~PBpnq7t^
z(eEwc0!O>`@Ia1LbhRw|-k7bbMlL)mZQY}=^Xu#)4aC2NY0@yvXJYQ|s=vF7=q`a^43H`wyjCGuJ1Pnikb
zfSj~mSf-dCoz41g$w`lkJwI=*fp|CUR(xZ}D}5|hZg0p-&MG{%6lwqsx&1;k1-S@L
zzkFXT4wS|p^?%3m#1~))q
z4TDW$7olH=vnJe=GtiPTM!O)olSWjXZ+d18RxZo-Bfn{K(nPf9M*c++nNX3EJ>`yC
zh;%1HQ`0J>J|x=c;b^^^W)D{WZTC5vz_NW7w$4DLK`_uU48BfWBzRnU7??I5NwkOL
zEUlm64vRnrpG_WUW_XAtlg=uBh@5D?heX4m3^E!Gx0+r;r|%oS7mLF6QEs7-0Y!O_
zH2K;>Or(QFnpV)`5fYaQ8mi}}M(3Vq9zyww>$xfVk5iO_zfO?O<4vsN@ZT$oC!J(?
zWi_CPm769VMU6nh@9F)0@(<_-?=er4R$F`wg0m~e?;}R-2q;g)Dyv=$R2P+QvwmMc
zC5I0mBoTXrcPeeU-``(2=*b21e=b6Wf@%Bg{Eo5bM>~Ab1!v|Jo`i&9>GiB@RC81p
zy!RR=j04kJ6QaEqQ7M=hPDJ*E+rB0dcalxnA0w)8xAK)g)~S4B%`{lz7{6fpu2MPv
z0sgctDTcSTD)VlQ)ypAG5n7F4^p(mBg)(;cU-+KDm;EN}l}1_v<=Z!OvmphDN}3%@
z5?`s{$=qL}&jz25f@V3>>X?(9rbeV5d<%XGN7VAoo;@GAiCz#`y#^8gb$v(lcxXoT
z{!p9T5$hl^t|hdSG>k*dOJhjmXdJJ-2t2Rs9;dsql9q!=x*Ix64Im@@H5{5b=6_
zXRlyFG{Nex?O|d2>`+ZPO%d~GFi%2wiI76pUl4FMXcE?4_^ACo3=-lBNIbU_UG#^q
z%#~i|MRJXp^_6*I5jQZPXGITES!j7o6HtZ}1uAaU7LMG+{3hDwud~u(S
z=n-%WC2+Ty)EJXBePX%IPH)heZk>mEZV6%v9eB=wA1Ruzkr4E@r=Az>m+d=K$mIGu
z_`ShEQN$9h1g56DNW|IDXeUmVl)5m`O2%21{B_JiJEde}M?5Z-s!IYcJuS}6{?`b-
zY$bd)l$VdOjvqq(IL4fz2NK9$mU`H{98EI6FG|_PjFbttw
zcPMgCDN3FeU8~YdlfQ0)bXXuldkeiPJkKscEq?sM%nY%V6jSPSnO{tBQk6^WRm=!N
z{`za7Z>q?F)on9zKag$~l0R+AAIR7di07-`64FG%)58g-uD6jjT2gOILVrFS>uK=j
z2*}1bm1V=5_rPe+w2e&p7WyQIJ;rdb4{BnqS5(hAg>
z)JhQ1a%W$Yv=?5)UO`uTrHqqe6%SU6r$cCOaF=Nxl37Dvehq(xr+V)ub06*1O(`-_
z>uw5D+xgLk4bv)2GhlV?jlpqGMUPko0i0Q8$GqwoXvT
zX-nHwV(zVSEU*(HA^iNBV)#R;ABH
z?`ChLGdJGw8|LtqovS(bg$~XCO_cv97W=_?)~0AYk^~|5!{vBo1kt1^@QF+0?zTFS
z9!O!}I)FfMYG`K!g3G_2Q^dbXXm)I$QD#55ZJl0Rt<%StV!P4wt@-*b;G98@zDkAu
zd1iYFVd<03<7SJ~4{Q)r{~Giu5_9x)WMf#G1R^UNxoqBisu}e@(0M)w3rr))D+`EA
z1dRL0br;Fx!&pTXC$OsXZ>s|CuOKocc=igJZfL$_6H<*7(*W?&pEECLdTmtm*@
z0_Rf0U>SYHea|_%S4+5|-wQUoOywLrr~_r%CHaCiZr#E^RNGY+aXD92vVW!~k-ziJ
zicD!2n#}mk@Tt}!8zCd}46?g1AHE`#4k%RZpql%Ye-im3g&oJcU+T%sutQz!0N?
zZe|cBm2tV9R;aV#+1^G=tK&YWTGnT4`%IzCxXS%!)!r3TPs5!|<+UjgZ@o5RS#G
zh_#LAxqMI3W6~tiin|Z3o5NQ#vh+qJb}_97H`>u5M!O2V`TC}l!vIE@@;LCLEEs+=z`-C-Zy4D+ZjxyX416m>+qzkVV_&iY*}1)}T$`C=}sP#Aag
z8ZT8Shu+bXgqz~0fKWoNPO;Y(>(-);=bimD3<*?td@i!`CfFQ=EL7dB?Ej=)sD35&
z0-XIGJs6fxX|4zDo+Jp-SP5Sm$PPcyk(~kmd#T|=n)q+|c?8;n2_zZGz5>H(K6~yc
z`v>JWA
zcW&}*_hs9KG%jCC>WOp|H_|6d;4fKJ`f4TOm4JvM<-L7SM84Mmu3#0+`8{i-Gg5Ie
z#v@}dH(tIRL^g`Bue`7#j@y}blB=dBF()zu;|?HF_0^bN!N1asjzsv|3i&$7+ffcrB*n`?EalhO$l6?io;X!0Z
zBedW}VBD}CSywfo+4xEI_FP7?KAV{)hGZ?GphKT=9fY)5t$FPy%$#yD5Elqu_AJoA
z&d6u<)sG>!D-06WF>1ii`_8{j?RKMv=Sh`yycUoNzvZaKk2!Z6mabBPX;G8Y&>5=#
z7xr)+yJbg1WU!b7p0BW+M6-TmGmp`SqJgN4HpUC=h?n0SFJ#Lhih~6%w4$k=ZhO#r
zl*8ath><28I!3@VcKVY~v8JA8x93U4J331g2gwMyC2HJ2{?=-*!S!AAOVHYH75+}@
z&yJCDmAX%4zma|q^HM$JGwc~IkV#IkugI;nGlXN?QBf=6{+^lpDR$!VH+)TnUh+4k
zYw`TIS$AJQloXbn&_K3bRZVsAdm7d&7WC7bHwt%n=RX0Yr^eq?YIJ`E_L^|=XBR0q
z*1O0|DQYX#^pN}V7r#nXhoGN1ikJWCzU&|z*Lg$d6r)wtNAx)LyOYEKor2^hU7~=*
zKeR}CF404@>)bf-^{uh6_l*Vvy;LGTTYY#Nsn7!dy(aTNJ3`0j{7+OjpEs_I~q+g<)(YsiqofEn*4>ZRp;nNZ0Zye%Kv=MeQ#7$nxO7au`jetdsvY<#ZSpWDrGiKkHhY^D;>BLh|(tW4OET|RE2VV&9c)n#|Dg?Bc5uhZ<0?Hb^tdEkBD=XA
zq@fhw)$vmRi(md~JWI8Gre)y|WgC!8YxaV(B|^Y{Whg4xEe==Il?G@<-GL?A2^xQd
zsU+pbmXk35n!S*h{3%cOIZ%N%v<(>Us_S%o$Q6YbANytPp>JbBc9TTJZp9vXY-%7B
z^GChegzZmnLp^?j+O=;yPCycDmh>R>qFCf90yOb?xQB15N{!Ag;4w8heLM4+g)2>*
znB4V%@(bnuHA&vn3?hCfLZQuCyb_~uLXv3Lvq>s~)6qbn;=u4zBd_anldqtZGHzJ@
z-YSV$HADvqr%KPy*;0^IClFA05DTR*uxicdXKT{y
zM>HPbx
zJ9waOvV>kzz3&r(!0sf>$!B6p#>DjBH9g0_D4l7{gHi6J)S8{M1gpH
zHS@1ZMfX-EePP+>7I#v=GO$Z9;eNR;DMmDj#OvEEIq-<${>mN4_Bj$aJ2m#p$EBsF
z?~|}drsQFgauMMkJSys~NPS45MkB{REEg+g<~0I%T}q6#11M)stJrwySxJ%pSkfPu
z%lKsj2{Q!~#%903%)$yKfcC=n4x7gtK9lh>iFwCNn<)+LeR6iL-YJSZjC0PfA8ILo
zJd~OGo|%53iQgid045(ZAV|2mbWe?k)&CuLHsCD*kEHX_EoH)J{lVYV2lq^7lgi6f
zEc5H|jq=cEEr@KLggY9d({GE0jwN~QgBB`-SFFOCtAZ93<3iLVu%iQ%SQG}?v0~^&
zt{NmO&t45?yNyM8-<|01KG`I~>*2_t+9&;nRr+^~QsTVAKC*@P)9uPKTG+dwZxu!DzxF>ceA`I`mp@
zXy8odHyB>&vlKh|I6Y378!bl3=z1vKNL1K%DEsUGhpe~kitCBKb%R@g;L=EN4IU)8
zLy%y>-KBANhv4q+5Zv9NY22Z4Z`|GTJN#dqd(XXRj6L?)A7EFlx#pVn6xk(SLUYA$
zgNQ<9KFvl8ay4}-wPN9B2>AP63x&jmZGX#`_L1l9+h)UuJAr+Lya@9&9AgcM$n?}u
z)|5_dTR!txUs99O$ZzIy&waA4X#_8|A7
zLrrCDDvn!nLC6$
z+P<2!EU)xyKJYQ6Abg7??Z6{A6E)=fr1CQ`r
z)K7(Gz|bJjX?dZe=zjR~qS6PNgv+z#$M!ywl6++=cW`s~f7E)P>$cn=)Gf7{ZxosF
z&`x96)gsE>S+?DJ!U2?W4Tan2S#
zd?C;ngibIXXSm*3#4cb#e{*|VZ@foF2z!TRBa1JwDtP6Qtl11Civ(Xoqrv?%>m;8U
zz=La{p}ZUArUqv@n_ki1s9kkhzNCCVNdI918@*-(BG!qCr5v6l&0{jq
zrW+0RMH+MtHvu6zI~Hgm*aPq|_0@i#4Tn3I8Qs53f=5ROgJ|R&I(A7wxEO=`E6z%X
z81>l+D*BU~bFKWSv8WPAAtCsxQXgyUm~-Y!aiU->o7Ecfq%X6ycvSJb0*Eo1M7Y-UVc4Lz$$(X_Q6%qk(rW~PrajzvW{|ze@7)wP9+^l
zM=@ceZvBL?XKp-Fp+cxbvy&x1xZR{~^uGEGrh2DJYq~D4Xsl~3Bom@-LBJQ3asJ!F
z_1kbPU2H->?c49WkiHcHpbat))rrmEPpuw;a?ykH&6#
zW}4mM^HEZntsPE|@a0K#Le|$?W&Z;YH(~x%nym_m%2ut_?Ul%aJ5;n4zW_Tz{O+jv
zdemu+w3;|3Z`bfbCW{7=WvSw`Zi!KEHbwO}+2#(N***JJ=*66p&C3AwEP#nvIn7=p
ztjMTawxGKa-8ok2{ph}3+wkq
z(#|4uC}V7|P05=K#v87*mp|~#8QTSm(PvPFWrGWart~0N0lJ3aB74(Qy%@dP@C`gY
zJ~&3&X+-fDW*81ADSI^bF@pKoLLQp4@XbZ?XYZWqQ|VzqDeSaO#@rY)gT^0kD2d+(XUp9t^&vb2s+rmjUwm5w}daX(j$_
zkM}jr($?cMkM>3=DxDlCP#sx$|MPHf251!{D8$+*{?@sU43bZmubD60v8UwmhY!R3
z1^U}ximq~tJ`fp>EUZGMR*uiX
ztc;-CBI!}b;e8v4m%z{m{!Zy&q9kW!-d)%IIFGFR)~*2`dEL4%lg7!wD18mGhY-p0
zVbyd&8_ITb8R2o(zNh2M9T`bH;Qk=T`mvbrhTr~SPbi;Us@OM)bYw*rzU>ABiJkHS
z!qr{#G_cBOVK(`L3da89yfcX
z7*c+XLQ7K?J_1QXVH>iaSxv?MBbqg48u(LjjBMj_-P%wWOTgr@3xdTPPgE92WouVRg{=5Rx}&N@KIl;@{uafM#rNw_9vZp
z&2BG-rSnZ>@htL9Cb1LghO7gIUy=-EH|j_$zJk=4>c?qFO!Cl9{2!f27dR@v=#aHq
zSxa+&$YPK&mew??-)OPEqdTDIfrDFbgbDvArTjnM8p#xa^n<=!y~UOt{w&~^#O%**
zOG=3=V{K(JvX@OvHmfmtcvuWBYZnJljkr9uW`@K7EBXzH@*_n;V_^5IIl0hdklM$^
z4ldKj8h%ZJ){`M21ek==nmZSWsxSIkbfqvP#Rn!wIH@n!Hf%CW1zuaJG7eCu2>?yIEwFn^ckcW>&O8z**S
zkXV}z0_}_hu`Po%%zR+q=a@1!uXP#b^#*eN)HI%fo94McGZNwGX{={Mf4Y2_HyYqT
z+;dXrENU7gLm`Db^nSJQh!oMhc+9RyN}6o&IEO5Q=~g&(zK0`Eh7TTHssb>bQ>lrz
zNxo9dkZx~M9M*X`e8c4eUr3!F-pwe%TVkA07z49!0zSN;daC@uQT<+>-L2HwN-u)5
zMR!zo5G?a*6WSyhK}9c;-7jAiml-*Ca17Al7>!Dli3%1
zxo`L^xq-Ic+$N9x8=Ad4RJa6~(wjqC<(vQ+?>1%C9`quqgZev!j|0-uMvm;LTRD*%
zS?Y#QU}K=5!MIZ;2j2{Xsjo{1e`qLaK?vf!G}LfzAw%hfin;LnMBz#y!?of;AYJ}&
z{~nu?{L)F^U(AZG>_6QWj1VC!p;d>0
z2s|cp!dwxhOMnckK#xh
zsgH^s!Fz3hg;QN*T^}R4vw52V-k5g7|HO6bd`Cv;C#L%s5aecXf65#y{KzRL*BhG9
zGD#3-B)4g?@4ukpudCY^#N9v0H+>c5?MxhSr*pd9>~Gn#_<~AY?{kbqC{$-KQtUFARG}M@@
z9it_iH&E{0Ed6yy9yf+KD(VNIOJjA^GalZF`=h*)iyoMoEDGV`ejYx_LDJkW8!hUe
z^0fiqzawta(+?#2mO(wr9k4>INp`G`RStg(oQ~6Vh(R)3b1GYW@UL+_5=lUM#sjv5PA>Tm$PG
zaaK4{?7scuZKoK0gsx*l4g#4AvCvZIbma#fQ+$QKkTJ#&zQMpcJd9
zC|ajGp<_tO;r4|BeV)n$w6X#+>N8DQ)aR-9B=Ed+VFn)cf>|-cVYfJ@1g>`A)0C_b
z@-!1K^RdxnU<#Ez6!q339wbF7bvwXl=lpD{$@(i1{f75L=*DxL|y$42L48?8$*sNYmqo=-sW-~h->VNRt0-1k}n%sh@
zx$V=BbFn_V??C!f2tOp}Z~HURUu<9~3JwP8Rw&-MzWMEiBJ@z_`sD93cxrMuXwhc&
z7!&+Mj)=rs8ph{*wK$(-Dr~au2Y<$;29>`+mbe=bl-jMs*6f8SbwIE~$dpAh2JMs~
z&m>kf-W`IK&sL8K>+xG}^TbM$<7Bsbz+Sk&3B@EAy+O0s8hA>il+V(Mseaw;J8`XK
zi)|k5(pb(PJ}B8>a_fQMHq_mENxJaeBGm{^6VRb2m(Q{!?XQsk_=q0w&&rQ-dP;Oq
zq7(?KqeWg?jIm&60ecsA!^Poz_I_L~D)4N$%>VD#zfS>|fUBpCD@1`Sgf0gppo7QD
z^`QTY5VLJH`0YH-@!<^X&0HR4YLRf~2EFgpVkGe`IQpuZvuW4PA5Oow1&zw!0|%i$
z-oK#d?9hHnO9F%8t0$2n_)iV_`V!V>gN(p#t38I(;@95W@GV%LJa{F9&ay0Q_T6&Iy?~luXUOLdqqW|RNKwKhkSzWB`Id#64dr~
zEzQpV=%WkR-p&HspFu*n(AZ&c7egClMRv5DrMDZ$q|xbm3_5vqmt}a|9vKY1VIW1f~RD-xt|f=4axeYh_DMAw7IC!nDA
zB(}dUKQIj12d2#W?pt}V{s=i1At>;~*5B+Ko?jHPxh&kQgo)}|q|BDTcmppQjf`exwsbayf7i3(y9^Nb4QE5$jaW5BQ
z-%_6nWR1I;l!NP0Tpin3bxir_9*XrUe+P)FUMftKGmk(Lom@O}a4S|<{4g?BSPR+;
zv0Krr$UR(Yw!(yL?5&xE!UK&syZSK}HvFQVMaY)UAQ4k_IxJ%J`BXHlhA|!6kB?uj
zCdYE31xKb6YH0tP!r>k;tdWe{5>Xl>a>#$4vN9z|Gn|`KsIq-8w$S#@X%@WAkhEDU
znEaqkY(?@s)|m991W(%M<|FF`8BOGQ0e#s*cr|YZPmhM@R#e$bO%XBlDhj|J82>!(
zJlQOVBE(PQwbJ9nzx&iv(-$e1yh(!lr*14^5_^zL%eTa=2pKpWpN#O{IyH9k;Z<q@fQjIM{X?elx5lIdw
zc5pNS`&f0plLX4zZ1{Vc25~00Xu`DqH>OpauAiWW>D0bw)D(thepUmcJj+x?Y?$6Q
z#9CRN^srlGSbYu6&)AG|((M0bhVdxoiu=dMg!T}tO*2c=0?=~&FvbP1!UP;b*O9%v
z>x}=wM_&2v4QXYvN+#1j>H%WU^kdhv+0(iG5w!JK(rrmKm9zq?P*Qlk4l)UO1O}3y
z+l3c`TQO}Vq+qgYtxwFTTOdSz_qCp4ymXV2WI-KLgXqZzv4hc`QOc*t@ul^G#-=@{
zBDE2dS!bn3#q`gLUC&T;(?bH2m8n*U=OA*}i)Ai(T2AOBU5!$^UJX8T#z*{_X5L{J
zs#jrP7u0VRveG{`b#i8LaHn}`EGCNqnof^dGp~)$h@hXXRm6K+J&1)*EZ*)pwH=wP
zM2b(#iPLDklxoboL_Ho^84r&)$P90Iu5D;%fVYT#ZtF!ra;pIqpE=U!eJ5x%@FQ9Q
zlJxj?#-vbzE_No|&(Sp-ov^a}NA|4k=_DCC1vWIBn@}@KN_aQMF46oV^WDl5*%XOr
zE(#^(*92EDFl%d>0$5MUgL?2+G}J4+S#iEFcJv8g^BVWqQwo3EZpM7pLF(x^F3qz0
z->)@t;wx7qLDQN5bh}=JtR?%An56K!LJpkQ2RrfW{bydMQZzDy&q4+P@(QaDa~&LA
zPufI&_u5!~Jw2al)X
zbnvK)#PlFqRCEPi?VEOtU(ZICv`#3qWxHd1A!Qk}N6P!A9M-sg%^=`LE=C4N+XjPz
zhZRjVwJJDJ$bC-zKNI^Z!$UmT8Xy+=1W0=#BR8;+VczlN1&_;RePC6?oU$_?nLs`w^{kuH{zc3)2Wizzkm1As7?rODu9)7SI%a(&{1TN6t
zOtuBS2aT;+1$HYVg$?0b6_7R9rOj=fpkL3YkR6wt`EDme1nTDv_
zR+sh+w%0U>=_g!bA*#a2@FXJ4(g{oO#Itnlv}Ed}TL>TiJxo90g;LbW!RCI?k=@}6
zAZ5`PHZ$sz(%GvvqSEcxSNp{ZYFb%rGjZ?#;yT}ALGzr5>SJOD|0bx(=MKwpLu>L_
zal2ryi?cfN(%$=u$Ze||nwaRbzi-YC81bdw~y3
z=`Ti6nA_{ZQBGwW_JTb8_|I(N6O