|
@@ -16,10 +16,7 @@ import flink.zanxiangnet.ad.monitoring.pojo.dto.AdStatOfDayODSDTO;
|
|
|
import flink.zanxiangnet.ad.monitoring.pojo.entity.*;
|
|
|
import flink.zanxiangnet.ad.monitoring.pojo.properties.ApplicationProperties;
|
|
|
import flink.zanxiangnet.ad.monitoring.pojo.dto.AdDataOfMinuteDTO;
|
|
|
-import flink.zanxiangnet.ad.monitoring.process.AdMinuteDWDProcess;
|
|
|
-import flink.zanxiangnet.ad.monitoring.process.CostHourDayProcess;
|
|
|
-import flink.zanxiangnet.ad.monitoring.process.CostHourProcess;
|
|
|
-import flink.zanxiangnet.ad.monitoring.process.CostMinuteProcess;
|
|
|
+import flink.zanxiangnet.ad.monitoring.process.*;
|
|
|
import flink.zanxiangnet.ad.monitoring.sink.TunnelBatchStreamSink;
|
|
|
import flink.zanxiangnet.ad.monitoring.stream.KeyedBatchStream;
|
|
|
import flink.zanxiangnet.ad.monitoring.trigger.AdMinuteODSStreamTrigger;
|
|
@@ -164,6 +161,19 @@ public class AdStatJob {
|
|
|
clickhouseMinuteDmStream.addSink(batchSinkMinute);
|
|
|
|
|
|
|
|
|
+ //cost----小时数据处理
|
|
|
+ SingleOutputStreamOperator<CostHourDM> clickhouseMinuteHourDmStream =
|
|
|
+ adMinuteDWDStream
|
|
|
+ .keyBy(AdStatOfMinuteDWD::getAdId)
|
|
|
+ .window(TumblingEventTimeWindows.of(Time.minutes(5L)))
|
|
|
+ .trigger(new CostMinuteDMStreamTrigger())
|
|
|
+ .process(new CostMinuteHourProcess())
|
|
|
+ .name("sink_ad_minute_hour_dm_clickhouse");
|
|
|
+
|
|
|
+ BatchSinkHour batchSinkMinuteHour = new BatchSinkHour();
|
|
|
+ clickhouseMinuteHourDmStream.addSink(batchSinkMinuteHour);
|
|
|
+
|
|
|
+
|
|
|
// 小时流(直接写到小时报表的 ods)
|
|
|
DataStream<AdDataOfHourODS> adHourODSStream = adODSStream.getSideOutput(adHourStreamTag);
|
|
|
// 写入原始表
|
|
@@ -175,69 +185,69 @@ public class AdStatJob {
|
|
|
|
|
|
SingleOutputStreamOperator<AdStatOfHourDWD> adHourDWDStream =
|
|
|
adHourODSStream.keyBy(AdDataOfHourODS::getAdId)
|
|
|
- .countWindow(1).
|
|
|
- process(new ProcessWindowFunction<AdDataOfHourODS, AdStatOfHourDWD, Long, GlobalWindow>() {
|
|
|
- private Odps odps;
|
|
|
- // 上次查询的天数据
|
|
|
- private ValueState<String> lastQueryDayState;
|
|
|
- // 聚合的天的数据
|
|
|
- private MapState<String, AdStatOfHourDWD> historyReduceState;
|
|
|
-
|
|
|
- @Override
|
|
|
- public void open(Configuration conf) {
|
|
|
- Map<String, String> params = getRuntimeContext()
|
|
|
- .getExecutionConfig()
|
|
|
- .getGlobalJobParameters()
|
|
|
- .toMap();
|
|
|
- Account account = new AliyunAccount(params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_ID),
|
|
|
- params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_KEY));
|
|
|
- odps = new Odps(account);
|
|
|
- odps.getRestClient().setRetryLogger(new MaxComputeLog());
|
|
|
- odps.setEndpoint(params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_ENDPOINT));
|
|
|
- odps.setDefaultProject(params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_PROJECT_NAME));
|
|
|
-
|
|
|
- lastQueryDayState = getRuntimeContext().getState(new ValueStateDescriptor<>("lastQueryDayState", String.class));
|
|
|
- historyReduceState = getRuntimeContext().getMapState(new MapStateDescriptor<>("historyReduceState", Types.STRING, Types.POJO(AdStatOfHourDWD.class)));
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void process(Long elementsCount, ProcessWindowFunction<AdDataOfHourODS, AdStatOfHourDWD, Long, GlobalWindow>.Context context, Iterable<AdDataOfHourODS> iterable, Collector<AdStatOfHourDWD> collector) throws Exception {
|
|
|
- AdDataOfHourODS element = iterable.iterator().next();
|
|
|
- LocalDate statDay = DateUtil.parseLocalDate(element.getStatDay());
|
|
|
- LocalDateTime statTime = LocalDateTime.of(statDay, LocalTime.of(element.getHour(), 0, 0));
|
|
|
- long now = System.currentTimeMillis();
|
|
|
- LocalDate today = LocalDate.now();
|
|
|
-
|
|
|
- String lastQueryDay = lastQueryDayState.value();
|
|
|
- // 从 maxCompute拉取指定 广告的历史数据
|
|
|
- if (lastQueryDay == null || !lastQueryDay.equals(DateUtil.formatLocalDate(today))) {
|
|
|
- LocalDate endDay = today, beginDay = statDay.minusDays(60);
|
|
|
- String sql = "SELECT * FROM ad_stat_of_hour_dwd WHERE stat_day >= \"" + DateUtil.formatLocalDate(beginDay) + "\" AND stat_day <= \"" + DateUtil.formatLocalDate(endDay) + "\" AND ad_id = " + element.getAdId() + ";";
|
|
|
- Instance instance = SQLTask.run(odps, sql);
|
|
|
- System.out.println("212===>sql: " + sql + ", odps日志: " + odps.logview().generateLogView(instance, 7 * 24));
|
|
|
- instance.waitForSuccess();
|
|
|
- List<Record> records = SQLTask.getResult(instance);
|
|
|
- Map<String, AdStatOfHourDWD> historyHourMap = records.stream()
|
|
|
- .map(AdStatOfHourDWD::byMaxCompute)
|
|
|
- .sorted((o1, o2) -> new Long(o1.getCreateTime().getTime() - o2.getCreateTime().getTime()).intValue())
|
|
|
- .collect(Collectors.toMap(data -> data.getStatDay() + data.getHour(), data -> data, (val1, val2) -> val2));
|
|
|
- historyReduceState.clear();
|
|
|
- historyReduceState.putAll(historyHourMap);
|
|
|
- lastQueryDayState.update(DateUtil.formatLocalDate(today));
|
|
|
- }
|
|
|
- AdStatOfHourDWD lastReduceData = null;
|
|
|
- for (int i = 1; i < 60 * 24; i++) {
|
|
|
- LocalDateTime time = statTime.minusHours(i);
|
|
|
- lastReduceData = historyReduceState.get(DateUtil.formatLocalDate(time.toLocalDate()) + time.getHour());
|
|
|
- if (lastReduceData != null) {
|
|
|
- break;
|
|
|
+ .countWindow(1).
|
|
|
+ process(new ProcessWindowFunction<AdDataOfHourODS, AdStatOfHourDWD, Long, GlobalWindow>() {
|
|
|
+ private Odps odps;
|
|
|
+ // 上次查询的天数据
|
|
|
+ private ValueState<String> lastQueryDayState;
|
|
|
+ // 聚合的天的数据
|
|
|
+ private MapState<String, AdStatOfHourDWD> historyReduceState;
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void open(Configuration conf) {
|
|
|
+ Map<String, String> params = getRuntimeContext()
|
|
|
+ .getExecutionConfig()
|
|
|
+ .getGlobalJobParameters()
|
|
|
+ .toMap();
|
|
|
+ Account account = new AliyunAccount(params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_ID),
|
|
|
+ params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_KEY));
|
|
|
+ odps = new Odps(account);
|
|
|
+ odps.getRestClient().setRetryLogger(new MaxComputeLog());
|
|
|
+ odps.setEndpoint(params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_ENDPOINT));
|
|
|
+ odps.setDefaultProject(params.get(ApplicationProperties.MAX_COMPUTE_ACCOUNT_PROJECT_NAME));
|
|
|
+
|
|
|
+ lastQueryDayState = getRuntimeContext().getState(new ValueStateDescriptor<>("lastQueryDayState", String.class));
|
|
|
+ historyReduceState = getRuntimeContext().getMapState(new MapStateDescriptor<>("historyReduceState", Types.STRING, Types.POJO(AdStatOfHourDWD.class)));
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- AdStatOfHourDWD newStatData = AdStatOfHourDWD.reduce(lastReduceData, element, now);
|
|
|
- collector.collect(newStatData);
|
|
|
- }
|
|
|
- });
|
|
|
+ @Override
|
|
|
+ public void process(Long elementsCount, ProcessWindowFunction<AdDataOfHourODS, AdStatOfHourDWD, Long, GlobalWindow>.Context context, Iterable<AdDataOfHourODS> iterable, Collector<AdStatOfHourDWD> collector) throws Exception {
|
|
|
+ AdDataOfHourODS element = iterable.iterator().next();
|
|
|
+ LocalDate statDay = DateUtil.parseLocalDate(element.getStatDay());
|
|
|
+ LocalDateTime statTime = LocalDateTime.of(statDay, LocalTime.of(element.getHour(), 0, 0));
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ LocalDate today = LocalDate.now();
|
|
|
+
|
|
|
+ String lastQueryDay = lastQueryDayState.value();
|
|
|
+ // 从 maxCompute拉取指定 广告的历史数据
|
|
|
+ if (lastQueryDay == null || !lastQueryDay.equals(DateUtil.formatLocalDate(today))) {
|
|
|
+ LocalDate endDay = today, beginDay = statDay.minusDays(60);
|
|
|
+ String sql = "SELECT * FROM ad_stat_of_hour_dwd WHERE stat_day >= \"" + DateUtil.formatLocalDate(beginDay) + "\" AND stat_day <= \"" + DateUtil.formatLocalDate(endDay) + "\" AND ad_id = " + element.getAdId() + ";";
|
|
|
+ Instance instance = SQLTask.run(odps, sql);
|
|
|
+ System.out.println("212===>sql: " + sql + ", odps日志: " + odps.logview().generateLogView(instance, 7 * 24));
|
|
|
+ instance.waitForSuccess();
|
|
|
+ List<Record> records = SQLTask.getResult(instance);
|
|
|
+ Map<String, AdStatOfHourDWD> historyHourMap = records.stream()
|
|
|
+ .map(AdStatOfHourDWD::byMaxCompute)
|
|
|
+ .sorted((o1, o2) -> new Long(o1.getCreateTime().getTime() - o2.getCreateTime().getTime()).intValue())
|
|
|
+ .collect(Collectors.toMap(data -> data.getStatDay() + data.getHour(), data -> data, (val1, val2) -> val2));
|
|
|
+ historyReduceState.clear();
|
|
|
+ historyReduceState.putAll(historyHourMap);
|
|
|
+ lastQueryDayState.update(DateUtil.formatLocalDate(today));
|
|
|
+ }
|
|
|
+ AdStatOfHourDWD lastReduceData = null;
|
|
|
+ for (int i = 1; i < 60 * 24; i++) {
|
|
|
+ LocalDateTime time = statTime.minusHours(i);
|
|
|
+ lastReduceData = historyReduceState.get(DateUtil.formatLocalDate(time.toLocalDate()) + time.getHour());
|
|
|
+ if (lastReduceData != null) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ AdStatOfHourDWD newStatData = AdStatOfHourDWD.reduce(lastReduceData, element, now);
|
|
|
+ collector.collect(newStatData);
|
|
|
+ }
|
|
|
+ });
|
|
|
//.addSink(new TunnelBatchSink<>(AdStatOfHourDWD.class, 30000L, 365L, 6));
|
|
|
new KeyedBatchStream<>("adHourDWDStream", adHourDWDStream.keyBy(AdStatOfHourDWD::getStatDay), 4000L, 60 * 1000L)
|
|
|
.toBatch()
|
|
@@ -380,17 +390,17 @@ public class AdStatJob {
|
|
|
.addSink(new TunnelBatchStreamSink<>(AdStatOfDayDWD.class))
|
|
|
.name("sink_ad_day_dwd");
|
|
|
|
|
|
- //小时数据-----天
|
|
|
- SingleOutputStreamOperator<CostHourDM> clickhouseHourDayDmStream =
|
|
|
- adDayDWDStream
|
|
|
- .keyBy(AdStatOfDayDWD::getAdId)
|
|
|
- .countWindow(1)
|
|
|
- .process(new CostHourDayProcess())
|
|
|
- .name("sink_ad_hour_day_dm_clickhouse");
|
|
|
-
|
|
|
-// clickhouseHourDayDmStream.print();
|
|
|
- BatchSinkHour batchSinkHourDay = new BatchSinkHour();
|
|
|
- clickhouseHourDayDmStream.addSink(batchSinkHourDay);
|
|
|
+// //小时数据-----天
|
|
|
+// SingleOutputStreamOperator<CostHourDM> clickhouseHourDayDmStream =
|
|
|
+// adDayDWDStream
|
|
|
+// .keyBy(AdStatOfDayDWD::getAdId)
|
|
|
+// .countWindow(1)
|
|
|
+// .process(new CostHourDayProcess())
|
|
|
+// .name("sink_ad_hour_day_dm_clickhouse");
|
|
|
+//
|
|
|
+//// clickhouseHourDayDmStream.print();
|
|
|
+// BatchSinkHour batchSinkHourDay = new BatchSinkHour();
|
|
|
+// clickhouseHourDayDmStream.addSink(batchSinkHourDay);
|
|
|
|
|
|
|
|
|
SingleOutputStreamOperator<AdStatOfDayDWD> adDayDWDYearStream = adDayODSStreamSplit.getSideOutput(adDayStreamRollYearTag)
|
|
@@ -417,17 +427,17 @@ public class AdStatJob {
|
|
|
.toBatch()
|
|
|
.addSink(new TunnelBatchStreamSink<>(AdStatOfDayDWD.class))
|
|
|
.name("sink_ad_year_dwd");
|
|
|
- //小时数据----年
|
|
|
- SingleOutputStreamOperator<CostHourDM> clickhouseHourYearDmStream =
|
|
|
- adDayDWDYearStream
|
|
|
- .keyBy(AdStatOfDayDWD::getAdId)
|
|
|
- .countWindow(1)
|
|
|
- .process(new CostHourDayProcess())
|
|
|
- .name("sink_ad_hour_year_dm_clickhouse");
|
|
|
-
|
|
|
-// clickhouseHourYearDmStream.print();
|
|
|
- BatchSinkHour batchSinkHourYear = new BatchSinkHour();
|
|
|
- clickhouseHourYearDmStream.addSink(batchSinkHourYear);
|
|
|
+// //小时数据----年
|
|
|
+// SingleOutputStreamOperator<CostHourDM> clickhouseHourYearDmStream =
|
|
|
+// adDayDWDYearStream
|
|
|
+// .keyBy(AdStatOfDayDWD::getAdId)
|
|
|
+// .countWindow(1)
|
|
|
+// .process(new CostHourDayProcess())
|
|
|
+// .name("sink_ad_hour_year_dm_clickhouse");
|
|
|
+//
|
|
|
+//// clickhouseHourYearDmStream.print();
|
|
|
+// BatchSinkHour batchSinkHourYear = new BatchSinkHour();
|
|
|
+// clickhouseHourYearDmStream.addSink(batchSinkHourYear);
|
|
|
|
|
|
//TODO:jobname----后面应该带个时间以此来区分log以及job
|
|
|
env.execute("job_ad");
|