|
@@ -1,6 +1,7 @@
|
|
package flink.zanxiangnet.ad.monitoring;
|
|
package flink.zanxiangnet.ad.monitoring;
|
|
|
|
|
|
import flink.zanxiangnet.ad.monitoring.pojo.entity.*;
|
|
import flink.zanxiangnet.ad.monitoring.pojo.entity.*;
|
|
|
|
+import flink.zanxiangnet.ad.monitoring.pojo.properties.ApplicationProperties;
|
|
import flink.zanxiangnet.ad.monitoring.process.*;
|
|
import flink.zanxiangnet.ad.monitoring.process.*;
|
|
import flink.zanxiangnet.ad.monitoring.sink.AdDayDWDToCkBatchSink;
|
|
import flink.zanxiangnet.ad.monitoring.sink.AdDayDWDToCkBatchSink;
|
|
import flink.zanxiangnet.ad.monitoring.sink.AdHourDMToCkBatchSink;
|
|
import flink.zanxiangnet.ad.monitoring.sink.AdHourDMToCkBatchSink;
|
|
@@ -15,9 +16,12 @@ import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.flink.api.common.eventtime.*;
|
|
import org.apache.flink.api.common.eventtime.*;
|
|
import org.apache.flink.configuration.Configuration;
|
|
import org.apache.flink.configuration.Configuration;
|
|
import org.apache.flink.connector.kafka.source.KafkaSource;
|
|
import org.apache.flink.connector.kafka.source.KafkaSource;
|
|
|
|
+import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
|
|
|
|
+import org.apache.flink.streaming.api.CheckpointingMode;
|
|
import org.apache.flink.streaming.api.datastream.DataStream;
|
|
import org.apache.flink.streaming.api.datastream.DataStream;
|
|
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
|
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
|
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
|
|
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
|
|
|
|
+import org.apache.flink.streaming.api.environment.CheckpointConfig;
|
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
|
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
|
|
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
|
|
import org.apache.flink.streaming.api.windowing.time.Time;
|
|
import org.apache.flink.streaming.api.windowing.time.Time;
|
|
@@ -47,6 +51,26 @@ public class AdHourStreamJob {
|
|
});
|
|
});
|
|
env.getConfig().setGlobalJobParameters(configuration);
|
|
env.getConfig().setGlobalJobParameters(configuration);
|
|
|
|
|
|
|
|
+ // checkpoint配置
|
|
|
|
+ env.enableCheckpointing(2 * 60 * 1000L, CheckpointingMode.EXACTLY_ONCE);
|
|
|
|
+ // checkpoint执行超时时间,超时则 checkpoint失败
|
|
|
|
+ env.getCheckpointConfig().setCheckpointTimeout(5 * 60 * 1000L);
|
|
|
|
+ // checkpoint执行最小间隔时间
|
|
|
|
+ env.getCheckpointConfig().setMinPauseBetweenCheckpoints(60 * 1000L);
|
|
|
|
+ // 允许并行执行 checkpoint个数,当指定了 minPauseBetweenCheckpoints时,其值无效(就是 1)
|
|
|
|
+ env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
|
|
|
|
+ // 开启 checkpoints的外部持久化,但是在job失败的时候不会自动清理,需要自己手工清理state。
|
|
|
|
+ // ExternalizedCheckpointCleanup用于指定当job canceled的时候外部的 checkpoint该如何清理
|
|
|
|
+ // DELETE_ON_CANCELLATION: 在job canceled的时候会自动删除外部 state,但是如果是FAILED的状态则会保留
|
|
|
|
+ // RETAIN_ON_CANCELLATION:在job canceled的时候保留外部 state
|
|
|
|
+ env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
|
|
|
|
+ // 大概是允许 checkpoint失败几次,默认 0
|
|
|
|
+ env.getCheckpointConfig().setTolerableCheckpointFailureNumber(0);
|
|
|
|
+ env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
|
|
|
|
+ if (StringUtils.isNotBlank(props.getProperty(ApplicationProperties.CHECKPOINT_SAVEPOINT))) {
|
|
|
|
+ env.getCheckpointConfig().setCheckpointStorage(props.getProperty(ApplicationProperties.CHECKPOINT_SAVEPOINT));
|
|
|
|
+ }
|
|
|
|
+
|
|
KafkaSource<String> adStreamOfMinuteSource = KafkaComponent.buildKafkaSource(props, KafkaComponent.KafkaTopic.adHourTopic, KafkaComponent.KafkaTopic.KafkaGroupId.adHourConsumerGroup);
|
|
KafkaSource<String> adStreamOfMinuteSource = KafkaComponent.buildKafkaSource(props, KafkaComponent.KafkaTopic.adHourTopic, KafkaComponent.KafkaTopic.KafkaGroupId.adHourConsumerGroup);
|
|
DataStreamSource<String> adStreamOfMinuteIn = env.fromSource(adStreamOfMinuteSource, WatermarkStrategy.noWatermarks(), "adHourSource_kafka");
|
|
DataStreamSource<String> adStreamOfMinuteIn = env.fromSource(adStreamOfMinuteSource, WatermarkStrategy.noWatermarks(), "adHourSource_kafka");
|
|
|
|
|
|
@@ -64,7 +88,7 @@ public class AdHourStreamJob {
|
|
// 分钟流
|
|
// 分钟流
|
|
DataStream<AdDataOfMinuteODS> adMinuteODSStream = adODSStream.getSideOutput(adMinuteStreamTag);
|
|
DataStream<AdDataOfMinuteODS> adMinuteODSStream = adODSStream.getSideOutput(adMinuteStreamTag);
|
|
// 分钟流-写入原始表
|
|
// 分钟流-写入原始表
|
|
- new KeyedBatchStream<>("adMinuteODSStream", adMinuteODSStream.keyBy(AdDataOfMinuteODS::getStatDay), 4000L, 2 * 60 * 1000L)
|
|
|
|
|
|
+ new KeyedBatchStream<>("adMinuteODSStream", adMinuteODSStream.keyBy(AdDataOfMinuteODS::getStatDay), 1000L, 2 * 60 * 1000L)
|
|
.toBatch()
|
|
.toBatch()
|
|
.addSink(new TunnelBatchStreamSink<>(AdDataOfMinuteODS.class))
|
|
.addSink(new TunnelBatchStreamSink<>(AdDataOfMinuteODS.class))
|
|
.name("sink_ad_minute_ods");
|
|
.name("sink_ad_minute_ods");
|
|
@@ -79,7 +103,7 @@ public class AdHourStreamJob {
|
|
.window(TumblingEventTimeWindows.of(Time.minutes(5L)))
|
|
.window(TumblingEventTimeWindows.of(Time.minutes(5L)))
|
|
.trigger(new AdMinuteODSStreamTrigger())
|
|
.trigger(new AdMinuteODSStreamTrigger())
|
|
.process(new AdMinuteDWDProcess());
|
|
.process(new AdMinuteDWDProcess());
|
|
- new KeyedBatchStream<>("adMinuteDWDStream", adMinuteDWDStream.keyBy(AdStatOfMinuteDWD::getStatDay), 4000L, 60 * 1000L)
|
|
|
|
|
|
+ new KeyedBatchStream<>("adMinuteDWDStream", adMinuteDWDStream.keyBy(AdStatOfMinuteDWD::getStatDay), 1000L, 60 * 1000L)
|
|
.toBatch()
|
|
.toBatch()
|
|
.addSink(new TunnelBatchStreamSink<>(AdStatOfMinuteDWD.class))
|
|
.addSink(new TunnelBatchStreamSink<>(AdStatOfMinuteDWD.class))
|
|
.name("sink_ad_minute_dwd");
|
|
.name("sink_ad_minute_dwd");
|
|
@@ -89,7 +113,7 @@ public class AdHourStreamJob {
|
|
adMinuteDWDStream
|
|
adMinuteDWDStream
|
|
.keyBy(AdStatOfMinuteDWD::getAdId)
|
|
.keyBy(AdStatOfMinuteDWD::getAdId)
|
|
.process(new CostMinuteProcess());
|
|
.process(new CostMinuteProcess());
|
|
- new BatchStream<>("adMinuteDMStream", clickhouseMinuteDmStream, 1000L, 60 * 1000L)
|
|
|
|
|
|
+ new BatchStream<>("adMinuteDMStream", clickhouseMinuteDmStream, 4000L, 60 * 1000L)
|
|
.toBatch()
|
|
.toBatch()
|
|
.addSink(new AdMinuteDMToCkBatchSink())
|
|
.addSink(new AdMinuteDMToCkBatchSink())
|
|
.name("sink_ad_minute_dm_clickhouse");
|
|
.name("sink_ad_minute_dm_clickhouse");
|
|
@@ -97,7 +121,7 @@ public class AdHourStreamJob {
|
|
// 小时流
|
|
// 小时流
|
|
DataStream<AdDataOfHourODS> adHourODSStream = adODSStream.getSideOutput(adHourStreamTag);
|
|
DataStream<AdDataOfHourODS> adHourODSStream = adODSStream.getSideOutput(adHourStreamTag);
|
|
// 小时流-写入原始表
|
|
// 小时流-写入原始表
|
|
- new KeyedBatchStream<>("adHourODSStream", adHourODSStream.keyBy(AdDataOfHourODS::getStatDay), 4000L, 3 * 60 * 1000L)
|
|
|
|
|
|
+ new KeyedBatchStream<>("adHourODSStream", adHourODSStream.keyBy(AdDataOfHourODS::getStatDay), 1000L, 3 * 60 * 1000L)
|
|
.toBatch()
|
|
.toBatch()
|
|
.addSink(new TunnelBatchStreamSink<>(AdDataOfHourODS.class))
|
|
.addSink(new TunnelBatchStreamSink<>(AdDataOfHourODS.class))
|
|
.name("sink_ad_hour_ods");
|
|
.name("sink_ad_hour_ods");
|
|
@@ -108,7 +132,7 @@ public class AdHourStreamJob {
|
|
.process(new AdHourDWDProcess());
|
|
.process(new AdHourDWDProcess());
|
|
|
|
|
|
// 小时流-写入maxCompute
|
|
// 小时流-写入maxCompute
|
|
- new KeyedBatchStream<>("adHourDWDStream", adHourDWDStream.keyBy(AdStatOfHourDWD::getStatDay), 4000L, 3 * 60 * 1000L)
|
|
|
|
|
|
+ new KeyedBatchStream<>("adHourDWDStream", adHourDWDStream.keyBy(AdStatOfHourDWD::getStatDay), 1000L, 3 * 60 * 1000L)
|
|
.toBatch()
|
|
.toBatch()
|
|
.addSink(new TunnelBatchStreamSink<>(AdStatOfHourDWD.class))
|
|
.addSink(new TunnelBatchStreamSink<>(AdStatOfHourDWD.class))
|
|
.name("sink_ad_hour_dwd");
|
|
.name("sink_ad_hour_dwd");
|
|
@@ -124,7 +148,7 @@ public class AdHourStreamJob {
|
|
adHourDWDAllStream
|
|
adHourDWDAllStream
|
|
.keyBy(AdStatOfHourDWD::getAdId)
|
|
.keyBy(AdStatOfHourDWD::getAdId)
|
|
.process(new CostHourProcess());
|
|
.process(new CostHourProcess());
|
|
- new BatchStream<>("adHourDMStream", adHourDMStream, 1000L, 60 * 1000L)
|
|
|
|
|
|
+ new BatchStream<>("adHourDMStream", adHourDMStream, 2000L, 60 * 1000L)
|
|
.toBatch()
|
|
.toBatch()
|
|
.addSink(new AdHourDMToCkBatchSink())
|
|
.addSink(new AdHourDMToCkBatchSink())
|
|
.name("sink_ad_hour_dm_clickhouse");
|
|
.name("sink_ad_hour_dm_clickhouse");
|