|
@@ -4,6 +4,7 @@ from model.DateUtils import DateUtils
|
|
|
from model.DingTalkUtils import DingTalkUtils
|
|
|
import pandas as pd
|
|
|
from datetime import datetime
|
|
|
+
|
|
|
# logging.getLogger().setLevel(logging.WARNING)
|
|
|
|
|
|
|
|
@@ -119,18 +120,23 @@ def run(dt):
|
|
|
values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',
|
|
|
data)
|
|
|
|
|
|
-
|
|
|
-
|
|
|
- #ck对应数据也保存一份
|
|
|
- ck.execute(f'''
|
|
|
- alter table dw_image_cost_day drop partition '{dt}'
|
|
|
+ # ck对应数据也保存一份
|
|
|
+ # 1,获取到所有分区
|
|
|
+ ck_partitions = ck.execute(f'''
|
|
|
+ select partition
|
|
|
+ from `system`.parts p
|
|
|
+ where table='dw_image_cost_day'
|
|
|
+ and `partition` like '%{dt}%'
|
|
|
''')
|
|
|
+ # 2.进行当天相关的分区删除
|
|
|
+ for i in ck_partitions:
|
|
|
+ ck.client.execute(''' alter table dw_image_cost_day drop partition ''' + i[0])
|
|
|
col = ['dt', 'type', 'use_times', 'cost', 'view_count', 'click_count',
|
|
|
'follow_count', 'order_count', 'order_amount', 'title', 'description',
|
|
|
'book', 'platform', 'stage', 'channel', 'pitcher', 'image_id', 'preview_url',
|
|
|
'signature', 'is_video', 'width', 'height', 'size', 'format', 'video_length',
|
|
|
'video_bit_rate', 'video_meta_data', 'download_path']
|
|
|
- #ck存入前进行数据格式化
|
|
|
+ # ck存入前进行数据格式化
|
|
|
for _ in data:
|
|
|
# data= [col if col else 'null' for col in data ]
|
|
|
_[0] = datetime.strptime(_[0], '%Y-%m-%d')
|
|
@@ -151,13 +157,14 @@ def run(dt):
|
|
|
col_str = ','.join(col)
|
|
|
ck.client.execute('insert into dw_image_cost_day ({}) values'.format(col_str), data)
|
|
|
|
|
|
+
|
|
|
def hourly():
    """Run the cleaning job once for the value returned by ``du.getNow()``.

    Logs a start and an end message around the call to ``run``. Any
    ``Exception`` raised is logged together with its traceback and reported
    through ``DingTalkUtils().send`` (fixed text plus ``str(e)``). The
    exception is not re-raised, so callers never observe the failure.
    """
    try:
        logging.info('广告数据清洗,开始')
        run(du.getNow())
        logging.info('广告数据清洗,结束')
    except Exception as e:
        # The alert only carries str(e); keep the full traceback in the log
        # so the failure can still be diagnosed afterwards.
        logging.exception('广告数据清洗失败')
        DingTalkUtils().send("广告数据清洗失败\n"+str(e))
|
|
|
|
|
|
|
|
|
def day():
|
|
@@ -171,8 +178,6 @@ def day():
|
|
|
if __name__ == '__main__':
    # Single-date manual invocation kept for reference: run('2021-05-18')

    # Call run() once per entry that du.getDateLists yields for the two
    # bounds below, printing each entry first.
    # NOTE(review): presumably this is a backfill from 495 days ago through
    # today -- confirm against DateUtils.get_n_days / getDateLists.
    range_start = du.get_n_days(-495)
    range_end = du.get_n_days(0)
    for target_dt in du.getDateLists(range_start, range_end):
        print(target_dt)
        run(target_dt)
|
|
|
-
|
|
|
-
|