|
@@ -25,7 +25,7 @@ def run(dt):
|
|
|
left join channel_by_account_daily e on b.account_id=e.account_id and a.dt=e.dt
|
|
|
left join channel_info_daily f on e.channel=f.channel and e.dt=f.dt
|
|
|
left join campaign_info g on b.campaign_id = g.campaign_id
|
|
|
- where a.dt='{dt}' and c.is_video=0 and g.campaign_id is not null
|
|
|
+ where a.dt='{dt}' and (c.is_video=0 or c.is_video is null) and g.campaign_id is not null
|
|
|
group by g.campaign_id
|
|
|
|
|
|
"""
|
|
@@ -42,6 +42,9 @@ def run(dt):
|
|
|
# print(i)
|
|
|
li.extend(i[16].split(','))
|
|
|
# TODO:之后如果一天产生的图片过多,可能超过sql的字符限制
|
|
|
+
|
|
|
+ # TODO:归属人数据有问题
|
|
|
+
|
|
|
# 之后数据使用hive,来进行数据存储
|
|
|
|
|
|
sql3 = f"select image_id,preview_url,signature,width,height,size,`type` from image_info where image_id in ({str(set(li))[1:-1]})"
|
|
@@ -158,8 +161,6 @@ def run(dt):
|
|
|
i = i[:-3] + i[-2:]
|
|
|
data_new.append(i)
|
|
|
data.extend(data_new)
|
|
|
-
|
|
|
-
|
|
|
# 进行数据存储
|
|
|
|
|
|
db.dm.execute(f'delete from dw_image_cost_day where dt="{dt}"')
|
|
@@ -172,48 +173,6 @@ def run(dt):
|
|
|
values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',
|
|
|
data)
|
|
|
|
|
|
- # # 图片数据归属权改变
|
|
|
- # # 大于五天的数据进行,消耗超过5000的owner置位null
|
|
|
- # owner_sql = '''
|
|
|
- # UPDATE dw_image_cost_day
|
|
|
- # set owner = null
|
|
|
- # WHERE
|
|
|
- # dt<date_add(now(),interval -5 day) or cost>5000
|
|
|
- # '''
|
|
|
- # db.dm.execute(owner_sql)
|
|
|
-
|
|
|
-
|
|
|
- # ck对应数据也保存一份
|
|
|
- # 1.进行当天相关的分区删除
|
|
|
- ck.client.execute(f''' alter table dw_image_cost_day drop partition '{dt}' ''')
|
|
|
- col = ['dt', 'type', 'use_times', 'cost', 'view_count', 'click_count',
|
|
|
- 'follow_count', 'order_count', 'order_amount', 'title', 'description',
|
|
|
- 'book', 'platform', 'stage', 'channel', 'pitcher', 'image_id', 'preview_url',
|
|
|
- 'signature', 'is_video', 'width', 'height', 'size', 'format', 'video_length',
|
|
|
- 'video_bit_rate', 'video_meta_data', 'download_path','campaign_id', 'owner']
|
|
|
- # ck存入前进行数据格式化
|
|
|
- for _ in data:
|
|
|
- # data= [col if col else 'null' for col in data ]
|
|
|
- _[0] = datetime.strptime(_[0], '%Y-%m-%d')
|
|
|
- _[1] = str(_[1]) if _[1] is not None else None
|
|
|
- _[2] = int(_[2]) if _[2] is not None else None
|
|
|
- _[3] = float(_[3]) if _[3] is not None else None
|
|
|
- _[4] = int(_[4]) if _[4] is not None else None
|
|
|
- _[5] = int(_[5]) if _[5] is not None else None
|
|
|
- _[6] = int(_[6]) if _[6] is not None else None
|
|
|
- _[7] = int(_[7]) if _[7] is not None else None
|
|
|
- _[8] = float(_[8]) if _[8] is not None else None
|
|
|
- _[19] = str(_[19])
|
|
|
- _[20] = str(_[20]) if _[20] is not None else None
|
|
|
- _[21] = str(_[21]) if _[21] is not None else None
|
|
|
- _[22] = str(_[22]) if _[22] is not None else None
|
|
|
- _[23] = str(_[23]) if _[23] is not None else None
|
|
|
-
|
|
|
- col_str = ','.join(col)
|
|
|
- logging.info('ck填充数据进入')
|
|
|
- ck.client.execute('SET max_partitions_per_insert_block=1000;')
|
|
|
- ck.client.execute('insert into dw_image_cost_day ({}) values'.format(col_str), data)
|
|
|
- logging.info('ck填充数据,结束')
|
|
|
|
|
|
|
|
|
def hourly():
|
|
@@ -250,10 +209,12 @@ if __name__ == '__main__':
|
|
|
)
|
|
|
# -495
|
|
|
#
|
|
|
- for i in du.getDateLists(du.get_n_days(-495), du.get_n_days(0)):
|
|
|
- print(i)
|
|
|
- # exit()
|
|
|
- run(i)
|
|
|
+ # for i in du.getDateLists(du.get_n_days(-495), du.get_n_days(0)):
|
|
|
+ # print(i)
|
|
|
+ # # exit()
|
|
|
+ # run(i)
|
|
|
|
|
|
# print(du.get_n_days(-20))
|
|
|
- # run(du.get_n_days(0))
|
|
|
+ run(du.get_n_days(0))
|
|
|
+
|
|
|
+
|