Преглед изворног кода

MOD:素材排行版数据更新

cxyu пре 3 година
родитељ
комит
b8641e1987
1 измењен фајл са 11 додатих и 50 уклоњених редова
  1. 11 50
      app/etl/dw/dw_image_cost_day.py

+ 11 - 50
app/etl/dw/dw_image_cost_day.py

@@ -25,7 +25,7 @@ def run(dt):
             left join channel_by_account_daily e on b.account_id=e.account_id and a.dt=e.dt
             left join channel_info_daily f on e.channel=f.channel and e.dt=f.dt
             left join campaign_info g on b.campaign_id = g.campaign_id 
-            where a.dt='{dt}'  and c.is_video=0 and g.campaign_id is not null
+            where a.dt='{dt}'  and (c.is_video=0 or c.is_video is null) and g.campaign_id is not null
             group by g.campaign_id 
             
             """
@@ -42,6 +42,9 @@ def run(dt):
         # print(i)
         li.extend(i[16].split(','))
     # TODO:之后如果一天产生的图片过多,可能超过sql的字符限制
+
+    # TODO:归属人数据有问题
+
     # 之后数据使用hive,来进行数据存储
 
     sql3 = f"select image_id,preview_url,signature,width,height,size,`type` from image_info where  image_id in ({str(set(li))[1:-1]})"
@@ -158,8 +161,6 @@ def run(dt):
         i = i[:-3] + i[-2:]
         data_new.append(i)
     data.extend(data_new)
-
-
     # 进行数据存储
 
     db.dm.execute(f'delete from dw_image_cost_day where dt="{dt}"')
@@ -172,48 +173,6 @@ def run(dt):
         values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',
         data)
 
-    # # 图片数据归属权改变
-    # # 大于五天的数据进行,消耗超过5000的owner置位null
-    # owner_sql = '''
-    #     UPDATE dw_image_cost_day
-    #     set owner = null
-    #     WHERE
-    #     dt<date_add(now(),interval -5 day) or cost>5000
-    # '''
-    # db.dm.execute(owner_sql)
-
-
-    # ck对应数据也保存一份
-    # 1.进行当天相关的分区删除
-    ck.client.execute(f''' alter table dw_image_cost_day drop partition '{dt}' ''')
-    col = ['dt', 'type', 'use_times', 'cost', 'view_count', 'click_count',
-           'follow_count', 'order_count', 'order_amount', 'title', 'description',
-           'book', 'platform', 'stage', 'channel', 'pitcher', 'image_id', 'preview_url',
-           'signature', 'is_video', 'width', 'height', 'size', 'format', 'video_length',
-           'video_bit_rate', 'video_meta_data', 'download_path','campaign_id', 'owner']
-    # ck存入前进行数据格式化
-    for _ in data:
-        # data= [col if col else 'null' for col in data ]
-        _[0] = datetime.strptime(_[0], '%Y-%m-%d')
-        _[1] = str(_[1]) if _[1] is not None else None
-        _[2] = int(_[2]) if _[2] is not None else None
-        _[3] = float(_[3]) if _[3] is not None else None
-        _[4] = int(_[4]) if _[4] is not None else None
-        _[5] = int(_[5]) if _[5] is not None else None
-        _[6] = int(_[6]) if _[6] is not None else None
-        _[7] = int(_[7]) if _[7] is not None else None
-        _[8] = float(_[8]) if _[8] is not None else None
-        _[19] = str(_[19])
-        _[20] = str(_[20]) if _[20] is not None else None
-        _[21] = str(_[21]) if _[21] is not None else None
-        _[22] = str(_[22]) if _[22] is not None else None
-        _[23] = str(_[23]) if _[23] is not None else None
-
-    col_str = ','.join(col)
-    logging.info('ck填充数据进入')
-    ck.client.execute('SET max_partitions_per_insert_block=1000;')
-    ck.client.execute('insert into dw_image_cost_day ({}) values'.format(col_str), data)
-    logging.info('ck填充数据,结束')
 
 
 def hourly():
@@ -250,10 +209,12 @@ if __name__ == '__main__':
     )
     # -495
     #
-    for i in du.getDateLists(du.get_n_days(-495), du.get_n_days(0)):
-        print(i)
-        # exit()
-        run(i)
+    # for i in du.getDateLists(du.get_n_days(-495), du.get_n_days(0)):
+    #     print(i)
+    #     # exit()
+    #     run(i)
 
     # print(du.get_n_days(-20))
-    # run(du.get_n_days(0))
+    run(du.get_n_days(0))
+
+