cxyu committed 3 years ago
parent
commit
c7a14378aa
2 changed files with 86 additions and 50 deletions
  1. +79 −43
      data_manage/public_analysis.py
  2. +7 −7
      handlers/PublicAnalysisHandler.py

+ 79 - 43
data_manage/public_analysis.py

@@ -615,7 +615,7 @@ def advertisement_rank(user_id, start, end, type, page, page_size, order, order_
 
 
 def idea_rank(user_id, start, end, page, page_size, order, order_by, book, channel, is_singleimg,
-              is_video, labels, collect, data_type):
+              is_video, label_ids, collect, data_type):
     # TODO: switch data access to ClickHouse

     # Time attribution: fetch every record in this window and aggregate it (aggregation can wait until the weekend; just pull the data out first)
@@ -633,25 +633,26 @@ def idea_rank(user_id, start, end, page, page_size, order, order_by, book, chann
     op4 = f" and channel='{channel}'" if channel else ''
 
     op10 = f" and book='{book}'" if book else ''
-    # TODO: add label handling: map label ids onto their campaigns
-
     op11 = f" and image_id like '%,%' " if not is_singleimg else ''
-    op12 = f" and is_video" if is_video else ''  # 进行对应修改1----图片
+    op12 = f" and is_video" if is_video == 2 else ' and not is_video'  # 进行对应修改1----图片
+
+    op_label = f" and label_id in ({label_ids}) " if label_ids else ''
+    op_label2 = f' and labels is not null' if label_ids else ''
 
     # Public data works like the asset library; private mode shows only the user's own (team) data
-    # TODO: later restrict op_or1 by owner; classify owner from dt and cost when dw_image_cost_day is generated
     op_or1 = f' or (dt<date_add(now(),interval -5 day) or cost>5000) ' if data_type == 'all' else ''
     # map click_times, view_times, consume_amount, click_rate one-to-one onto the output columns
     if order_by == 'click_times':
-        order_by = 'click_count'
+        order_by = 'clickTimes'
     if order_by == 'view_times':
-        order_by = 'view_count'
+        order_by = 'viewTimes'
     if order_by == 'consume_amount':
-        order_by = 'cost'
+        order_by = 'consumeAmount'
     if order_by == 'click_rate':
-        order_by = 'if(click_count=0 or view_count =0 or view_count is null or click_count is null,0,round(click_count / view_count,2)) '
+        # order_by = 'if(click_count=0 or view_count =0 or view_count is null or click_count is null,0,round(click_count / view_count,2)) '
+        order_by = 'clickRate'
     if order_by == 'create_time' or order_by == 'start_date':
-        order_by = 'cost'
+        order_by = 'consumeAmount'
 
     op_order = f" order by {order_by}  {order}" if order_by and order else ''
 
@@ -661,9 +662,10 @@ def idea_rank(user_id, start, end, page, page_size, order, order_by, book, chann
 
     db = MysqlUtils().dm
 
-    sql = f"""
-    select 
-    row_number () over() as id,
+    sql = f'''
+    select foo.*,foo2.labels as labels from 
+    (select 
+    campaign_id as id,
     book as novels,
     dt as startDate,
     date_format( now(),'%Y-%m-%d') as endDate,
@@ -672,7 +674,6 @@ def idea_rank(user_id, start, end, page, page_size, order, order_by, book, chann
     owner as creator,
     0 as delFlag,
     False as isCollected,
-    '' as labels,
     download_path as downloadPath,
     height,
     width,
@@ -697,11 +698,20 @@ def idea_rank(user_id, start, end, page, page_size, order, order_by, book, chann
     video_length as videoLength,
     use_times as userTimes
     from dw_image_cost_day
-    where replace (preview_url,' ,','') !='' 
-    and (1=1 {op1}  {op4}   {op10} {op11}  {op12}  {op_or1}) 
+    where replace (preview_url,' ,','') !=''
+     and (1=1 {op1}  {op_or1}) 
+    {op4}   {op10} {op11}  {op12}  
     {op_time_bigger} {op_time_small} 
-     {op_order} 
-    """
+    ) as foo
+    left join  (select a.campaign_id,group_concat(b.label) as labels from label_database a
+                left join ads_label b on a.label_id=b.id
+                where 1=1 {op_label}
+                group by a.campaign_id ) as foo2 on  foo.id= foo2.campaign_id
+    where 1=1 {op_label2}
+        {op_order} 
+
+'''
+
     print(sql)
     data, total = getLimitData(db, sql, page, page_size)
     data = {'records': data, 'total': total, 'size': page_size, 'current': page, 'pages': int(total / page_size) + 1}
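
A side note on the unchanged pagination line above: int(total / page_size) + 1 reports one page too many whenever total is an exact multiple of page_size (40 rows at 20 per page yields 3). A hedged one-liner using ceiling division, assuming both values are positive ints:

    pages = -(-total // page_size) if total else 0  # ceil(total / page_size): 40 rows at 20 per page -> 2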
@@ -709,7 +719,7 @@ def idea_rank(user_id, start, end, page, page_size, order, order_by, book, chann
 
 
 def media_rank(user_id, start, end, page, page_size, order, order_by, book, channel, is_singleimg,
-               is_video, labels, collect, data_type):
+               is_video, label_ids, collect, data_type):
     # TODO: switch data access to ClickHouse

     # Time attribution: fetch every record in this window and aggregate it (aggregation can wait until the weekend; just pull the data out first)
@@ -730,22 +740,25 @@ def media_rank(user_id, start, end, page, page_size, order, order_by, book, chan
     # TODO: add label handling: map label ids onto their campaigns
 
     op11 = f" and image_id like '%,%' " if not is_singleimg else ''
-    op12 = f" and is_video" if is_video else ''  # 进行对应修改1----图片
+    op12 = f" and is_video" if is_video == 2 else ' and not is_video'  # 进行对应修改1----图片
+    op_label = f" and label_id in ({label_ids}) " if label_ids else ''
+    op_label2 = f' and labels is not null' if label_ids else ''
 
     # Public data works like the asset library; private mode shows only the user's own (team) data
     # TODO: later restrict op_or1 by owner; classify owner from dt and cost when dw_image_cost_day is generated
     op_or1 = f' or (dt<date_add(now(),interval -5 day) or cost>5000) ' if data_type == 'all' else ''
     # map click_times, view_times, consume_amount, click_rate one-to-one onto the output columns
     if order_by == 'click_times':
-        order_by = 'click_count'
+        order_by = 'clickTimes'
     if order_by == 'view_times':
-        order_by = 'view_count'
+        order_by = 'viewTimes'
     if order_by == 'consume_amount':
-        order_by = 'cost'
+        order_by = 'consumeAmount'
     if order_by == 'click_rate':
-        order_by = 'if(click_count=0 or view_count =0 or view_count is null or click_count is null,0,click_count / view_count) '
+        # order_by = 'if(click_count=0 or view_count =0 or view_count is null or click_count is null,0,round(click_count / view_count,2)) '
+        order_by = 'clickRate'
     if order_by == 'create_time' or order_by == 'start_date':
-        order_by = 'cost'
+        order_by = 'consumeAmount'
 
     op_order = f" order by {order_by}  {order}" if order_by and order else ''
 
@@ -755,9 +768,11 @@ def media_rank(user_id, start, end, page, page_size, order, order_by, book, chan
 
     db = MysqlUtils().dm
 
-    sql = f"""
+    sql = f'''
+    select foo.*,foo2.labels as labels from 
+    (
     select 
-    row_number () over() as id,
+    campaign_id as id,
     book as novels,
     dt as startDate,
     date_format( now(),'%Y-%m-%d') as endDate,
@@ -766,7 +781,6 @@ def media_rank(user_id, start, end, page, page_size, order, order_by, book, chan
     owner as creator,
     0 as delFlag,
     False as isCollected,
-    '' as labels,
     download_path as downloadPath,
     height,
     width,
@@ -790,11 +804,20 @@ def media_rank(user_id, start, end, page, page_size, order, order_by, book, chan
     video_length as videoLength,
     use_times as userTimes
     from dw_image_cost_day
-    where replace (preview_url,' ,','') !='' 
-    and (1=1 {op1}  {op4}   {op10} {op11}  {op12}  {op_or1}) 
+        where replace (preview_url,' ,','') !=''
+     and (1=1 {op1}  {op_or1}) 
+    {op4}   {op10} {op11}  {op12}  
     {op_time_bigger} {op_time_small} 
-     {op_order} 
-    """
+    ) as foo
+    left join  (select a.campaign_id,group_concat(b.label) as labels from label_database a
+                left join ads_label b on a.label_id=b.id
+                where 1=1 {op_label}
+                group by a.campaign_id ) as foo2 on  foo.id= foo2.campaign_id
+    where 1=1 {op_label2}
+        {op_order} 
+
+'''
+
     print(sql)
     data, total = getLimitData(db, sql, page, page_size)
     data = {'records': data, 'total': total, 'size': page_size, 'current': page, 'pages': int(total / page_size) + 1}
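
media_rank picks up the same label join as idea_rank. One caveat that applies to both: MySQL truncates group_concat output at group_concat_max_len (1024 bytes by default), so campaigns carrying many labels can come back clipped. A hedged workaround, where cursor stands in for whatever handle MysqlUtils exposes:

    # Hypothetical: raise the per-session cap before running the ranking SQL
    cursor.execute("SET SESSION group_concat_max_len = 8192")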
@@ -802,7 +825,7 @@ def media_rank(user_id, start, end, page, page_size, order, order_by, book, chan
 
 
 def content_rank(user_id, start, end, page, page_size, order, order_by, book, channel, is_singleimg,
-                 is_video, labels, collect, data_type):
+                 is_video, label_ids, collect, data_type):
     # TODO: switch data access to ClickHouse

     # Time attribution: fetch every record in this window and aggregate it (aggregation can wait until the weekend; just pull the data out first)
@@ -818,12 +841,13 @@ def content_rank(user_id, start, end, page, page_size, order, order_by, book, ch
             op1 = f" and pitcher in {str(user)}"
 
     op4 = f" and channel='{channel}'" if channel else ''
-
+    op_label = f" and label_id in ({label_ids}) " if label_ids else ''
+    op_label2 = f' and labels is not null' if label_ids else ''
     op10 = f" and book='{book}'" if book else ''
     # TODO: add label handling: map label ids onto their campaigns
 
     op11 = f" and image_id like '%,%' " if not is_singleimg else ''
-    op12 = f" and is_video" if is_video else ''  # 进行对应修改1----图片
+    op12 = f" and is_video" if is_video == 2 else ' and not is_video'  # 进行对应修改1----图片
 
     # Public data works like the asset library; private mode shows only the user's own (team) data
     # TODO: later restrict op_or1 by owner; classify owner from dt and cost when dw_image_cost_day is generated
@@ -869,7 +893,7 @@ where   REPLACE(REPLACE(title , CHAR(10), ''), CHAR(13), '')  is not null and le
 
     sql = f"""
    select * from (select 
-    row_number () over() as id,
+    a.campaign_id as id,
     book as novels,
     dt as startDate,
     date_format( now(),'%Y-%m-%d') as endDate,
@@ -877,7 +901,7 @@ where   REPLACE(REPLACE(title , CHAR(10), ''), CHAR(13), '')  is not null and le
     'all' as dataType,
     owner as creator,
     False as isCollected,
-    '' as labels,
+    b.labels as labels,
     if(click_count=0 or view_count =0 or view_count is null or click_count is null,0,round(click_count / view_count,2)) as clickRate,
     round(width/if(height,height,1),2) aspect_ratio,
     cost as consumeAmount,
@@ -890,14 +914,20 @@ where   REPLACE(REPLACE(title , CHAR(10), ''), CHAR(13), '')  is not null and le
     date_format( now(),'%Y-%m-%d %H:%i:%S') as upateTime,
     null as updateBy,
     use_times as userTimes
-    from dw_image_cost_day
+    from 
+     dw_image_cost_day a
+    left join  (select a.campaign_id,group_concat(b.label) as labels from label_database a
+                left join ads_label b on a.label_id=b.id
+                where 1=1  {op_label}
+                group by a.campaign_id ) as b on  a.campaign_id= b.campaign_id
     where     REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') is not null and length(description)>0 
-    and (1=1 {op1}  {op4}   {op10} {op11}  {op12}  {op_or1}) 
+    and (1=1 {op1}  {op_or1})
+    {op4}   {op10} {op11}  {op12}   {op_label2}
     {op_time_bigger} {op_time_small} 
     limit {start_title} , {int(page_size / 2)}) as a
 union all
 select * from (select 
-    row_number () over() as id,
+    a.campaign_id as id,
     book as novels,
     dt as startDate,
     date_format( now(),'%Y-%m-%d') as endDate,
@@ -905,7 +935,7 @@ select * from (select
     'all' as dataType,
     owner as creator,
     False as isCollected,
-    '' as labels,
+    b.labels  as labels,
     if(click_count=0 or view_count =0 or view_count is null or click_count is null,0,round(click_count / view_count,2)) as clickRate,
     round(width/if(height,height,1),2) aspect_ratio,
     cost as consumeAmount,
@@ -918,9 +948,15 @@ select * from (select
     date_format( now(),'%Y-%m-%d %H:%i:%S') as upateTime,
     null as updateBy,
     use_times as userTimes
-    from dw_image_cost_day
+    from 
+     dw_image_cost_day a
+    left join  (select a.campaign_id,group_concat(b.label) as labels from label_database a
+                left join ads_label b on a.label_id=b.id
+                where 1=1   {op_label}
+                group by a.campaign_id ) as b on  a.campaign_id= b.campaign_id
     where  REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') is not null and length(title)>0 
-    and (1=1 {op1}  {op4}   {op10} {op11}  {op12}  {op_or1}) 
+    and (1=1 {op1}   {op_or1}) 
+      {op4}   {op10} {op11}  {op12} {op_label2}
     {op_time_bigger} {op_time_small}  
     limit {start_des} , {int(page_size / 2)}
     ) as b 
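
All three ranking queries interpolate label_ids straight into an in (...) clause. A minimal guard (sanitize_id_list is a hypothetical helper, not in this commit) that keeps the clause well-formed and blocks stray SQL:

    def sanitize_id_list(raw):
        # '3, 7,12' -> '3,7,12'; any non-numeric part raises ValueError
        ids = [int(part) for part in str(raw).split(',') if part.strip()]
        return ','.join(str(i) for i in ids)

    op_label = f" and label_id in ({sanitize_id_list(label_ids)}) " if label_ids else ''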

+ 7 - 7
handlers/PublicAnalysisHandler.py

@@ -190,7 +190,7 @@ class AdvertisementIdea(BaseHandler):
             arg = self.get_args()
             data_type = arg.get('dataType', 'all')  # 'private' for the user's own data, 'all' for shared data
             channel = arg.get('channel')  # channel: Moments feed, Douyin, Guangdiantong, etc.; options are Moments feed and Official Account traffic  # TODO: parked here for now
-            labels = arg.get('labels')  # labels
+            label_ids = arg.get('labels')  # label ids
             collect = arg.get('collect')  # whether to use label data
             page = arg.get('pageNum', 1)
             page_size = arg.get('pageSize', 20)
@@ -200,11 +200,11 @@ class AdvertisementIdea(BaseHandler):
             end = arg.get('endDate')
             book = arg.get('novels')
             is_video = arg.get('type')
-            is_singleimg = arg.get('singleImg')  # whether this is an image group; defaults to no
+            is_singleimg = arg.get('singleImg', True)  # whether this is an image group; defaults to no
             user_id = arg.get('userId', '192')  # TODO: test default 192
 
             data = idea_rank(user_id, start, end, page, page_size, order, order_by,
-                             book, channel, is_singleimg, is_video, labels, collect, data_type)
+                             book, channel, is_singleimg, is_video, label_ids, collect, data_type)
             self.write_json_tmp_java(data=data)
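
The new singleImg default of True only applies when the argument is absent. If get_args() hands back query-string values as strings, 'false' is still truthy and the group-image branch (if not is_singleimg) can never fire. A hedged coercion sketch, assuming the value arrives as a string, bool, or None:

    def to_bool(value, default=True):
        # Hypothetical helper: treat 'false', '0', 'no' and '' as False
        if value is None:
            return default
        if isinstance(value, str):
            return value.strip().lower() not in ('false', '0', 'no', '')
        return bool(value)

    is_singleimg = to_bool(arg.get('singleImg'))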
 
 
@@ -228,7 +228,7 @@ class AdvertisementMedia(BaseHandler):
             end = arg.get('endDate')
             book = arg.get('novels')
             is_video = arg.get('type')
-            is_singleimg = arg.get('singleImg')  # whether this is an image group; defaults to no
+            is_singleimg = arg.get('singleImg', True)  # whether this is an image group; defaults to no
             user_id = arg.get('userId', '192')  # TODO: test default 192
 
             data = media_rank(user_id, start, end, page, page_size, order, order_by,
@@ -256,7 +256,7 @@ class AdvertisementContent(BaseHandler):
             end = arg.get('endDate')
             book = arg.get('novels')
             is_video = arg.get('type')
-            is_singleimg = arg.get('singleImg')  # whether this is an image group; defaults to no
+            is_singleimg = arg.get('singleImg', True)  # whether this is an image group; defaults to no
             user_id = arg.get('userId', '192')  # TODO: test default 192
 
             data = content_rank(user_id, start, end, page, page_size, order, order_by,
@@ -266,8 +266,8 @@ class AdvertisementContent(BaseHandler):
 
 class LabelList(BaseHandler):
     def get(self):
-        page=1
-        page_size=1000
+        page = 1
+        page_size = 1000
         data = label_list(page, page_size)
         self.write_json_tmp_java(data=data)