Browse Source

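MOD: 添加留存维度信息 (add retention-dimension columns: today_active_user_rate, second_stay_rate, third_stay_rate, game_user_sum)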

cxyu 4 years ago
parent
commit
b51b30de0f

+ 1 - 1
app/api_data/platform_order/order_data_change.py

@@ -28,7 +28,7 @@ def insert_order_data(ymd):
     left join db_mp.h_game d on a.app_id =d.id 
     left join db_mp.h_member f on a.mem_id = f.id
     where c.`type` in (6,7)
-    and a.create_time<{timestamp_tom} and a.create_time>{timestamp_ymd}
+    and a.create_time<={timestamp_tom} and a.create_time>={timestamp_ymd}
     '''
     # print(sql)
     list_ = db.db_mp.get_data_list(sql)

+ 3 - 1
app/api_data/tx_ad_cost/get_cost_game.py

@@ -186,7 +186,9 @@ if __name__ == '__main__':
     # get_data()
 
     thread_list = []
-    get_data('2021-10-07', '2021-10-07', 21768795, '80ddc731c9108817b560273422c8e187')
+    y = [21768795, '', '80ddc731c9108817b560273422c8e187']
+    li = []
+    get_data(y, li, '2021-10-07', '2021-10-07')
     # for i in range(1):
     #     pass
     #     one_tread=threading.Thread(target=get_data)

+ 6 - 0
app/etl/data_stat_run.py

@@ -67,6 +67,12 @@ def daily():
     do_order(st, et)
     do_cost(st, et)
 
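+    # TODO: special case -- the created-role counts keep changing after the fact, so the data
+    #       has to be re-run on every execution; no longer needed once flink-cdc is introduced.
+    # TODO: for now, re-run the most recent 30 days so their created-role counts stay correct.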
+    for i in du.getDateLists(du.get_n_days(-30), et):
+        dw_daily_channel_cost(i)
+        platform_data_sum(i)
+
 
 if __name__ == '__main__':
     logging.basicConfig(

+ 94 - 10
app/etl/data_stat_task.py

@@ -21,12 +21,35 @@ def platform_data_sum(ymd):
 
 
 def dw_daily_channel_cost(ymd):
+    def table_name(datatime_tmp, datatime_realtime):
+        """Return the monthly login-log table name for the earlier of two dates.
+
+        Args:
+            datatime_tmp: ``time.struct_time`` of the date being queried
+                (only ``tm_year`` and ``tm_mon`` are read).
+            datatime_realtime: ``time.struct_time`` of the current time
+                (only ``tm_year`` and ``tm_mon`` are read).
+
+        Returns:
+            ``'h_log_mem_login_YYYYMM'`` built from whichever argument has
+            the earlier (year, month).
+        """
+        # Compare year and month together as one pair. Taking min() of each
+        # field on its own mixes fields from different dates: 2021-12 vs
+        # 2022-01 would give year 2021 with month 1, i.e. table ..._202101.
+        # NOTE(review): presumably the min() exists so that a date in a
+        # not-yet-created future month falls back to the current table -- confirm.
+        year, month = min(
+            (datatime_tmp.tm_year, datatime_tmp.tm_mon),
+            (datatime_realtime.tm_year, datatime_realtime.tm_mon),
+        )
+        return 'h_log_mem_login_{}{:02d}'.format(year, month)
+
     logging.info("run> dw_daily_channel_cost")
     datatime_ymd = datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))).timetuple()
-    timestamp_ymd = time.mktime(datatime_ymd)
-    tomorrow_ymd = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+    datatime_ymd_tom = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
         days=1)).timetuple()
-    timestamp_tom = time.mktime(tomorrow_ymd)
+    datatime_ymd_tom_after = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=2)).timetuple()
+    datatime_realtime = datetime.now().timetuple()
+
+    # datatime_str
+    ymd_tom = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=1)).strftime('%Y-%m-%d')
+    ymd_tom_after = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=2)).strftime('%Y-%m-%d')
+
+    # timestamp
+    timestamp_ymd = time.mktime(datatime_ymd)
+    timestamp_tom = time.mktime(datatime_ymd_tom)
+
+    # table_name
+    table_name_login_today = table_name(datatime_ymd, datatime_realtime)
+    table_name_login_tom = table_name(datatime_ymd_tom, datatime_realtime)
+    table_name_login_tom_after = table_name(datatime_ymd_tom_after, datatime_realtime)
 
     sql = f"""
         select x.dt,x.channel,pitcher,stage,x.platform,x.book,
@@ -36,7 +59,12 @@ def dw_daily_channel_cost(ymd):
                ifnull(platform_view_count,0) platform_view_count,
                ifnull(web_order_count,0) web_order_count,
                if(stage ='趣程15期' or stage ='趣程26期' or stage ='趣程30期','GDT','MP') type
-                ,0 require_roi,0 require_mult,ifnull(y.reg_num,0),ifnull(w.create_user_num,0)
+                ,0 require_roi,0 require_mult,
+                ifnull(y.reg_num,0),ifnull(w.create_user_num,0),
+                v.today_active_user_rate,
+                v.second_stay_rate,
+                v.third_stay_rate,
+                v.game_user_sum
                  from
         ( select dt, channel,stage,pitcher,platform,book  from channel_info_daily cid 
             where dt='{ymd}' and channel !=''
@@ -47,6 +75,7 @@ def dw_daily_channel_cost(ymd):
             ) 
         ) x  -- 只允许渠道MP、GDT
         left join
+        
         (select channel,sum(cost) as cost,sum(view_count) as view_count,sum(valid_click_count) as click_count,
             sum(from_follow_uv)  as follow_user,
          sum(web_view_count) as web_view_count,
@@ -88,7 +117,7 @@ def dw_daily_channel_cost(ymd):
         
         left join     
         (
-        select f.name as channel,DATE(FROM_UNIXTIME(a.create_time)) as wx_date,
+        select f.name as channel,DATE(FROM_UNIXTIME(c.create_time)) as wx_date,
         count(*) as create_user_num
         from db_mp.h_mg_role a
         left join db_mp.h_mem_game b on a.mg_mem_id = b.id
@@ -97,16 +126,65 @@ def dw_daily_channel_cost(ymd):
         left join db_mp.mp_mp_conf e on d.advertiser_conf_id =e.id
         left join quchen_text.advertiser_vx f on e.mp_id = f.wechat_account_id 
         where f.name is not null 
-        and c.create_time > {timestamp_ymd} and c.create_time < {timestamp_tom}
+        and c.create_time >= {timestamp_ymd} and c.create_time <= {timestamp_tom}
         group by f.name,wx_date
         order by wx_date desc
         ) w on x.channel= w.channel
-
+        
+        
+        left join
+        (
+        select   h.name as channel ,
+if(d.ct=0,0,ifnull(e.ct,0)/d.ct) as today_active_user_rate,
+if(a.ct=0,0,ifnull( b.ct,0)/a.ct) as second_stay_rate,
+if(a.ct=0,0,ifnull(c.ct,0)/a.ct) as third_stay_rate,
+d.ct game_user_sum
+from 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_today} 
+   where date = '{ymd}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) a
+left join 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_tom} 
+   where date = '{ymd_tom}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) b on a.app_id =b.app_id and a.agent_id =b.agent_id
+left join 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_tom_after} 
+   where date = '{ymd_tom_after}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) c on a.app_id =c.app_id and a.agent_id = c.agent_id
+left join 
+(select app_id ,agent_id ,count(*) as ct from db_mp.h_member hm 
+where  create_time <={timestamp_tom}
+group by app_id ,agent_id ) d on a.app_id =d.app_id and a.agent_id =d.agent_id
+left join 
+(select count(distinct(mem_id)) as ct,app_id ,agent_id from db_mp.{table_name_login_today} 
+   where date = '{ymd}' 
+   group by app_id ,agent_id ) e on a.agent_id =e.agent_id and a.app_id =e.app_id
+left join db_mp.mp_conf_agent f on a.app_id =f.app_id and a.agent_id =f.agent_id 
+left join db_mp.mp_mp_conf g on f.advertiser_conf_id = g.id 
+left join quchen_text.advertiser_vx h on g.mp_id = h.wechat_account_id 
+where h.name is not null
+            ) v on x.channel= v.channel
+        
+        
         
         """
     data = db.quchen_text.get_data_list(sql)
+
+
     data1 = []
-    col = "dt,channel,pitcher,stage,platform,book,view_count,click_count,follow_user,cost,web_view_count,platform_view_count,web_order_count,type,require_roi,require_mult,reg_num,create_user_num"
+    col = "dt,channel,pitcher,stage,platform,book,view_count,click_count,follow_user,cost,web_view_count,platform_view_count,web_order_count,type,require_roi,require_mult,reg_num,create_user_num,today_active_user_rate,second_stay_rate,third_stay_rate,game_user_sum"
     for i in data:
         i[0] = str(i[0])
         i[9] = str(i[9])
@@ -117,12 +195,16 @@ def dw_daily_channel_cost(ymd):
         i[10] = float(i[10])
         i[11] = float(i[11])
         i[12] = float(i[12])
+        i[18] = float(i[18]) if i[18] else 0
+        i[19] = float(i[19]) if i[19] else 0
+        i[20] = float(i[20]) if i[20] else 0
+        i[21] = float(i[21]) if i[21] else 0
+
         data1.append(tuple(i))
     ck.execute(f"alter table game_data.dw_daily_channel_cost drop  partition '{ymd}' ")
     logging.info(len(data1))
     ck.insertMany("game_data.dw_daily_channel_cost", col, tuple(data1))
 
-
 def channel_by_account_daily(ymd):
     """返回当天消耗账户对应的公众号表"""
     logging.info("run> channel_by_account_daily")
@@ -239,7 +321,9 @@ if __name__ == '__main__':
     # channel_info_daily('2021-02-06')
 
     # channel_by_account_daily('2021-02-05')
-    for i in dt.getDateLists('2021-04-18', '2021-10-28'):
+    # dw_daily_channel_cost('2021-10-18')
+
+    for i in dt.getDateLists('2021-09-08', '2021-11-28'):
         print(i)
         channel_info_daily(i)