Bläddra i källkod

MOD:留存修改时间定义

cxyu 3 år sedan
förälder
incheckning
b7baab3716
1 ändrad fil med 131 tillägg och 32 borttagningar
  1. 131 32
      app/etl/data_stat_task.py

+ 131 - 32
app/etl/data_stat_task.py

@@ -22,8 +22,9 @@ def platform_data_sum(ymd):
 
 
 def dw_daily_channel_cost(ymd):
 def dw_daily_channel_cost(ymd):
     def table_name(datatime_tmp, datatime_realtime):
     def table_name(datatime_tmp, datatime_realtime):
-        str_year = min(datatime_tmp.tm_year, datatime_realtime.tm_year)
-        str_mon = min(datatime_tmp.tm_mon, datatime_realtime.tm_mon)
+        datatime_use = min(datatime_tmp, datatime_realtime)
+        str_year = datatime_use.tm_year
+        str_mon = datatime_use.tm_mon
         str_mon = str_mon if str_mon > 9 else '0' + str(str_mon)
         str_mon = str_mon if str_mon > 9 else '0' + str(str_mon)
         res = 'h_log_mem_login_{}{}'.format(str_year, str_mon)
         res = 'h_log_mem_login_{}{}'.format(str_year, str_mon)
         return res
         return res
@@ -34,6 +35,17 @@ def dw_daily_channel_cost(ymd):
         days=1)).timetuple()
         days=1)).timetuple()
     datatime_ymd_tom_after = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
     datatime_ymd_tom_after = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
         days=2)).timetuple()
         days=2)).timetuple()
+    datatime_ymd_seven_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=7)).timetuple()
+    datatime_ymd_fifteen_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=15)).timetuple()
+    datatime_ymd_thirty_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=30)).timetuple()
+    datatime_ymd_fortyfive_day = (
+                datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+            days=45)).timetuple()
+    datatime_ymd_sixty_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=60)).timetuple()
     datatime_realtime = datetime.now().timetuple()
     datatime_realtime = datetime.now().timetuple()
 
 
     # datatime_str
     # datatime_str
@@ -41,6 +53,16 @@ def dw_daily_channel_cost(ymd):
         days=1)).strftime('%Y-%m-%d')
         days=1)).strftime('%Y-%m-%d')
     ymd_tom_after = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
     ymd_tom_after = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
         days=2)).strftime('%Y-%m-%d')
         days=2)).strftime('%Y-%m-%d')
+    ymd_seven_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=7)).strftime('%Y-%m-%d')
+    ymd_fifteen_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=15)).strftime('%Y-%m-%d')
+    ymd_thirty_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=30)).strftime('%Y-%m-%d')
+    ymd_fortyfive_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=45)).strftime('%Y-%m-%d')
+    ymd_sixty_day = (datetime.strptime(ymd, '%Y-%m-%d').astimezone(timezone(timedelta(hours=8))) + timedelta(
+        days=60)).strftime('%Y-%m-%d')
 
 
     # timestamp
     # timestamp
     timestamp_ymd = time.mktime(datatime_ymd)
     timestamp_ymd = time.mktime(datatime_ymd)
@@ -50,6 +72,11 @@ def dw_daily_channel_cost(ymd):
     table_name_login_today = table_name(datatime_ymd, datatime_realtime)
     table_name_login_today = table_name(datatime_ymd, datatime_realtime)
     table_name_login_tom = table_name(datatime_ymd_tom, datatime_realtime)
     table_name_login_tom = table_name(datatime_ymd_tom, datatime_realtime)
     table_name_login_tom_after = table_name(datatime_ymd_tom_after, datatime_realtime)
     table_name_login_tom_after = table_name(datatime_ymd_tom_after, datatime_realtime)
+    table_name_login_seven_day = table_name(datatime_ymd_seven_day, datatime_realtime)
+    table_name_login_fifteen_day = table_name(datatime_ymd_fifteen_day, datatime_realtime)
+    table_name_login_thirty_day = table_name(datatime_ymd_thirty_day, datatime_realtime)
+    table_name_login_fortyfive_day = table_name(datatime_ymd_fortyfive_day, datatime_realtime)
+    table_name_login_sixty_day = table_name(datatime_ymd_sixty_day, datatime_realtime)
 
 
     sql = f"""
     sql = f"""
         select x.dt,x.channel,pitcher,stage,x.platform,x.book,
         select x.dt,x.channel,pitcher,stage,x.platform,x.book,
@@ -64,6 +91,11 @@ def dw_daily_channel_cost(ymd):
                 v.today_active_user_rate,
                 v.today_active_user_rate,
                 v.second_stay_rate,
                 v.second_stay_rate,
                 v.third_stay_rate,
                 v.third_stay_rate,
+                v.seven_stay_rate, 
+                v.fifteen_stay_rate,
+                v.thirty_stay_rate,
+                v.fortyfive_stay_rate,
+                v.sixty_stay_rate,
                 v.game_user_sum
                 v.game_user_sum
                  from
                  from
         ( select dt, channel,stage,pitcher,platform,book  from channel_info_daily cid 
         ( select dt, channel,stage,pitcher,platform,book  from channel_info_daily cid 
@@ -101,33 +133,40 @@ def dw_daily_channel_cost(ymd):
       
       
        left join     
        left join     
         (
         (
-        select c.name as channel ,DATE(FROM_UNIXTIME(origin.create_time)) as wx_date,
+        select h.name as channel ,DATE(FROM_UNIXTIME(origin.create_time)) as wx_date,
         count(*) as reg_num
         count(*) as reg_num
         from 
         from 
         db_mp.h_member origin left join
         db_mp.h_member origin left join
-        db_mp.mp_conf_agent a  on origin.app_id =a.app_id and origin.agent_id = a.agent_id 
-        left join
-        db_mp.mp_mp_conf b on a.advertiser_conf_id =b.id
-        left join 
-        quchen_text.advertiser_vx c on b.mp_id =c.wechat_account_id 
-        where c.wechat_account_id is not null
+        (select a.name,d.app_id,d.agent_id from quchen_text.advertiser_vx a
+			left join db_mp.h_game b on a.book = b.name 
+			left join db_mp.mp_mp_conf c on a.name= c.wx_name 
+			left join db_mp.mp_conf_agent d on c.id=d.advertiser_conf_id 
+			where d.app_id =b.id
+			group by d.app_id ,d.agent_id ) h 
+			on origin.app_id = h.app_id and origin.agent_id = h.agent_id
+        where h.name is not null
         and origin.create_time > {timestamp_ymd} and origin.create_time < {timestamp_tom}
         and origin.create_time > {timestamp_ymd} and origin.create_time < {timestamp_tom}
         group by name,wx_date
         group by name,wx_date
         ) y on x.channel= y.channel
         ) y on x.channel= y.channel
         
         
         left join     
         left join     
         (
         (
-        select f.name as channel,DATE(FROM_UNIXTIME(c.create_time)) as wx_date,
+        select h.name as channel,DATE(FROM_UNIXTIME(c.create_time)) as wx_date,
         count(*) as create_user_num
         count(*) as create_user_num
         from db_mp.h_mg_role a
         from db_mp.h_mg_role a
         left join db_mp.h_mem_game b on a.mg_mem_id = b.id
         left join db_mp.h_mem_game b on a.mg_mem_id = b.id
         left join db_mp.h_member c on b.mem_id = c.id
         left join db_mp.h_member c on b.mem_id = c.id
-        left join db_mp.mp_conf_agent d on c.app_id = d.app_id  and c.agent_id = d.agent_id 
-        left join db_mp.mp_mp_conf e on d.advertiser_conf_id =e.id
-        left join quchen_text.advertiser_vx f on e.mp_id = f.wechat_account_id 
-        where f.name is not null 
+        left join
+        (select a.name,d.app_id,d.agent_id from quchen_text.advertiser_vx a
+			left join db_mp.h_game b on a.book = b.name 
+			left join db_mp.mp_mp_conf c on a.name= c.wx_name 
+			left join db_mp.mp_conf_agent d on c.id=d.advertiser_conf_id 
+			where d.app_id =b.id
+			group by d.app_id ,d.agent_id ) h 
+			on c.app_id = h.app_id and c.agent_id = h.agent_id
+        where h.name is not null
         and c.create_time >= {timestamp_ymd} and c.create_time <= {timestamp_tom}
         and c.create_time >= {timestamp_ymd} and c.create_time <= {timestamp_tom}
-        group by f.name,wx_date
+        group by h.name,wx_date
         order by wx_date desc
         order by wx_date desc
         ) w on x.channel= w.channel
         ) w on x.channel= w.channel
         
         
@@ -135,13 +174,20 @@ def dw_daily_channel_cost(ymd):
         left join
         left join
         (
         (
         select channel ,
         select channel ,
-if(sum(d_ct)=0,0,ifnull(sum(e_ct),0)/sum(d_ct)) as today_active_user_rate,
+if(max(d_ct)=0,0,ifnull(max(e_ct),0)/max(d_ct)) as today_active_user_rate,
 if(sum(a_ct)=0,0,ifnull(sum(b_ct),0)/sum(a_ct)) as second_stay_rate,
 if(sum(a_ct)=0,0,ifnull(sum(b_ct),0)/sum(a_ct)) as second_stay_rate,
 if(sum(a_ct)=0,0,ifnull(sum(c_ct),0)/sum(a_ct)) as third_stay_rate,
 if(sum(a_ct)=0,0,ifnull(sum(c_ct),0)/sum(a_ct)) as third_stay_rate,
-sum(d_ct) game_user_sum from 
+if(sum(a_ct)=0,0,ifnull(sum(seven_ct),0)/sum(a_ct)) as seven_stay_rate,
+if(sum(a_ct)=0,0,ifnull(sum(fifteen_ct),0)/sum(a_ct)) as fifteen_stay_rate,
+if(sum(a_ct)=0,0,ifnull(sum(thirty_ct),0)/sum(a_ct)) as thirty_stay_rate,
+if(sum(a_ct)=0,0,ifnull(sum(fortyfive_ct),0)/sum(a_ct)) as fortyfive_stay_rate,
+if(sum(a_ct)=0,0,ifnull(sum(sixty_ct),0)/sum(a_ct)) as sixty_stay_rate,
+max(d_ct) game_user_sum from 
 
 
 (select   h.name as channel ,
 (select   h.name as channel ,
-a.ct as a_ct,b.ct as b_ct,c.ct as c_ct,d.ct as d_ct,e.ct as e_ct
+a.ct as a_ct,b.ct as b_ct,c.ct as c_ct,d.ct as d_ct,e.ct as e_ct,
+seven.ct as seven_ct,fifteen.ct as fifteen_ct,thirty.ct as thirty_ct,
+fortyfive.ct as fortyfive_ct,sixty.ct as sixty_ct
 from 
 from 
 (select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
 (select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
 db_mp.h_member  a 
 db_mp.h_member  a 
@@ -167,28 +213,73 @@ where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
 and b.mem_id is not null
 and b.mem_id is not null
 group by a.app_id ,a.agent_id ) c on a.app_id =c.app_id and a.agent_id = c.agent_id
 group by a.app_id ,a.agent_id ) c on a.app_id =c.app_id and a.agent_id = c.agent_id
 left join 
 left join 
-(select app_id ,agent_id ,count(*) as ct from db_mp.h_member hm 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_seven_day} 
+   where date = '{ymd_seven_day}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) seven on a.app_id =seven.app_id and a.agent_id = seven.agent_id
+left join 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_fifteen_day} 
+   where date = '{ymd_fifteen_day}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) fifteen on a.app_id =fifteen.app_id and a.agent_id = fifteen.agent_id
+left join 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_thirty_day} 
+   where date = '{ymd_thirty_day}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) thirty on a.app_id =thirty.app_id and a.agent_id = thirty.agent_id
+left join 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_fortyfive_day} 
+   where date = '{ymd_fortyfive_day}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) fortyfive on a.app_id =fortyfive.app_id and a.agent_id = fortyfive.agent_id
+left join 
+(select '{ymd}',a.app_id,a.agent_id,count(*) as ct from 
+db_mp.h_member  a 
+left join (select distinct(mem_id)  from db_mp.{table_name_login_sixty_day} 
+   where date = '{ymd_sixty_day}' )  b on a.id=b.mem_id  
+where a.create_time >={timestamp_ymd} and a.create_time <={timestamp_tom}
+and b.mem_id is not null
+group by a.app_id ,a.agent_id ) sixty on a.app_id =sixty.app_id and a.agent_id = sixty.agent_id
+left join 
+(select app_id  ,count(*) as ct from db_mp.h_member hm 
 where  create_time <={timestamp_tom}
 where  create_time <={timestamp_tom}
-group by app_id ,agent_id ) d on a.app_id =d.app_id and a.agent_id =d.agent_id
+group by app_id  ) d on a.app_id =d.app_id
 left join 
 left join 
-(select count(distinct(mem_id)) as ct,app_id ,agent_id from db_mp.{table_name_login_today} 
+(select count(distinct(mem_id)) as ct,app_id  from db_mp.{table_name_login_today} 
    where date = '{ymd}' 
    where date = '{ymd}' 
-   group by app_id ,agent_id ) e on a.agent_id =e.agent_id and a.app_id =e.app_id
-left join db_mp.mp_conf_agent f on a.app_id =f.app_id and a.agent_id =f.agent_id 
-left join db_mp.mp_mp_conf g on f.advertiser_conf_id = g.id 
-left join quchen_text.advertiser_vx h on g.mp_id = h.wechat_account_id 
+   group by app_id  ) e on  a.app_id =e.app_id
+left join 
+(select a.name as name,d.app_id as app_id ,d.agent_id as agent_id
+			from quchen_text.advertiser_vx a
+			left join db_mp.h_game b on a.book = b.name 
+			left join db_mp.mp_mp_conf c on a.name= c.wx_name 
+			left join db_mp.mp_conf_agent d on c.id=d.advertiser_conf_id 
+where d.app_id =b.id
+group by d.app_id ,d.agent_id) h on a.app_id = h.app_id and a.agent_id = h.agent_id
 where h.name is not null)  as keep_data
 where h.name is not null)  as keep_data
 group by channel)
 group by channel)
             v on x.channel= v.channel
             v on x.channel= v.channel
         
         
         
         
         """
         """
-    # print(sql)
+    print(sql)
     data = db.quchen_text.get_data_list(sql)
     data = db.quchen_text.get_data_list(sql)
 
 
 
 
     data1 = []
     data1 = []
-    col = "dt,channel,pitcher,stage,platform,book,view_count,click_count,follow_user,cost,web_view_count,platform_view_count,web_order_count,type,require_roi,require_mult,reg_num,create_user_num,today_active_user_rate,second_stay_rate,third_stay_rate,game_user_sum"
+    col = "dt,channel,pitcher,stage,platform,book,view_count,click_count,follow_user,cost,web_view_count,platform_view_count,web_order_count,type,require_roi,require_mult,reg_num,create_user_num,today_active_user_rate,second_stay_rate,third_stay_rate,seven_stay_rate,fifteen_stay_rate,thirty_stay_rate,fortyfive_stay_rate,sixty_stay_rate,game_user_sum"
     for i in data:
     for i in data:
         i[0] = str(i[0])
         i[0] = str(i[0])
         i[9] = str(i[9])
         i[9] = str(i[9])
@@ -203,12 +294,19 @@ group by channel)
         i[19] = float(i[19]) if i[19] else 0
         i[19] = float(i[19]) if i[19] else 0
         i[20] = float(i[20]) if i[20] else 0
         i[20] = float(i[20]) if i[20] else 0
         i[21] = float(i[21]) if i[21] else 0
         i[21] = float(i[21]) if i[21] else 0
-
+        i[22] = float(i[22]) if i[22] else 0
+        i[23] = float(i[23]) if i[23] else 0
+        i[24] = float(i[24]) if i[24] else 0
+        i[25] = float(i[25]) if i[25] else 0
+        i[26] = float(i[26]) if i[26] else 0
         data1.append(tuple(i))
         data1.append(tuple(i))
+    for _ in data1:
+        print(_)
     ck.execute(f"alter table game_data.dw_daily_channel_cost drop  partition '{ymd}' ")
     ck.execute(f"alter table game_data.dw_daily_channel_cost drop  partition '{ymd}' ")
     logging.info(len(data1))
     logging.info(len(data1))
     ck.insertMany("game_data.dw_daily_channel_cost", col, tuple(data1))
     ck.insertMany("game_data.dw_daily_channel_cost", col, tuple(data1))
 
 
+
 def channel_by_account_daily(ymd):
 def channel_by_account_daily(ymd):
     """返回当天消耗账户对应的公众号表"""
     """返回当天消耗账户对应的公众号表"""
     logging.info("run> channel_by_account_daily")
     logging.info("run> channel_by_account_daily")
@@ -251,6 +349,7 @@ def channel_info_daily(ymd):
                 from advertiser_vx 
                 from advertiser_vx 
                 where name is not null  
                 where name is not null  
                 and start_date <= '{ymd}'
                 and start_date <= '{ymd}'
+                and if(end_date is null,1,end_date  >= '{ymd}')
                 group by name,stage,pitcher,platform,book
                 group by name,stage,pitcher,platform,book
                 ) b on a.name=b.name
                 ) b on a.name=b.name
                 """
                 """
@@ -323,12 +422,12 @@ def order_account_text():
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     # channel_info_daily('2021-02-06')
     # channel_info_daily('2021-02-06')
-
+    # dw_daily_channel_cost('2021-10-11')
+    # exit()
     # channel_by_account_daily('2021-02-05')
     # channel_by_account_daily('2021-02-05')
-    dw_daily_channel_cost('2021-11-06')
-    exit()
-    for i in dt.getDateLists('2021-09-08', '2021-11-02'):
+    for i in dt.getDateLists('2021-09-08', '2021-11-13'):
         print(i)
         print(i)
+        channel_by_account_daily(i)
         channel_info_daily(i)
         channel_info_daily(i)
 
 
         dw_daily_channel_cost(i)
         dw_daily_channel_cost(i)