ck 4 years ago
parent
commit
f57d69236f
3 changed files with 192 additions and 73 deletions
  1. 4 1
      app/etl/data_stat_run.py
  2. 0 64
      app/etl/data_stat_task.py
  3. 188 8
      app/etl/dw/dw_channel_daily.py

+ 4 - 1
app/etl/data_stat_run.py

@@ -4,7 +4,7 @@ from model.DingTalkUtils import DingTalkUtils
 from data_stat_task import *
 from sync_to_ck_task import *
 from app.etl.dm.dm_pitcher_daily_overview import dm_pitcher_daily_overview
-from app.etl.dw.dw_channel_daily import dw_daily_channel
+from app.etl.dw.dw_channel_daily import dw_daily_channel,dw_channel
 from app.etl.dw.dw_pitcher_daily import dw_pitcher_trend
 from app.etl.ods.ods_book_info_daily import book_info_daily
 log=logger()
@@ -14,10 +14,12 @@ import sys
 
 def do_order(st,et):
     for i in du.getDateLists(st,et):
+        print(i)
         ods_order(i)
         order_sync_ck(i)
 def do_cost(st,et):
     for i in du.getDateLists(st,et):
+        print(i)
         channel_by_account_daily(i)
         channel_info_daily(i)
         book_info_daily(i)
@@ -30,6 +32,7 @@ def main(st,et):
         do_order(st,et)
         do_cost(st,et)
         dw_daily_channel()
+        dw_channel()
         dw_pitcher_trend()
         dm_pitcher_daily_overview()
     except Exception as e:

+ 0 - 64
app/etl/data_stat_task.py

@@ -149,67 +149,3 @@ def order_account_text():
 
 
 
-
-def   dw_channel_daily_total(ymd):
-    sql=f"""insert into dw_channel_daily_total
-            select '{ymd}' dt,channel,total_cost,total_amount,total_first_amount from
-            (select channel,sum(cost) total_cost  from dw_daily_channel_cost where  dt<='{ymd}'  group by channel)a
-            left outer join
-            (select  channel,sum(amount) total_amount,
-            sum(if(toDate(formatDateTime(reg_time,'%Y-%m-%d'))=date,amount,0)) total_first_amount
-            from order where date<='{ymd}' group by channel) b
-             on a.channel=b.channel 
-    
-            """
-    ck.execute(f"alter table dw_channel_daily_total drop partition '{ymd}'")
-    print(sql)
-    ck.execute(sql)
-
-
-
-
-
-if __name__ == '__main__':
-    for i in dt.getDateLists('2019-03-18','2021-01-27'):
-        channel_by_account_daily(i)
-        channel_info_daily(i)
-
-    # dw_daily_pitcher('2021-01-14')
-    # dw_daily_channel_cost('2021-01-28')
-
-    exit(0)
-    # dw_channel_daily_total('2020-07-20')
-    # channel_by_account_daily('2020-12-17')
-    # dw_daily_channel_cost('2020-12-17')
-    # dw_order_channel_cost_sync_ck('2020-12-17')
-    # exit(0)
-    # ods_order('2020-12-20')
-    # dw_daily_channel_plus()
-    # exit()
-    # dw_daily_channel()
-    # exit(0)
-    # dm_pitcher_daily_page_total()
-    # dm_pitcher_daily_page_total()
-    # exit(0)
-    # dw_channel_daily_total('2021-01-11')
-    # dw_daily_channel_cost('2021-01-12')
-    # dw_channel_daily_total('2021-01-13')
-    # exit(0)
-
-    # dw_daily_channel()
-    # # exit(0)
-    # for i in dt.getDateLists('2019-03-18','2021-01-14'):
-    #     print(i)
-        # dw_channel_daily_total(i)
-        # dw_daily_pitcher(i)
-        # channel_by_account_daily(i)
-    #     dw_daily_channel_cost(i)
-    #     dw_order_channel_cost_sync_ck(i)
-    # dw_daily_channel()
-
-    #     order_sync_ck(today)
-    #     # ods_order(i)
-        # channel_info_daily(i)
-        # dw_daily_channel_cost(i)
-        # dw_order_channel_cost_sync_ck(i)
-        # dw_channel_daily_total(i)

+ 188 - 8
app/etl/dw/dw_channel_daily.py

@@ -17,9 +17,11 @@ select
        dt,channel,pitcher,stage,platform,book,
        order_count,order_user,order_amount,first_order_count,first_order_user,first_order_amount,
        view_count,click_count,follow_user,cost,reg_order_count,reg_order_user,
-       reg_order_amount,reg_order_amount30,web_view_count,platform_view_count,web_order_count,total_cost,
-       total_amount,reg_order_user_again,reg_order_user7,reg_order_user30,reg_order_amount7,type,
-       total_first_amount,require_roi,require_mult
+       reg_order_amount,reg_order_amount30,web_view_count,platform_view_count,web_order_count,
+       0 total_cost,0 total_amount,
+       reg_order_user_again,reg_order_user7,reg_order_user30,reg_order_amount7,type,
+       0 total_first_amount,
+       require_roi,require_mult
 from
 (select dt,channel, pitcher,stage,platform,book,cost,view_count,click_count,follow_user,web_view_count,platform_view_count,web_order_count,type,require_roi,require_mult from dw_daily_channel_cost) a
     left outer join
@@ -46,10 +48,6 @@ from
              sum(amount) as order_amount
        from order  group by date,channel) d
         on dt=dt4 and channel=channel4
-left outer join
-     (select dt dt5,channel channel5,total_cost,total_amount,total_first_amount from dw_channel_daily_total ) e
-         on dt=dt5 and channel=channel5
-
 left outer join (
 select sum(if(user_order_count>1,1,0)) reg_order_user_again,channel channel6,toDate(reg_date) dt6  from (
 select formatDateTime(reg_time,'%Y-%m-%d') reg_date,channel,count(1) user_order_count
@@ -72,6 +70,188 @@ select formatDateTime(reg_time,'%Y-%m-%d') reg_date,channel,count(1) user_order_
 
 
 
+def dw_channel_daily():
+    sql="""
+select
+       dt,channel,pitcher,stage,platform,book,type,
+       order_count,order_user,order_amount,
+       first_order_count,first_order_user,first_order_amount,
+       view_count,click_count,follow_user,
+       cost,reg_order_count,reg_order_user,reg_order_amount,
+       web_view_count,platform_view_count,web_order_count,
+       reg_order_user_again,
+       reg_order_user_again3,
+       reg_order_user_again4,
+       reg_order_user_again5,
+       reg_order_user_again6
+from
+(select dt,channel, pitcher,stage,platform,book,cost,view_count,click_count,   ---基础属性和消耗数据
+        follow_user,web_view_count,platform_view_count,web_order_count,type,
+        require_roi,require_mult from dw_daily_channel_cost) a
+
+
+left outer join
+     (select date as dt3,channel as channel3,count(1) as first_order_count,          ---新用户首日充值
+     count(distinct user_id) as first_order_user,sum(amount) as first_order_amount
+    from order where toDate(reg_time)=date  group by date,channel) c
+    on dt=dt3 and channel=channel3
+
+left outer join
+        (select date as dt4,channel as channel4,count(1) as order_count,   ---账面充值
+        count(distinct user_id) as order_user,sum(amount) as order_amount
+        from order  group by date,channel) d
+        on dt=dt4 and channel=channel4
+
+left outer join (
+    select sum(if(user_order_count>=2,1,0)) reg_order_user_again,channel channel6,toDate(reg_date) dt6,  ---复冲人数
+           sum(if(user_order_count>=3,1,0)) reg_order_user_again3,
+           sum(if(user_order_count>=4,1,0)) reg_order_user_again4,
+           sum(if(user_order_count>=5,1,0)) reg_order_user_again5,
+           sum(if(user_order_count>=6,1,0)) reg_order_user_again6
+    from (select formatDateTime(reg_time,'%Y-%m-%d') reg_date,channel,count(1) user_order_count
+    from order group by formatDateTime(reg_time,'%Y-%m-%d') ,user_id,channel) x group by reg_date,channel
+    ) f on dt=dt6 and channel=channel6
+
+left outer join
+   (
+       select toDate(formatDateTime(reg_time,'%Y-%m-%d')) as dt2,    ---新用户累计充值数据
+   channel as channel2,
+   sum(amount) as reg_order_amount,
+   count(distinct user_id) as reg_order_user,
+   count(1) as reg_order_count
+   from order where reg_time>'2019-03-18 00:00:00' group by toDate(formatDateTime(reg_time,'%Y-%m-%d')),channel) b
+    on dt=dt2 and channel=channel2
+    having order_amount+cost+reg_order_amount>0"""
+
+    data=ck.execute(sql)
+    isql="replace into dw_channel values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
+    db.dm.executeMany(isql,data)
+
+def dw_channel_user_daily():
+    sql="""
+select toDate(formatDateTime(reg_time,'%Y-%m-%d')) as dt,
+            channel,
+           count(distinct if(subtractDays(date, 1)>=reg_time,null,user_id)) dc1,
+           count(distinct if(subtractDays(date, 2)>=reg_time,null,user_id)) dc2,
+           count(distinct if(subtractDays(date, 3)>=reg_time,null,user_id)) dc3,
+           count(distinct if(subtractDays(date, 4)>=reg_time,null,user_id)) dc4,
+           count(distinct if(subtractDays(date, 5)>=reg_time,null,user_id)) dc5,
+           count(distinct if(subtractDays(date, 6)>=reg_time,null,user_id)) dc6,
+           count(distinct if(subtractDays(date, 7)>=reg_time,null,user_id)) dc7,
+           count(distinct if(subtractDays(date, 8)>=reg_time,null,user_id)) dc8,
+           count(distinct if(subtractDays(date, 9)>=reg_time,null,user_id)) dc9,
+           count(distinct if(subtractDays(date, 10)>=reg_time,null,user_id)) dc10,
+           count(distinct if(subtractDays(date, 11)>=reg_time,null,user_id)) dc11,
+           count(distinct if(subtractDays(date, 12)>=reg_time,null,user_id)) dc12,
+           count(distinct if(subtractDays(date, 13)>=reg_time,null,user_id)) dc13,
+           count(distinct if(subtractDays(date, 14)>=reg_time,null,user_id)) dc14,
+           count(distinct if(subtractDays(date, 15)>=reg_time,null,user_id)) dc15,
+           count(distinct if(subtractDays(date, 16)>=reg_time,null,user_id)) dc16,
+           count(distinct if(subtractDays(date, 17)>=reg_time,null,user_id)) dc17,
+           count(distinct if(subtractDays(date, 18)>=reg_time,null,user_id)) dc18,
+           count(distinct if(subtractDays(date, 19)>=reg_time,null,user_id)) dc19,
+           count(distinct if(subtractDays(date, 20)>=reg_time,null,user_id)) dc20,
+           count(distinct if(subtractDays(date, 21)>=reg_time,null,user_id)) dc21,
+           count(distinct if(subtractDays(date, 22)>=reg_time,null,user_id)) dc22,
+           count(distinct if(subtractDays(date, 23)>=reg_time,null,user_id)) dc23,
+           count(distinct if(subtractDays(date, 24)>=reg_time,null,user_id)) dc24,
+           count(distinct if(subtractDays(date, 25)>=reg_time,null,user_id)) dc25,
+           count(distinct if(subtractDays(date, 26)>=reg_time,null,user_id)) dc26,
+           count(distinct if(subtractDays(date, 27)>=reg_time,null,user_id)) dc27,
+           count(distinct if(subtractDays(date, 28)>=reg_time,null,user_id)) dc28,
+           count(distinct if(subtractDays(date, 29)>=reg_time,null,user_id)) dc29,
+           count(distinct if(subtractDays(date, 30)>=reg_time,null,user_id)) dc30
+from order where reg_time>'2019-03-18 00:00:00' group by toDate(formatDateTime(reg_time,'%Y-%m-%d')),channel"""
+
+    data =ck.execute(sql)
+    isql="replace into dw_channel_user_daily values " \
+         "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," \
+         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," \
+         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
+    db.dm.executeMany(isql,data)
+
+def dw_channel_amount_daily():
+    sql="""
+ select toDate(formatDateTime(reg_time,'%Y-%m-%d')) as dt,
+   channel as channel,
+           sum(if(subtractDays(date, 1)>=reg_time,0,amount)) as da1,
+           sum(if(subtractDays(date, 2)>=reg_time,0,amount)) as da2,
+           sum(if(subtractDays(date, 3)>=reg_time,0,amount)) as da3,
+           sum(if(subtractDays(date, 4)>=reg_time,0,amount)) as da4,
+           sum(if(subtractDays(date, 5)>=reg_time,0,amount)) as da5,
+           sum(if(subtractDays(date, 6)>=reg_time,0,amount)) as da6,
+           sum(if(subtractDays(date, 7)>=reg_time,0,amount)) as da7,
+           sum(if(subtractDays(date, 8)>=reg_time,0,amount)) as da8,
+           sum(if(subtractDays(date, 9)>=reg_time,0,amount)) as da9,
+           sum(if(subtractDays(date, 10)>=reg_time,0,amount)) as da10,
+           sum(if(subtractDays(date, 11)>=reg_time,0,amount)) as da11,
+           sum(if(subtractDays(date, 12)>=reg_time,0,amount)) as da12,
+           sum(if(subtractDays(date, 13)>=reg_time,0,amount)) as da13,
+           sum(if(subtractDays(date, 14)>=reg_time,0,amount)) as da14,
+           sum(if(subtractDays(date, 15)>=reg_time,0,amount)) as da15,
+           sum(if(subtractDays(date, 16)>=reg_time,0,amount)) as da16,
+           sum(if(subtractDays(date, 17)>=reg_time,0,amount)) as da17,
+           sum(if(subtractDays(date, 18)>=reg_time,0,amount)) as da18,
+           sum(if(subtractDays(date, 19)>=reg_time,0,amount)) as da19,
+           sum(if(subtractDays(date, 20)>=reg_time,0,amount)) as da20,
+           sum(if(subtractDays(date, 21)>=reg_time,0,amount)) as da21,
+           sum(if(subtractDays(date, 22)>=reg_time,0,amount)) as da22,
+           sum(if(subtractDays(date, 23)>=reg_time,0,amount)) as da23,
+           sum(if(subtractDays(date, 24)>=reg_time,0,amount)) as da24,
+           sum(if(subtractDays(date, 25)>=reg_time,0,amount)) as da25,
+           sum(if(subtractDays(date, 26)>=reg_time,0,amount)) as da26,
+           sum(if(subtractDays(date, 27)>=reg_time,0,amount)) as da27,
+           sum(if(subtractDays(date, 28)>=reg_time,0,amount)) as da28,
+           sum(if(subtractDays(date, 29)>=reg_time,0,amount)) as da29,
+           sum(if(subtractDays(date, 30)>=reg_time,0,amount)) as da30,
+           sum(if(subtractDays(date, 31)>=reg_time,0,amount)) as da31,
+           sum(if(subtractDays(date, 32)>=reg_time,0,amount)) as da32,
+           sum(if(subtractDays(date, 33)>=reg_time,0,amount)) as da33,
+           sum(if(subtractDays(date, 34)>=reg_time,0,amount)) as da34,
+           sum(if(subtractDays(date, 35)>=reg_time,0,amount)) as da35,
+           sum(if(subtractDays(date, 36)>=reg_time,0,amount)) as da36,
+           sum(if(subtractDays(date, 37)>=reg_time,0,amount)) as da37,
+           sum(if(subtractDays(date, 38)>=reg_time,0,amount)) as da38,
+           sum(if(subtractDays(date, 39)>=reg_time,0,amount)) as da39,
+           sum(if(subtractDays(date, 40)>=reg_time,0,amount)) as da40,
+           sum(if(subtractDays(date, 41)>=reg_time,0,amount)) as da41,
+           sum(if(subtractDays(date, 42)>=reg_time,0,amount)) as da42,
+           sum(if(subtractDays(date, 43)>=reg_time,0,amount)) as da43,
+           sum(if(subtractDays(date, 44)>=reg_time,0,amount)) as da44,
+           sum(if(subtractDays(date, 45)>=reg_time,0,amount)) as da45,
+           sum(if(subtractDays(date, 46)>=reg_time,0,amount)) as da46,
+           sum(if(subtractDays(date, 47)>=reg_time,0,amount)) as da47,
+           sum(if(subtractDays(date, 48)>=reg_time,0,amount)) as da48,
+           sum(if(subtractDays(date, 49)>=reg_time,0,amount)) as da49,
+           sum(if(subtractDays(date, 50)>=reg_time,0,amount)) as da50,
+           sum(if(subtractDays(date, 51)>=reg_time,0,amount)) as da51,
+           sum(if(subtractDays(date, 52)>=reg_time,0,amount)) as da52,
+           sum(if(subtractDays(date, 53)>=reg_time,0,amount)) as da53,
+           sum(if(subtractDays(date, 54)>=reg_time,0,amount)) as da54,
+           sum(if(subtractDays(date, 55)>=reg_time,0,amount)) as da55,
+           sum(if(subtractDays(date, 56)>=reg_time,0,amount)) as da56,
+           sum(if(subtractDays(date, 57)>=reg_time,0,amount)) as da57,
+           sum(if(subtractDays(date, 58)>=reg_time,0,amount)) as da58,
+           sum(if(subtractDays(date, 59)>=reg_time,0,amount)) as da59,
+           sum(if(subtractDays(date, 60)>=reg_time,0,amount)) as da60,
+           sum(if(subtractDays(date, 90)>=reg_time,0,amount)) as dm3,
+           sum(if(subtractDays(date, 120)>=reg_time,0,amount)) as dm4,
+           sum(if(subtractDays(date, 150)>=reg_time,0,amount)) as dm5
+ from order where reg_time>'2019-03-18 00:00:00' group by toDate(formatDateTime(reg_time,'%Y-%m-%d')),channel"""
+    data=ck.execute(sql)
+    isql="replace into dw_channel_amount_daily values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," \
+         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
+
+    db.dm.executeMany(isql,data)
+
+
+def dw_channel():
+    dw_channel_daily()
+    dw_channel_user_daily()
+    dw_channel_amount_daily()
+
+
 if __name__ == '__main__':
 
-    dw_daily_channel()
+    # dw_daily_channel()
+    dw_channel()