bytedance_info_tmp_insert.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. from model.DateUtils import DateUtils
  2. from model.DataBaseUtils import MysqlUtils, CkUtils
  3. db = MysqlUtils()
  4. du = DateUtils()
  5. ck = CkUtils()
  6. def insert_data():
  7. bt_sql = '''
  8. select b.pitcher,b.channel ,DATE_FORMAT(a.`date`, '%Y-%m-%d'),
  9. round(sum(cost)/100,2) as cost,sum(view_count) as view_count ,
  10. sum(valid_click_count) as click_count from daily_tt a
  11. left join bytedance_pitcher_change b
  12. on a.account_id =b.advertiser_id
  13. group by b.pitcher ,a.`date` ,b.channel
  14. '''
  15. byte_list = db.quchen_text.get_data_list(bt_sql)
  16. isql = "insert into bytedance_info_tmp values (%s,%s,%s,%s,%s,%s)"
  17. db.dm.execute("truncate table bytedance_info_tmp")
  18. db.dm.executeMany(isql, byte_list)
  19. def dw_daily_bytedance_cost(ymd):
  20. dt_sql = f'''
  21. select b.pitcher,b.channel ,a.`date` ,round(sum(cost)/100,2) as cost,sum(view_count) as view_count ,
  22. sum(valid_click_count) as click_count ,c.stage as stage,d.book as book, e.current_platform as platform
  23. from daily_tt a
  24. left join bytedance_pitcher_change b on a.account_id =b.advertiser_id
  25. left join stage_change c on b.channel =c.channel
  26. left join book_change d on b.channel =d.name
  27. left join platform_change e on b.channel = e.name
  28. where a.`date`='{ymd}'
  29. group by b.pitcher ,a.`date` ,b.channel
  30. '''
  31. data_list = db.quchen_text.get_data_list(dt_sql)
  32. byte_list = []
  33. for _ in data_list:
  34. _[2] = str(_[2]) if _[2] else 0
  35. _[3] = round(float(_[3]), 2) if _[3] else 0
  36. _[4] = round(float(_[4]), 2) if _[4] else 0
  37. _[5] = round(float(_[5]), 2) if _[5] else 0
  38. _[6] = str(_[6]) if _[6] else ''
  39. _[7] = str(_[7]) if _[7] else ''
  40. _[8] = str(_[8]) if _[8] else ''
  41. _.append('BYTEDANCE')
  42. byte_list.append(tuple(_))
  43. col = '''pitcher,channel,dt,cost,view_count,click_count,stage,book,platform,type'''
  44. print(byte_list)
  45. ck.execute(f"alter table dw_daily_bytedance_cost drop partition '{ymd}' ")
  46. ck.insertMany("dw_daily_bytedance_cost", col, tuple(byte_list))
  47. def replace_data():
  48. channel_name='BYTEDANCE'
  49. sql = f"""
  50. select
  51. dt1,channel1,pitcher,stage,platform,book,'{channel_name}' type,
  52. order_count,order_user,order_amount,
  53. first_order_count,first_order_user,first_order_amount,
  54. view_count,click_count,follow_user,
  55. cost,reg_order_count,reg_order_user,reg_order_amount,
  56. web_view_count,platform_view_count,web_order_count,
  57. reg_order_user_again,
  58. reg_order_user_again3,
  59. reg_order_user_again4,
  60. reg_order_user_again5,
  61. reg_order_user_again6
  62. from (
  63. select
  64. if(dt4='1970-01-01',dt,dt4) dt1,if(channel4='',channel,channel4) channel1,
  65. pitcher,stage,platform,book,cost,view_count,click_count, ---基础属性和消耗数据
  66. follow_user,web_view_count,platform_view_count,web_order_count,'{channel_name}' type,
  67. require_roi,require_mult,order_count,order_user,order_amount from
  68. (select * from dw_daily_bytedance_cost) aa
  69. left join
  70. (select date as dt4,channel as channel4,count(1) as order_count, ---账面充值
  71. count(distinct user_id) as order_user,sum(amount) as order_amount
  72. from order where status=2 group by date,channel) dd
  73. on dt=dt4 and channel=channel4) a
  74. left outer join (
  75. select sum(if(user_order_count>=2,1,0)) reg_order_user_again,channel channel6,toDate(reg_date) dt6, ---复冲人数
  76. sum(if(user_order_count>=3,1,0)) reg_order_user_again3,
  77. sum(if(user_order_count>=4,1,0)) reg_order_user_again4,
  78. sum(if(user_order_count>=5,1,0)) reg_order_user_again5,
  79. sum(if(user_order_count>=6,1,0)) reg_order_user_again6
  80. from (select formatDateTime(reg_time,'%Y-%m-%d') reg_date,channel,count(1) user_order_count
  81. from order where status=2 group by formatDateTime(reg_time,'%Y-%m-%d') ,user_id,channel) x group by reg_date,channel
  82. ) f on dt1=dt6 and channel1=channel6
  83. left outer join
  84. (
  85. select toDate(formatDateTime(reg_time,'%Y-%m-%d')) as dt2, ---新用户累计充值数据
  86. channel as channel2,
  87. sum(amount) as reg_order_amount,
  88. count(distinct user_id) as reg_order_user,
  89. count(1) as reg_order_count
  90. from order where status=2 and reg_time>'2019-03-18 00:00:00' group by toDate(formatDateTime(reg_time,'%Y-%m-%d')),channel) b
  91. on dt1=dt2 and channel1=channel2
  92. left outer join
  93. (select date as dt3,channel as channel3,count(1) as first_order_count, ---新用户首日充值
  94. count(distinct user_id) as first_order_user,sum(amount) as first_order_amount
  95. from order where status=2 and toDate(reg_time)=date group by date,channel) c
  96. on dt1=dt3 and channel1=channel3
  97. having order_amount+cost+reg_order_amount>0"""
  98. data = ck.execute(sql)
  99. print(data)
  100. isql = "replace into dw_channel values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
  101. # db.dm.execute("truncate table dw_channel")
  102. db.dm.executeMany(isql, data)
  103. def platform_data_sum(ymd):
  104. ck.execute("alter table dw_daily_platform_cost drop partition '{}' ".format(ymd))
  105. sql=f'''
  106. insert into dw_daily_platform_cost
  107. select * from
  108. (select * from dw_daily_bytedance_cost a where dt='{ymd}'
  109. union all
  110. select * from dw_daily_channel_cost b where dt='{ymd}'
  111. AND channel not in (select channel from dw_daily_bytedance_cost a
  112. where dt='{ymd}'))
  113. '''
  114. ck.execute(sql)
  115. if __name__ == '__main__':
  116. dt = DateUtils()
  117. # insert_data()
  118. for i in dt.getDateLists('2021-09-18','2021-09-23'):
  119. print(i)
  120. dw_daily_bytedance_cost(i)
  121. platform_data_sum(i)
  122. exit()
  123. # replace_data()
  124. for i in dt.getDateLists('2019-03-18','2021-10-17'):
  125. print(i)
  126. platform_data_sum(i)