data_stat_task.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. from model.DataBaseUtils import MysqlUtils, CkUtils
  2. from model.DateUtils import DateUtils
  3. import logging
  4. db = MysqlUtils()
  5. ck = CkUtils()
  6. dt = DateUtils()
  7. def dw_daily_bytedance_cost(ymd):
  8. logging.info(f'dw_daily_bytedance_cost 数据填充开始')
  9. dt_sql = f'''
  10. select b.pitcher,b.channel ,a.`date` ,round(sum(cost)/100,2) as cost,sum(view_count) as view_count ,
  11. sum(valid_click_count) as click_count ,c.stage as stage,d.book as book, e.current_platform as platform
  12. from daily_tt a
  13. left join bytedance_pitcher_change b on a.account_id =b.advertiser_id
  14. left join stage_change c on b.channel =c.channel
  15. left join book_change d on b.channel =d.name
  16. left join platform_change e on b.channel = e.name
  17. where a.`date`='{ymd}'
  18. group by b.pitcher ,a.`date` ,b.channel
  19. '''
  20. data_list = db.quchen_text.get_data_list(dt_sql)
  21. byte_list = []
  22. for _ in data_list:
  23. _[2] = str(_[2]) if _[2] else 0
  24. _[3] = round(float(_[3]), 2) if _[3] else 0
  25. _[4] = round(float(_[4]), 2) if _[4] else 0
  26. _[5] = round(float(_[5]), 2) if _[5] else 0
  27. _[6] = str(_[6]) if _[6] else ''
  28. _[7] = str(_[7]) if _[7] else ''
  29. _[8] = str(_[8]) if _[8] else ''
  30. _.append('BYTEDANCE')
  31. byte_list.append(tuple(_))
  32. col = '''pitcher,channel,dt,cost,view_count,click_count,stage,book,platform,type'''
  33. logging.info(f'dw_daily_bytedance_cost add info {ymd}')
  34. ck.execute(f"alter table dw_daily_bytedance_cost drop partition '{ymd}' ")
  35. ck.insertMany("dw_daily_bytedance_cost", col, tuple(byte_list))
  36. logging.info(f'dw_daily_bytedance_cost 数据填充结束')
  37. def platform_data_sum(ymd):
  38. logging.info('dw_daily_platform_cost开始数据更新')
  39. ck.execute("alter table dw_daily_platform_cost drop partition '{}' ".format(ymd))
  40. sql=f'''
  41. insert into dw_daily_platform_cost
  42. select * from
  43. (select * from dw_daily_bytedance_cost a where dt='{ymd}'
  44. union all
  45. select * from dw_daily_channel_cost b where dt='{ymd}'
  46. AND channel not in (select channel from dw_daily_bytedance_cost a
  47. where dt='{ymd}'))
  48. '''
  49. ck.execute(sql)
  50. logging.info('dw_daily_platform_cost数据更新,结束')
  51. def dw_daily_channel_cost(ymd):
  52. logging.info("run> dw_daily_channel_cost")
  53. sql = """
  54. select x.dt,x.channel,pitcher,stage,x.platform,x.book,
  55. ifnull(view_count,0),ifnull(click_count,0),
  56. ifnull(follow_user,0),ifnull(cost,0)/100 as cost,
  57. ifnull(web_view_count,0) web_view_count,
  58. ifnull(platform_view_count,0) platform_view_count,
  59. ifnull(web_order_count,0) web_order_count,
  60. if(stage ='趣程15期' or stage ='趣程26期' or stage ='趣程30期','GDT','MP') type
  61. ,0 require_roi,0 require_mult
  62. from
  63. (select dt,channel,stage,pitcher,platform,book from channel_info_daily where dt='{0}' and channel!='' ) x
  64. left join
  65. (select channel,sum(cost) as cost,sum(view_count) as view_count,sum(valid_click_count) as click_count,sum(from_follow_uv) as follow_user,
  66. sum(web_view_count) as web_view_count,sum(platform_view_count) as platform_view_count,sum(web_order_count) as web_order_count
  67. from
  68. (select account_id,cost,view_count,valid_click_count,round(valid_click_count*official_account_follow_rate,0) as from_follow_uv,
  69. 0 as web_view_count,
  70. 0 as platform_view_count,
  71. 0 as web_order_count
  72. from daily_vx where date='{0} 00:00:00'
  73. union
  74. select account_id,cost,view_count,valid_click_count,from_follow_uv,
  75. ifnull(web_commodity_page_view_count,0) as web_view_count,
  76. ifnull(platform_page_view_count,0) as platform_view_count,
  77. ifnull(web_order_count,0) as web_order_count
  78. from daily_qq where date='{0} 00:00:00' ) a
  79. left join
  80. (select account_id,channel from channel_by_account_daily where dt='{0}') b
  81. on a.account_id=b.account_id group by channel)
  82. z on x.channel=z.channel
  83. """.format(ymd)
  84. data = db.quchen_text.get_data_list(sql)
  85. data1 = []
  86. col = "dt,channel,pitcher,stage,platform,book,view_count,click_count,follow_user,cost,web_view_count,platform_view_count,web_order_count,type,require_roi,require_mult"
  87. for i in data:
  88. i[0] = str(i[0])
  89. i[9] = str(i[9])
  90. i[6] = float(i[6])
  91. i[7] = float(i[7])
  92. i[8] = float(i[8])
  93. i[9] = float(i[9])
  94. i[10] = float(i[10])
  95. i[11] = float(i[11])
  96. i[12] = float(i[12])
  97. data1.append(tuple(i))
  98. ck.execute(f"alter table dw_daily_channel_cost drop partition '{ymd}' ")
  99. logging.info(len(data1))
  100. ck.insertMany("dw_daily_channel_cost", col, tuple(data1))
  101. def channel_by_account_daily(ymd):
  102. """返回当天消耗账户对应的公众号表"""
  103. logging.info("run> channel_by_account_daily")
  104. sql = """replace into channel_by_account_daily
  105. select '{0}' as dt,a.account_id as account_id, ifnull(ifnull(b.name,a.name),'') as channel,type from
  106. (select account_id,name,'GDT' type from advertiser_qq
  107. union
  108. select account_id,name,'MP' type from advertiser_vx
  109. ) a
  110. left join
  111. (select b.account_id,b.name from
  112. (select min(end_time) as end_time,account_id from account_change where end_time>'{0}' GROUP BY account_id) a
  113. left join account_change b on a.end_time=b.end_time and a.account_id=b.account_id) b on a.account_id=b.account_id""".format(
  114. ymd)
  115. db.quchen_text.execute(sql)
  116. def channel_info_daily(ymd):
  117. """获取公众号某天的期数,投手,平台,书籍
  118. @ return [[]]
  119. """
  120. # 获取现在的全量公众号信息
  121. logging.info("run> channel_info_daily")
  122. sql = """select '{}' as dt,a.name ,ifnull(stage,''),ifnull(pitcher,''),ifnull(platform,''),ifnull(book,'') from (
  123. select name from advertiser_vx where name is not null group by name-- 公众号全量表
  124. union
  125. select name from advertiser_qq where name is not null group by name
  126. union
  127. select name from account_change group by name
  128. union
  129. select channel as name from pitcher_change group by channel
  130. union
  131. select name from platform_change group by name
  132. union
  133. select name from book_change group by name) a left join (
  134. select name,ifnull(stage,'') stage,ifnull(pitcher,'') pitcher,ifnull(platform,'') platform,ifnull(book,'') book from advertiser_qq where name is not null group by name,stage,pitcher,platform,book
  135. union
  136. select name,ifnull(stage,'') stage,ifnull(pitcher,'') pitcher,ifnull(platform,'') platform,ifnull(book,'') book from advertiser_vx where name is not null and name !=''
  137. ) b on a.name=b.name
  138. """.format(ymd)
  139. data = db.quchen_text.get_data_list(sql)
  140. pitcher_change = db.quchen_text.getData(
  141. "select b.channel as channel,pitcher from "
  142. "(select max(start_time) as start_time,channel from pitcher_change "
  143. " where start_time<='{}' GROUP BY channel) a"
  144. " left join pitcher_change b on a.start_time=b.start_time and a.channel=b.channel".format(ymd))
  145. platform_change = db.quchen_text.getData(
  146. "select b.name as channel,current_platform as platform from (select max(change_date) as change_date,name from platform_change "
  147. "where change_date<='{}' GROUP BY name) a "
  148. "left join platform_change b on a.change_date=b.change_date and a.name=b.name".format(ymd))
  149. book_change = db.quchen_text.getData(
  150. "select b.name as channel,book from (select max(start_time) as start_time,name from book_change "
  151. "where start_time<='{}' GROUP BY name) a "
  152. "left join book_change b on a.start_time=b.start_time and a.name=b.name".format(ymd))
  153. stage_change = db.quchen_text.getData(
  154. "select channel,stage from (select max(start_date) as start_date,channel from stage_change "
  155. "where start_date<='{}' GROUP BY channel) a "
  156. "left join stage_change using(start_date,channel)".format(ymd))
  157. for i in data:
  158. for j in pitcher_change:
  159. if i[1] == j[0]:
  160. i[3] = j[1]
  161. for k in platform_change:
  162. if i[1] == k[0]:
  163. i[4] = k[1]
  164. for h in book_change:
  165. if i[1] == h[0]:
  166. i[5] = h[1]
  167. for m in stage_change:
  168. if i[1] == m[0]:
  169. i[2] = m[1]
  170. insert_sql = "replace into channel_info_daily values (%s,%s,%s,%s,%s,%s) "
  171. db.quchen_text.executeMany(insert_sql, data)
  172. def ods_order(dt):
  173. sql = """ replace into ods_order
  174. select
  175. case platform when '掌中云' then DATE_FORMAT(STR_TO_DATE(order_time,'%Y-%m-%dT%H:%i:%s'),'%Y-%m-%d')
  176. when '掌读' then from_unixtime(order_time, '%Y-%m-%d')
  177. ELSE order_time end date,
  178. stage,platform,channel,channel_id,user_id,
  179. case when platform='掌中云' then DATE_FORMAT(STR_TO_DATE(order_time,'%Y-%m-%dT%H:%i:%s'),'%Y-%m-%d %H:%i:%s')
  180. when platform='掌读' then from_unixtime(order_time, '%Y-%m-%d %H:%i:%s')
  181. ELSE order_time end order_time,
  182. case when platform='掌中云' then DATE_FORMAT(STR_TO_DATE(reg_time,'%Y-%m-%dT%H:%i:%s'),'%Y-%m-%d %H:%i:%s')
  183. when platform='掌读' then from_unixtime(reg_time, '%Y-%m-%d %H:%i:%s')
  184. ELSE reg_time end reg_time,
  185. amount,from_novel,order_id,2 from `order` where date=UNIX_TIMESTAMP('{}')
  186. """.format(dt)
  187. db.quchen_text.execute(sql)
  188. def order_account_text():
  189. db.quchen_text.execute("truncate order_account_text")
  190. with open('./wending_account_config.csv', encoding='utf-8') as f:
  191. for i in f.readlines():
  192. db.quchen_text.execute("insert into order_account_text(platform,text) values ('文鼎','{}')".format(i))
  193. if __name__ == '__main__':
  194. # channel_info_daily('2021-02-06')
  195. # channel_by_account_daily('2021-02-05')
  196. for i in dt.getDateLists('2021-07-23', '2021-09-17'):
  197. print(i)
  198. dw_daily_channel_cost(i)
  199. # ods_order('2021-05-06')