data_stat_task.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. from model.DataBaseUtils import MysqlUtils,CkUtils
  2. from model.DateUtils import DateUtils
  3. from model.log import logger
  4. log=logger()
  5. db = MysqlUtils()
  6. ck = CkUtils()
  7. dt = DateUtils()
  8. from datetime import datetime
  9. from sync_to_ck_task import dw_order_channel_cost_sync_ck
  10. def dw_daily_channel_cost(ymd):
  11. sql="""replace into dw_daily_channel_cost
  12. select dt,x.channel,pitcher,stage,platform,book,
  13. ifnull(view_count,0),ifnull(click_count,0),ifnull(follow_user,0),ifnull(cost,0)/100 as cost,
  14. ifnull(web_view_count,0) web_view_count,
  15. ifnull(platform_view_count,0) platform_view_count,
  16. ifnull(web_order_count,0) web_order_count,
  17. ifnull(type,'')
  18. from
  19. (select dt,channel,stage,pitcher,platform,book from channel_info_daily where dt='{0}' and channel!='') x
  20. left join
  21. (select channel,sum(cost) as cost,sum(view_count) as view_count,sum(valid_click_count) as click_count,sum(from_follow_uv) as follow_user,
  22. sum(web_view_count) as web_view_count,sum(platform_view_count) as platform_view_count,sum(web_order_count) as web_order_count
  23. from
  24. (select account_id,cost,view_count,valid_click_count,round(valid_click_count*official_account_follow_rate,0) as from_follow_uv,
  25. 0 as web_view_count,
  26. 0 as platform_view_count,
  27. 0 as web_order_count
  28. from daily_vx where date='{0} 00:00:00'
  29. union
  30. select account_id,cost,view_count,valid_click_count,from_follow_uv,
  31. ifnull(web_commodity_page_view_count,0) as web_view_count,
  32. ifnull(platform_page_view_count,0) as platform_view_count,
  33. ifnull(web_order_count,0) as web_order_count
  34. from daily_qq where date='{0} 00:00:00') a
  35. left join
  36. (select account_id,channel from channel_by_account_daily where dt='{0}') b on a.account_id=b.account_id group by channel)
  37. z on x.channel=z.channel
  38. left join (SELECT channel,type from channel_by_account_daily GROUP By channel,type) k on x.channel=k.channel
  39. """.format(ymd)
  40. print(sql)
  41. db.quchen_text.execute(sql)
  42. def channel_by_account_daily(ymd):
  43. """返回当天消耗账户对应的公众号表"""
  44. sql="""replace into channel_by_account_daily
  45. select '{0}' as dt,a.account_id as account_id, ifnull(ifnull(b.name,a.name),'') as channel,type from
  46. (select account_id,name,'qq' as type from advertiser_qq
  47. union
  48. select account_id,name,'vx' as type from advertiser_vx
  49. ) a
  50. left join
  51. (select b.account_id,b.name from
  52. (select min(end_time) as end_time,account_id from account_change where end_time>'{0}' GROUP BY account_id) a
  53. left join account_change b on a.end_time=b.end_time and a.account_id=b.account_id) b on a.account_id=b.account_id""".format(ymd)
  54. print(sql)
  55. db.quchen_text.execute(sql)
  56. def channel_info_daily(ymd):
  57. """获取公众号某天的期数,投手,平台,书籍
  58. @ return [[]]
  59. """
  60. # 获取现在的全量公众号信息
  61. sql="""select '{}' as dt,a.name ,ifnull(stage,''),ifnull(pitcher,''),ifnull(platform,''),ifnull(book,'') from (
  62. select name from advertiser_vx where name is not null group by name-- 公众号全量表
  63. union
  64. select name from advertiser_qq where name is not null group by name
  65. union
  66. select name from account_change group by name
  67. union
  68. select channel as name from pitcher_change group by channel
  69. union
  70. select name from platform_change group by name
  71. union
  72. select name from book_change group by name) a left join (
  73. select name,ifnull(stage,'') stage,ifnull(pitcher,'') pitcher,ifnull(platform,'') platform,ifnull(book,'') book from advertiser_qq where name is not null group by name,stage,pitcher,platform,book
  74. union
  75. select name,ifnull(stage,'') stage,ifnull(pitcher,'') pitcher,ifnull(platform,'') platform,ifnull(book,'') book from advertiser_vx where name is not null and name !=''
  76. ) b on a.name=b.name
  77. """.format(ymd)
  78. data=db.quchen_text.get_data_list(sql)
  79. pitcher_change=db.quchen_text.getData(
  80. "select b.channel as channel,pitcher from "
  81. "(select max(start_time) as start_time,channel from pitcher_change "
  82. " where start_time<='{}' GROUP BY channel) a"
  83. " left join pitcher_change b on a.start_time=b.start_time and a.channel=b.channel".format(ymd))
  84. platform_change=db.quchen_text.getData("select b.name as channel,current_platform as platform from (select max(change_date) as change_date,name from platform_change "
  85. "where change_date<='{}' GROUP BY name) a "
  86. "left join platform_change b on a.change_date=b.change_date and a.name=b.name".format(ymd))
  87. book_change=db.quchen_text.getData("select b.name as channel,book from (select max(start_time) as start_time,name from book_change "
  88. "where start_time<='{}' GROUP BY name) a "
  89. "left join book_change b on a.start_time=b.start_time and a.name=b.name".format(ymd))
  90. for i in data:
  91. for j in pitcher_change:
  92. if i[1]==j[0]:
  93. i[3]=j[1]
  94. for k in platform_change:
  95. if i[1]==k[0]:
  96. i[4]=k[1]
  97. for h in book_change:
  98. if i[1]==h[0]:
  99. i[5]=h[1]
  100. print(data)
  101. insert_sql="replace into channel_info_daily values (%s,%s,%s,%s,%s,%s) "
  102. db.quchen_text.executeMany(insert_sql,data)
  103. def ods_order(dt):
  104. sql=""" replace into ods_order
  105. select
  106. case platform when '掌中云' then DATE_FORMAT(STR_TO_DATE(order_time,'%Y-%m-%dT%H:%i:%s'),'%Y-%m-%d')
  107. when '掌读' then from_unixtime(order_time, '%Y-%m-%d')
  108. ELSE order_time end date,
  109. stage,platform,channel,channel_id,user_id,
  110. case when platform='掌中云' then DATE_FORMAT(STR_TO_DATE(order_time,'%Y-%m-%dT%H:%i:%s'),'%Y-%m-%d %H:%i:%s')
  111. when platform='掌读' then from_unixtime(order_time, '%Y-%m-%d %H:%i:%s')
  112. ELSE order_time end order_time,
  113. case when platform='掌中云' then DATE_FORMAT(STR_TO_DATE(reg_time,'%Y-%m-%dT%H:%i:%s'),'%Y-%m-%d %H:%i:%s')
  114. when platform='掌读' then from_unixtime(reg_time, '%Y-%m-%d %H:%i:%s')
  115. ELSE reg_time end reg_time,
  116. amount,from_novel,order_id from `order` where date=UNIX_TIMESTAMP('{}')
  117. """.format(dt)
  118. db.quchen_text.execute(sql)
  119. def order_account_text():
  120. db.quchen_text.execute("truncate order_account_text")
  121. with open('./wending_account_config.csv',encoding='utf-8') as f:
  122. for i in f.readlines():
  123. db.quchen_text.execute("insert into order_account_text(platform,text) values ('文鼎','{}')".format(i))
  124. def dw_daily_channel():
  125. """快照表 每日一更新 t-1"""
  126. sql="""insert into dw_daily_channel
  127. select
  128. dt,channel,pitcher,stage,platform,book,
  129. order_count,order_user,order_amount,first_order_count,first_order_user,first_order_amount,
  130. view_count,click_count,follow_user,cost,reg_order_count,reg_order_user,reg_order_amount,reg_order_amount30,
  131. web_view_count,platform_view_count,web_order_count,total_cost,total_amount,reg_order_user_again,reg_order_user7,reg_order_user30,reg_order_amount7,type,
  132. total_first_amount
  133. from
  134. (select dt,channel, pitcher,stage,platform,book,cost,view_count,click_count,follow_user,web_view_count,platform_view_count,web_order_count,type from dw_daily_channel_cost) a
  135. left outer join
  136. (select toDate(formatDateTime(reg_time,'%Y-%m-%d')) as dt2,channel as channel2,
  137. sum(amount) as reg_order_amount,
  138. count(distinct user_id) as reg_order_user,
  139. count(1) as reg_order_count,
  140. sum(if(subtractDays(date, 30)>reg_time,0,amount)) as reg_order_amount30,
  141. count(distinct if(subtractDays(date, 7)>reg_time,'',user_id))-1 reg_order_user7,
  142. sum(if(subtractDays(date, 7)>reg_time,0,amount)) as reg_order_amount7,
  143. count(distinct if(subtractDays(date, 30)>reg_time,'',user_id))-1 reg_order_user30
  144. from order where reg_time>'2019-03-18 00:00:00' group by toDate(formatDateTime(reg_time,'%Y-%m-%d')),channel) b
  145. on dt=dt2 and channel=channel2 left outer join
  146. (select date as dt3,channel as channel3,
  147. count(1) as first_order_count,
  148. count(distinct user_id) as first_order_user,
  149. sum(amount) as first_order_amount
  150. from order where toDate(reg_time)=date group by date,channel) c
  151. on dt=dt3 and channel=channel3
  152. left outer join
  153. (select date as dt4,channel as channel4,
  154. count(1) as order_count,
  155. count(distinct user_id) as order_user,
  156. sum(amount) as order_amount
  157. from order group by date,channel) d
  158. on dt=dt4 and channel=channel4
  159. left outer join
  160. (select dt dt5,channel channel5,total_cost,total_amount,total_first_amount from dw_channel_daily_total ) e
  161. on dt=dt5 and channel=channel5
  162. left outer join (
  163. select sum(if(user_order_count>1,1,0)) reg_order_user_again,channel channel6,toDate(reg_date) dt6 from (
  164. select formatDateTime(reg_time,'%Y-%m-%d') reg_date,channel,count(1) user_order_count
  165. from order group by formatDateTime(reg_time,'%Y-%m-%d') ,user_id,channel
  166. ) x group by reg_date,channel
  167. ) f on dt=dt6 and channel=channel6
  168. """
  169. ck.execute("truncate table dw_daily_channel")
  170. ck.execute(sql)
  171. print("ok")
  172. def dm_pitcher_daily_page_total():
  173. sql=f"""
  174. insert into dm_pitcher_daily_page_total
  175. select '{dt.get_n_days(0)}' dt,
  176. pitcher,
  177. round(cost,2),
  178. round(amount,2),
  179. round(roi,4),
  180. channel_count,
  181. on_channel_count,
  182. channel_count-on_channel_count off_channel_count,
  183. round(this_month_cost,2),
  184. round(this_month_amount,2),
  185. if(this_month_cost = 0, 0, round(this_month_amount / this_month_cost, 4)) this_month_roi,
  186. last_month_cost,
  187. last_month_amount,
  188. if(last_month_cost = 0, 0, last_month_amount / last_month_cost) last_month_roi,
  189. round(last_month_far_amount,2),
  190. follow_user,
  191. if(last_month_cost = 0, 0, last_month_far_amount / last_month_cost) last_month_far_roi
  192. from (select pitcher,
  193. sum(cost) cost,
  194. sum(order_amount) amount,
  195. sum(follow_user) follow_user,
  196. if(cost = 0, 0, round(amount / cost, 2)) roi
  197. from dw_daily_channel
  198. group by pitcher) q
  199. left outer join
  200. (select count(distinct channel) channel_count, pitcher
  201. from dw_daily_channel_cost
  202. where dt = '{dt.get_n_days(0)}'
  203. group by pitcher) w
  204. on q.pitcher = w.pitcher
  205. left outer join(
  206. select count(distinct channel) on_channel_count, pitcher
  207. from dw_daily_channel_cost
  208. where dt >= '{dt.get_n_days(-15)}'
  209. and cost > 0
  210. group by pitcher) e
  211. on q.pitcher = e.pitcher
  212. left outer join (
  213. select pitcher,
  214. sum(cost) this_month_cost
  215. from dw_daily_channel
  216. where dt >= '{dt.get_n_pre_month_first_day(0)}'
  217. group by pitcher) r
  218. on q.pitcher = r.pitcher
  219. left outer join(
  220. select pitcher,
  221. sum(cost) last_month_cost
  222. from dw_daily_channel
  223. where dt >= '{dt.get_n_pre_month_first_day(1)}'
  224. and dt < '{dt.get_n_pre_month_first_day(0)}'
  225. group by pitcher) t on q.pitcher = t.pitcher
  226. left outer join (
  227. select b.pitcher, sum(amount) last_month_far_amount
  228. from order a
  229. left outer join dw_daily_channel_cost b on a.channel = b.channel and a.date = b.dt
  230. where reg_time >= '{dt.get_n_pre_month_first_day(1)} 00:00:00' and reg_time<'{dt.get_n_pre_month_first_day(0)} 00:00:00'
  231. group by pitcher
  232. ) y on q.pitcher = y.pitcher
  233. left outer join (
  234. select b.pitcher, sum(amount) last_month_amount
  235. from order a
  236. left outer join dw_daily_channel_cost b on a.channel = b.channel and a.date = b.dt
  237. where reg_time >= '{dt.get_n_pre_month_first_day(1)} 00:00:00' and reg_time<'{dt.get_n_pre_month_first_day(0)} 00:00:00'
  238. and dt<'{dt.get_n_pre_month_first_day(0)}'
  239. group by pitcher
  240. ) p on q.pitcher=p.pitcher
  241. left outer join (
  242. select b.pitcher, sum(amount) this_month_amount
  243. from order a
  244. left outer join dw_daily_channel_cost b on a.channel = b.channel and a.date = b.dt
  245. where reg_time >= '{dt.get_n_pre_month_first_day(0)} 00:00:00'
  246. group by pitcher
  247. ) g on q.pitcher=g.pitcher
  248. having pitcher != ''"""
  249. print(sql)
  250. ck.execute(f"alter table dm_pitcher_daily_page_total drop partition '{dt.get_n_days(0)}'")
  251. ck.execute(sql)
  252. def dw_channel_daily_total(ymd):
  253. sql=f"""insert into dw_channel_daily_total
  254. select '{ymd}' dt,channel,total_cost,total_amount,total_first_amount from
  255. (select channel,sum(cost) total_cost from dw_daily_channel_cost where dt<='{ymd}' group by channel)a
  256. left outer join
  257. (select channel,sum(amount) total_amount,
  258. sum(if(toDate(formatDateTime(reg_time,'%Y-%m-%d'))=date,amount,0)) total_first_amount
  259. from order where date<='{ymd}' group by channel) b
  260. on a.channel=b.channel
  261. """
  262. ck.execute(f"alter table dw_channel_daily_total drop partition '{ymd}'")
  263. print(sql)
  264. ck.execute(sql)
  265. def dw_daily_pitcher(ymd):
  266. sql=f"""insert into dw_daily_pitcher
  267. select '{ymd}' dt, a.pitcher,
  268. start_cost_date,end_cost_date,
  269. sum(if(dt>=start_cost_date,reg_order_amount,0)) total_amount,
  270. sum(if(dt>=start_cost_date,cost,0)) total_cost
  271. from (select reg_order_amount,cost,dt,pitcher from dw_daily_channel where dt<='{ymd}')
  272. a left outer join
  273. (select min(dt) start_cost_date,max(dt) end_cost_date,pitcher
  274. from dw_daily_channel_cost where cost>0 group by pitcher) b on a.pitcher=b.pitcher
  275. group by pitcher,start_cost_date,end_cost_date
  276. """
  277. # print(sql)
  278. ck.execute(f"alter table dw_daily_pitcher drop partition '{ymd}'")
  279. ck.execute(sql)
  280. if __name__ == '__main__':
  281. # dw_channel_daily_total('2020-07-20')
  282. # channel_by_account_daily('2020-12-17')
  283. # dw_daily_channel_cost('2020-12-17')
  284. # dw_order_channel_cost_sync_ck('2020-12-17')
  285. # exit(0)
  286. # ods_order('2020-12-20')
  287. # dw_daily_channel_plus()
  288. # exit()
  289. dw_daily_channel()
  290. exit(0)
  291. # dm_pitcher_daily_page_total()
  292. # dm_pitcher_daily_page_total()
  293. # dw_channel_daily_total('2021-01-11')
  294. # dw_daily_channel_cost('2021-01-12')
  295. # dw_channel_daily_total('2021-01-13')
  296. # exit(0)
  297. # dw_daily_channel()
  298. # # exit(0)
  299. for i in dt.getDateLists('2019-03-18','2021-01-13'):
  300. print(i)
  301. dw_channel_daily_total(i)
  302. # dw_daily_pitcher(i)
  303. # channel_by_account_daily(i)
  304. # dw_daily_channel_cost(i)
  305. # dw_order_channel_cost_sync_ck(i)
  306. # dw_daily_channel()
  307. # order_sync_ck(today)
  308. # # ods_order(i)
  309. # channel_info_daily(i)
  310. # dw_daily_channel_cost(i)
  311. # dw_order_channel_cost_sync_ck(i)
  312. # dw_channel_daily_total(i)