MaterialDataClean.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. """
  2. desc : 素材库数据清洗
  3. 每日运行一次
  4. """
  5. from model.DataBaseUtils import MysqlUtils
  6. db =MysqlUtils()
  7. def title():
  8. sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
  9. sum(cost) consume_amount,
  10. sum(click_count) click_times,
  11. sum(view_count) view_times,
  12. group_concat(distinct book) novels,
  13. max(dt) end_date,min(dt) start_date
  14. from dw_image_cost_day where title!='' and title is not null GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '')
  15. """
  16. df = db.dm.getData_pd(sql)
  17. # print(df)
  18. df["data_type"] = 'all'
  19. df['type'] = 1
  20. df['create_by'] = 0
  21. key = ["content", "type"]
  22. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
  23. table = "t_ads_content"
  24. db.zx_ads.dfsave2mysql(df, table, key, tag)
  25. def description():
  26. sql = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
  27. sum(cost) consume_amount,
  28. sum(click_count) click_times,
  29. sum(view_count) view_times,
  30. group_concat(distinct book) novels,
  31. max(dt) end_date,min(dt) start_date
  32. from dw_image_cost_day where description!='' and description is not null GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '')
  33. """
  34. df = db.dm.getData_pd(sql)
  35. # print(df)
  36. df["data_type"] = 'all'
  37. df['type'] = 2
  38. df['create_by'] = 0
  39. key = ["content", "type"]
  40. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
  41. table = "t_ads_content"
  42. db.zx_ads.dfsave2mysql(df, table, key, tag)
  43. def image():
  44. sql="""select signature,sum(consume_amount) consume_amount,
  45. sum(click_times) click_times,
  46. sum(view_times) view_times,
  47. group_concat(distinct novels) novels ,
  48. max(end_date) end_date,
  49. min(start_date) start_date,
  50. min(content) content,
  51. min(type) type,
  52. if(locate(',',signature)>0,0,1) single_img,
  53. min(width ) width ,
  54. min(height ) height
  55. from (select replace(signature,' ,','') as signature ,
  56. sum(cost) consume_amount,
  57. sum(click_count) click_times,
  58. sum(view_count) view_times,
  59. group_concat(distinct book) novels ,
  60. max(dt) end_date,
  61. min(dt) start_date,
  62. replace (min(preview_url),' ,','') as content,
  63. if(is_video=1,2,1) type,
  64. if(locate(',',signature)>0,0,1) single_img,
  65. min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
  66. min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height
  67. from dw_image_cost_day
  68. where signature is not null and signature !=''
  69. and length (replace (replace (signature,',',''),' ',''))>0
  70. GROUP BY signature,is_video) as foo
  71. group by signature
  72. """
  73. df = db.dm.getData_pd(sql)
  74. # print(df)
  75. df['create_by'] = 0
  76. df["data_type"] = 'all'
  77. key = ["signature"]
  78. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img", "content",'consume_amount','type','width','height']
  79. table = "t_ads_media"
  80. db.zx_ads.dfsave2mysql(df, table, key, tag)
  81. def adcreative():
  82. sql="""select signature,title,article,
  83. sum(click_times) click_times,
  84. sum(view_times) view_times,
  85. sum(consume_amount) consume_amount,
  86. group_concat(distinct novels) novels,
  87. min(start_date) start_date,
  88. max(end_date) end_date,
  89. min(media) media,
  90. min(channel) channel ,
  91. min(type) type,
  92. if(locate(',',signature)>0,0,1) single_img,
  93. min(width) width ,
  94. min(height) height
  95. from
  96. (select replace(signature ,' ,','') as signature,title,description article,
  97. sum(click_count) click_times,
  98. sum(view_count) view_times,
  99. sum(cost) consume_amount,
  100. group_concat(distinct book) novels,
  101. min(dt) start_date,max(dt) end_date,
  102. min(replace(preview_url ,' ,','')) media,
  103. min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
  104. min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height ,
  105. type channel,
  106. if(is_video=1,2,1) type,
  107. if(locate(',',signature)>0,0,1) single_img
  108. from dw_image_cost_day where signature is not null and signature!=''
  109. GROUP BY signature,title,description,type,is_video) as foo
  110. group by signature ,title,article """
  111. df = db.dm.getData_pd(sql)
  112. key = ["signature",'title','article']
  113. tag = ["view_times", "click_times", "novels", "start_date", "end_date","type","channel",'consume_amount','single_img','media','width','height']
  114. table = "t_ads_idea"
  115. db.zx_ads.dfsave2mysql(df, table, key, tag)
  116. def run():
  117. title()
  118. description()
  119. image()
  120. adcreative()
  121. if __name__ == '__main__':
  122. run()