MaterialDataClean.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. """
  2. desc : 素材库数据清洗
  3. 每日运行一次
  4. """
  5. from model.DataBaseUtils import MysqlUtils
  6. db =MysqlUtils()
  7. def title():
  8. sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
  9. sum(cost) consume_amount,
  10. sum(click_count) click_times,
  11. sum(view_count) view_times,
  12. group_concat(distinct book) novels,
  13. max(dt) end_date,min(dt) start_date
  14. from dw_image_cost_day where title!='' and title is not null GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '')
  15. """
  16. df = db.dm.getData_pd(sql)
  17. # print(df)
  18. df["data_type"] = 'all'
  19. df['type'] = 1
  20. df['create_by'] = 0
  21. key = ["content", "type"]
  22. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
  23. table = "t_ads_content"
  24. db.zx_ads.dfsave2mysql(df, table, key, tag)
  25. def description():
  26. sql = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
  27. sum(cost) consume_amount,
  28. sum(click_count) click_times,
  29. sum(view_count) view_times,
  30. group_concat(distinct book) novels,
  31. max(dt) end_date,min(dt) start_date
  32. from dw_image_cost_day where description!='' and description is not null GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '')
  33. """
  34. df = db.dm.getData_pd(sql)
  35. # print(df)
  36. df["data_type"] = 'all'
  37. df['type'] = 2
  38. df['create_by'] = 0
  39. key = ["content", "type"]
  40. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
  41. table = "t_ads_content"
  42. db.zx_ads.dfsave2mysql(df, table, key, tag)
  43. def image():
  44. sql="""select signature,sum(consume_amount) consume_amount,
  45. sum(click_times) click_times,
  46. sum(view_times) view_times,
  47. group_concat(distinct novels) novels ,
  48. max(end_date) end_date,
  49. min(start_date) start_date,
  50. min(content) content,
  51. min(type) type,
  52. if(locate(',',signature)>0,0,1) single_img
  53. from (select replace(signature,' ,','') as signature ,
  54. sum(cost) consume_amount,
  55. sum(click_count) click_times,
  56. sum(view_count) view_times,
  57. group_concat(distinct book) novels ,
  58. max(dt) end_date,
  59. min(dt) start_date,
  60. replace (min(preview_url),' ,','') as content,
  61. if(is_video=1,2,1) type,
  62. if(locate(',',signature)>0,0,1) single_img
  63. from dw_image_cost_day
  64. where signature is not null and signature !=''
  65. and length (replace (replace (signature,',',''),' ',''))>0
  66. GROUP BY signature,is_video) as foo
  67. group by signature
  68. """
  69. df = db.dm.getData_pd(sql)
  70. # print(df)
  71. df['create_by'] = 0
  72. df["data_type"] = 'all'
  73. key = ["signature"]
  74. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img", "content",'consume_amount','type']
  75. table = "t_ads_media"
  76. db.zx_ads.dfsave2mysql(df, table, key, tag)
  77. def adcreative():
  78. sql="""select signature,title,description article,
  79. sum(click_count) click_times,
  80. sum(view_count) view_times,
  81. sum(cost) consume_amount,
  82. group_concat(distinct book) novels,
  83. max(dt) start_date,min(dt) end_date,
  84. min(preview_url) media,
  85. type channel,
  86. if(is_video=1,2,1) type,
  87. if(locate(',',signature)>0,0,1) single_img
  88. from dw_image_cost_day where signature is not null and signature!='' GROUP BY signature,title,description,type,is_video
  89. """
  90. df = db.dm.getData_pd(sql)
  91. key = ["signature",'title','article']
  92. tag = ["view_times", "click_times", "novels", "start_date", "end_date","type","channel",'consume_amount','single_img','media']
  93. table = "t_ads_idea"
  94. db.zx_ads.dfsave2mysql(df, table, key, tag)
  95. def run():
  96. title()
  97. description()
  98. image()
  99. adcreative()
  100. if __name__ == '__main__':
  101. run()