MaterialDataClean.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. """
  2. desc : 素材库数据清洗
  3. 每日运行一次
  4. """
  5. from model.DataBaseUtils import MysqlUtils
  6. db =MysqlUtils()
  7. def title():
  8. sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
  9. sum(cost) consume_amount,
  10. sum(click_count) click_times,
  11. sum(view_count) view_times,
  12. group_concat(distinct book) novels,
  13. max(dt) end_date,min(dt) start_date
  14. from dw_image_cost_day where title!='' and title is not null GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') limit 1000
  15. """
  16. df = db.dm.getData_pd(sql)
  17. print(df)
  18. df["data_type"] = 'all'
  19. df['type'] = 1
  20. df['create_by'] = 0
  21. key = ["content", "type"]
  22. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
  23. table = "t_ads_content"
  24. db.zx_test.dfsave2mysql(df, table, key, tag)
  25. def description():
  26. sql = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
  27. sum(cost) consume_amount,
  28. sum(click_count) click_times,
  29. sum(view_count) view_times,
  30. group_concat(distinct book) novels,
  31. max(dt) end_date,min(dt) start_date
  32. from dw_image_cost_day where description!='' and description is not null GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') limit 1000
  33. """
  34. df = db.dm.getData_pd(sql)
  35. print(df)
  36. df["data_type"] = 'all'
  37. df['type'] = 2
  38. df['create_by'] = 0
  39. key = ["content", "type"]
  40. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
  41. table = "t_ads_content"
  42. db.zx_test.dfsave2mysql(df, table, key, tag)
  43. def image():
  44. sql="""select signature,
  45. sum(cost) consume_amount,
  46. sum(click_count) click_times,
  47. sum(view_count) view_times,
  48. group_concat(distinct book) novels ,
  49. max(dt) end_date,
  50. min(dt) start_date,
  51. min(preview_url) content,
  52. if(flag=1,2,1) type,
  53. if(locate(',',signature)>0,0,1) single_img
  54. from dw_image_cost_day where signature is not null and signature !='' GROUP BY signature,flag limit 1000"""
  55. df = db.dm.getData_pd(sql)
  56. # print(df)
  57. df['create_by'] = 0
  58. df["data_type"] = 'all'
  59. key = ["signature"]
  60. tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img", "content",'consume_amount','type']
  61. table = "t_ads_media"
  62. db.zx_test.dfsave2mysql(df, table, key, tag)
  63. def adcreative():
  64. sql="""select signature,title,description article,
  65. sum(click_count) click_times,
  66. sum(view_count) view_times,
  67. sum(cost) consume_amount,
  68. group_concat(distinct book) novels,
  69. max(dt) start_date,min(dt) end_date,
  70. min(preview_url) media,
  71. type channel,
  72. if(flag=1,2,1) type,
  73. if(locate(',',signature)>0,0,1) single_img
  74. from dw_image_cost_day where signature is not null and signature!='' GROUP BY signature,title,description,type,flag
  75. """
  76. df = db.dm.getData_pd(sql)
  77. key = ["signature",'title','article']
  78. tag = ["view_times", "click_times", "novels", "start_date", "end_date","type","channel",'consume_amount','single_img','media']
  79. table = "t_ads_idea"
  80. db.zx_test.dfsave2mysql(df, table, key, tag)
  81. if __name__ == '__main__':
  82. # title()
  83. # description()
  84. # image()
  85. adcreative()