123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- """
- desc : 素材库数据清洗
- 每日运行一次
- """
- from model.DataBaseUtils import MysqlUtils
- db =MysqlUtils()
- def title():
- sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
- sum(cost) consume_amount,
- sum(click_count) click_times,
- sum(view_count) view_times,
- group_concat(distinct book) novels,
- max(dt) end_date,min(dt) start_date
- from dw_image_cost_day where title!='' and title is not null GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '')
- """
- df = db.dm.getData_pd(sql)
- # print(df)
- df["data_type"] = 'all'
- df['type'] = 1
- df['create_by'] = 0
- key = ["content", "type"]
- tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
- table = "t_ads_content"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def description():
- sql = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
- sum(cost) consume_amount,
- sum(click_count) click_times,
- sum(view_count) view_times,
- group_concat(distinct book) novels,
- max(dt) end_date,min(dt) start_date
- from dw_image_cost_day where description!='' and description is not null GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '')
- """
- df = db.dm.getData_pd(sql)
- # print(df)
- df["data_type"] = 'all'
- df['type'] = 2
- df['create_by'] = 0
- key = ["content", "type"]
- tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
- table = "t_ads_content"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def image():
- sql="""select signature,sum(consume_amount) consume_amount,
- sum(click_times) click_times,
- sum(view_times) view_times,
- group_concat(distinct novels) novels ,
- max(end_date) end_date,
- min(start_date) start_date,
- min(content) content,
- min(type) type,
- if(locate(',',signature)>0,0,1) single_img,
- min(width ) width ,
- min(height ) height
- from (select replace(signature,' ,','') as signature ,
- sum(cost) consume_amount,
- sum(click_count) click_times,
- sum(view_count) view_times,
- group_concat(distinct book) novels ,
- max(dt) end_date,
- min(dt) start_date,
- replace (min(preview_url),' ,','') as content,
- if(is_video=1,2,1) type,
- if(locate(',',signature)>0,0,1) single_img,
- min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
- min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height
- from dw_image_cost_day
- where signature is not null and signature !=''
- and length (replace (replace (signature,',',''),' ',''))>0
- GROUP BY signature,is_video) as foo
- group by signature
- """
- df = db.dm.getData_pd(sql)
- # print(df)
- df['create_by'] = 0
- df["data_type"] = 'all'
- key = ["signature"]
- tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img", "content",'consume_amount','type','width','height']
- table = "t_ads_media"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def adcreative():
- sql="""select signature,title,article,
- sum(click_times) click_times,
- sum(view_times) view_times,
- sum(consume_amount) consume_amount,
- group_concat(distinct novels) novels,
- min(start_date) start_date,
- max(end_date) end_date,
- min(media) media,
- min(channel) channel ,
- min(type) type,
- if(locate(',',signature)>0,0,1) single_img,
- min(width) width ,
- min(height) height
- from
- (select replace(signature ,' ,','') as signature,title,description article,
- sum(click_count) click_times,
- sum(view_count) view_times,
- sum(cost) consume_amount,
- group_concat(distinct book) novels,
- min(dt) start_date,max(dt) end_date,
- min(replace(preview_url ,' ,','')) media,
- min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
- min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height ,
- type channel,
- if(is_video=1,2,1) type,
- if(locate(',',signature)>0,0,1) single_img
- from dw_image_cost_day where signature is not null and signature!=''
- GROUP BY signature,title,description,type,is_video) as foo
- group by signature ,title,article """
- df = db.dm.getData_pd(sql)
- key = ["signature",'title','article']
- tag = ["view_times", "click_times", "novels", "start_date", "end_date","type","channel",'consume_amount','single_img','media','width','height']
- table = "t_ads_idea"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def run():
- title()
- description()
- image()
- adcreative()
- if __name__ == '__main__':
- run()
|