123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- """
- desc : 素材库数据清洗
- 每日运行一次
- """
- from model.DataBaseUtils import MysqlUtils
- db = MysqlUtils()
- def title():
- sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
- cast(sum(cost) as float) consume_amount,
- sum(click_count) click_times,
- sum(view_count) view_times,
- cast(count(*) as decimal(10,2)) use_times,
- group_concat(distinct book) novels,
- max(dt) end_date,min(dt) start_date
- from dw_image_cost_day where title!='' and title is not null GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '')
- """
- df = db.dm.pd_data_sql(sql)
- df["data_type"] = 'all'
- df['type'] = '1'
- df['create_by'] = '0'
- key = ["content", "type"]
- tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", 'data_type',
- 'consume_amount']
- table = "t_ads_content"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def description():
- sql = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
- sum(cost) consume_amount,
- sum(click_count) click_times,
- sum(view_count) view_times,
- cast(count(*) as decimal(10,2)) use_times,
- group_concat(distinct book) novels,
- max(dt) end_date,min(dt) start_date
- from dw_image_cost_day where description!='' and description is not null GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '')
- """
- df = db.dm.pd_data_sql(sql)
- df["data_type"] = 'all'
- df['type'] = '2'
- df['create_by'] = '0'
- key = ["content", "type"]
- tag = ['use_times', "view_times", "click_times", "novels", "start_date", "end_date", "create_by", 'data_type',
- 'consume_amount']
- table = "t_ads_content"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def image():
- # TODO:视频,图片新添加的列没有归并到media,idea列里面,在dw_image_cost_day也没有进行对应归并
- sql = """select signature,sum(consume_amount) consume_amount,
- sum(click_times) click_times,
- sum(view_times) view_times,
- sum(use_times) use_times,
- group_concat(distinct novels) novels ,
- max(end_date) end_date,
- min(start_date) start_date,
- min(content) content,
- min(type) type,
- if(locate(',',signature)>0,0,1) single_img,
- min(width ) width ,
- min(height ) height ,
- min(media_size) media_size ,
- min(media_format) media_format,
- min(video_length) video_length,
- min(video_bit_rate) video_bit_rate,
- 0 max_media_size,
- min(width)/if(min(height),min(height),1) aspect_ratio
- from (select replace(signature,' ,','') as signature ,
- sum(cost) consume_amount,
- sum(click_count) click_times,
- sum(use_times) use_times,
- sum(view_count) view_times,
- group_concat(distinct book) novels ,
- max(dt) end_date,
- min(dt) start_date,
- replace (min(preview_url),' ,','') as content,
- if(is_video=1,2,1) type,
- if(locate(',',signature)>0,0,1) single_img,
- if(min(width)>0,min(width),0) width,
- if(min(height)>0,min(height),0) height,
- min(replace(if(left (size ,2)='0,',substring(size ,3),size) ,',0','')) media_size,
- min(replace(format ,' ,','')) media_format,
- if(min(video_length)>0,min(video_length),0) video_length,
- if(min(video_bit_rate)>0,min(video_bit_rate),0) video_bit_rate
- from dw_image_cost_day
- where signature is not null and signature !=''
- and length (replace (replace (signature,',',''),' ',''))>0
- GROUP BY signature,is_video) as foo
- group by signature
- """
- # df = db.dm.getData_pd(sql)
- df = db.dm.pd_data_sql(sql)
- # print(df)
- # 进行数据转换-----添加max_media_size
- for i in range(len(df['media_size'])):
- if not df['media_size'][i]:
- continue
- size_list = df['media_size'][i].split(',')
- max_size = 0
- for size_data in size_list:
- if size_data != 'None':
- if float(size_data) > float(max_size):
- max_size = str(size_data)
- df['max_media_size'][i] = max_size
- df['create_by'] = '0'
- df["data_type"] = 'all'
- key = ["signature"]
- tag = ['media_size', 'media_format', 'video_length', 'video_bit_rate',
- 'max_media_size', 'use_times', "view_times",
- "click_times",
- "novels", "start_date", "end_date", "create_by", "single_img",
- "content", 'consume_amount', 'type', 'width', 'height', 'aspect_ratio']
- table = "t_ads_media"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- def adcreative():
- sql = """select signature,title,article,
- sum(click_times) click_times,
- sum(view_times) view_times,
- sum(use_times) use_times,
- sum(consume_amount) consume_amount,
- group_concat(distinct novels) novels,
- min(start_date) start_date,
- max(end_date) end_date,
- min(media) media,
- min(channel) channel ,
- min(type) type,
- if(locate(',',signature)>0,0,1) single_img,
- min(width) width,
- min(height) height,
- min(media_size) media_size ,
- min(media_format) media_format,
- min(video_length) video_length,
- min(video_bit_rate) video_bit_rate,
- 0 max_media_size,
- min(width)/if(min(height),min(height),1) aspect_ratio
- from
- (select replace(signature ,' ,','') as signature,title,description article,
- sum(click_count) click_times,
- sum(view_count) view_times,
- sum(use_times) use_times,
- sum(cost) consume_amount,
- group_concat(distinct book) novels,
- min(dt) start_date,max(dt) end_date,
- min(replace(preview_url ,' ,','')) media,
- if(min(width)>0,min(width),0) width,
- if(min(height)>0,min(height),0) height,
- min(replace(if(left (size ,2)='0,',substring(size ,3),size) ,',0','')) media_size ,
- min(replace(format ,' ,','')) media_format,
- if(min(video_length)>0,min(video_length),0) video_length,
- if(min(video_bit_rate)>0,min(video_bit_rate),0) video_bit_rate,
- type channel,
- if(is_video=1,2,1) type,
- if(locate(',',signature)>0,0,1) single_img
- from dw_image_cost_day where signature is not null and signature!=''
- GROUP BY signature,title,description,type,is_video) as foo
- group by signature ,title ,article
- """
- # df = db.dm.getData_pd(sql)
- df = db.dm.pd_data_sql(sql)
- # 进行数据转换-----添加max_media_size
- for i in range(len(df['media_size'])):
- if not df['media_size'][i]:
- continue
- size_list = df['media_size'][i].split(',')
- max_size = 0
- for size_data in size_list:
- if size_data != 'None':
- if float(size_data) > float(max_size):
- max_size = str(size_data)
- df['max_media_size'][i] = max_size
- key = ["signature", 'title', 'article']
- tag = ['media_size', 'media_format', 'video_length', 'video_bit_rate', 'max_media_size',
- 'use_times', "view_times",
- "click_times", "novels", "start_date", "end_date", "type", "channel",
- 'consume_amount', 'single_img', 'media', 'width', 'height', 'aspect_ratio']
- table = "t_ads_idea"
- db.zx_ads.dfsave2mysql(df, table, key, tag)
- # TODO:线上,线下视频进行归并
- # 同一个signature选择同一个
- def run():
- title()
- description()
- image()
- adcreative()
- if __name__ == '__main__':
- run()
- # title()
- # description()
- # image()
- # adcreative()
|