|  | @@ -4,57 +4,66 @@ desc : 素材库数据清洗
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  from model.DataBaseUtils import MysqlUtils
 | 
	
		
			
				|  |  | -db =MysqlUtils()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +db = MysqlUtils()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def title():
 | 
	
		
			
				|  |  |      sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
 | 
	
		
			
				|  |  | -        sum(cost) consume_amount,
 | 
	
		
			
				|  |  | +        cast(sum(cost) as float) consume_amount,
 | 
	
		
			
				|  |  |          sum(click_count) click_times,
 | 
	
		
			
				|  |  |          sum(view_count) view_times,
 | 
	
		
			
				|  |  | +        cast(count(*) as decimal(10,2)) use_times,
 | 
	
		
			
				|  |  |          group_concat(distinct book) novels,
 | 
	
		
			
				|  |  |          max(dt) end_date,min(dt) start_date 
 | 
	
		
			
				|  |  |          from dw_image_cost_day where title!='' and title is not null  GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '')
 | 
	
		
			
				|  |  |  				"""
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    df =  db.dm.getData_pd(sql)
 | 
	
		
			
				|  |  | -    # print(df)
 | 
	
		
			
				|  |  | +    df = db.dm.pd_data_sql(sql)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      df["data_type"] = 'all'
 | 
	
		
			
				|  |  | -    df['type'] = 1
 | 
	
		
			
				|  |  | -    df['create_by'] = 0
 | 
	
		
			
				|  |  | +    df['type'] = '1'
 | 
	
		
			
				|  |  | +    df['create_by'] = '0'
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      key = ["content", "type"]
 | 
	
		
			
				|  |  | -    tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
 | 
	
		
			
				|  |  | +    tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", 'data_type',
 | 
	
		
			
				|  |  | +           'consume_amount']
 | 
	
		
			
				|  |  |      table = "t_ads_content"
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      db.zx_ads.dfsave2mysql(df, table, key, tag)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  def description():
 | 
	
		
			
				|  |  |      sql = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
 | 
	
		
			
				|  |  |              sum(cost) consume_amount,
 | 
	
		
			
				|  |  |              sum(click_count) click_times,
 | 
	
		
			
				|  |  |              sum(view_count) view_times,
 | 
	
		
			
				|  |  | +            cast(count(*) as decimal(10,2)) use_times,
 | 
	
		
			
				|  |  |              group_concat(distinct book) novels,
 | 
	
		
			
				|  |  |              max(dt) end_date,min(dt) start_date
 | 
	
		
			
				|  |  |              from dw_image_cost_day where description!='' and description is not null  GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '')
 | 
	
		
			
				|  |  |      				"""
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    df = db.dm.getData_pd(sql)
 | 
	
		
			
				|  |  | +    df = db.dm.pd_data_sql(sql)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      # print(df)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      df["data_type"] = 'all'
 | 
	
		
			
				|  |  | -    df['type'] = 2
 | 
	
		
			
				|  |  | -    df['create_by'] = 0
 | 
	
		
			
				|  |  | +    df['type'] = '2'
 | 
	
		
			
				|  |  | +    df['create_by'] = '0'
 | 
	
		
			
				|  |  |      key = ["content", "type"]
 | 
	
		
			
				|  |  | -    tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by",'data_type', 'consume_amount']
 | 
	
		
			
				|  |  | +    tag = ['use_times', "view_times", "click_times", "novels", "start_date", "end_date", "create_by", 'data_type',
 | 
	
		
			
				|  |  | +           'consume_amount']
 | 
	
		
			
				|  |  |      table = "t_ads_content"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      db.zx_ads.dfsave2mysql(df, table, key, tag)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def image():
 | 
	
		
			
				|  |  | -    sql="""select signature,sum(consume_amount) consume_amount,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    sql = """select signature,sum(consume_amount) consume_amount,
 | 
	
		
			
				|  |  |              sum(click_times) click_times,
 | 
	
		
			
				|  |  |              sum(view_times) view_times,
 | 
	
		
			
				|  |  | +            sum(use_times) use_times,
 | 
	
		
			
				|  |  |              group_concat(distinct novels) novels ,
 | 
	
		
			
				|  |  |              max(end_date) end_date,
 | 
	
		
			
				|  |  |              min(start_date) start_date,
 | 
	
	
		
			
				|  | @@ -62,10 +71,16 @@ def image():
 | 
	
		
			
				|  |  |              min(type) type,  
 | 
	
		
			
				|  |  |              if(locate(',',signature)>0,0,1) single_img,
 | 
	
		
			
				|  |  |              min(width ) width ,
 | 
	
		
			
				|  |  | -            min(height ) height 
 | 
	
		
			
				|  |  | +            min(height ) height ,
 | 
	
		
			
				|  |  | +            min(media_size) media_size ,
 | 
	
		
			
				|  |  | +            min(media_format) media_format,
 | 
	
		
			
				|  |  | +            min(video_length) video_length,
 | 
	
		
			
				|  |  | +            min(video_bit_rate) video_bit_rate,
 | 
	
		
			
				|  |  | +            0 max_media_size 
 | 
	
		
			
				|  |  |          from (select replace(signature,' ,','') as signature ,
 | 
	
		
			
				|  |  |              sum(cost) consume_amount,
 | 
	
		
			
				|  |  |              sum(click_count) click_times,
 | 
	
		
			
				|  |  | +            sum(use_times) use_times,
 | 
	
		
			
				|  |  |              sum(view_count) view_times,
 | 
	
		
			
				|  |  |              group_concat(distinct book) novels ,
 | 
	
		
			
				|  |  |              max(dt) end_date,
 | 
	
	
		
			
				|  | @@ -74,7 +89,11 @@ def image():
 | 
	
		
			
				|  |  |              if(is_video=1,2,1) type,  
 | 
	
		
			
				|  |  |              if(locate(',',signature)>0,0,1) single_img,
 | 
	
		
			
				|  |  |              min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
 | 
	
		
			
				|  |  | -			min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height 
 | 
	
		
			
				|  |  | +			min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height,
 | 
	
		
			
				|  |  | +			min(replace(if(left (size ,2)='0,',substring(size ,3),size) ,',0','')) media_size ,
 | 
	
		
			
				|  |  | +            min(replace(format ,' ,','')) media_format,
 | 
	
		
			
				|  |  | +            min(video_length) video_length,
 | 
	
		
			
				|  |  | +            min(video_bit_rate) video_bit_rate
 | 
	
		
			
				|  |  |              from dw_image_cost_day  
 | 
	
		
			
				|  |  |              where signature is not null and signature !=''  
 | 
	
		
			
				|  |  |              and length (replace (replace (signature,',',''),' ',''))>0
 | 
	
	
		
			
				|  | @@ -82,26 +101,37 @@ def image():
 | 
	
		
			
				|  |  |              group by signature   
 | 
	
		
			
				|  |  |              """
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    df = db.dm.getData_pd(sql)
 | 
	
		
			
				|  |  | +    # df = db.dm.getData_pd(sql)
 | 
	
		
			
				|  |  | +    df = db.dm.pd_data_sql(sql)
 | 
	
		
			
				|  |  |      # print(df)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    df['create_by'] = 0
 | 
	
		
			
				|  |  | +    # 进行数据转换-----添加max_media_size
 | 
	
		
			
				|  |  | +    for i in range(len(df['media_size'])):
 | 
	
		
			
				|  |  | +        if not df['media_size'][i]:
 | 
	
		
			
				|  |  | +            continue
 | 
	
		
			
				|  |  | +        size_list = df['media_size'][i].split(',')
 | 
	
		
			
				|  |  | +        max_size = 0
 | 
	
		
			
				|  |  | +        for size_data in size_list:
 | 
	
		
			
				|  |  | +            if size_data != 'None':
 | 
	
		
			
				|  |  | +                if float(size_data) > max_size:
 | 
	
		
			
				|  |  | +                    max_size = str(size_data)
 | 
	
		
			
				|  |  | +        df['max_media_size'][i] = max_size
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    df['create_by'] = '0'
 | 
	
		
			
				|  |  |      df["data_type"] = 'all'
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      key = ["signature"]
 | 
	
		
			
				|  |  | -    tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img", "content",'consume_amount','type','width','height']
 | 
	
		
			
				|  |  | +    tag = ['media_size', 'media_format', 'video_length', 'video_bit_rate', 'use_times', "view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img",
 | 
	
		
			
				|  |  | +           "content", 'consume_amount', 'type', 'width', 'height']
 | 
	
		
			
				|  |  |      table = "t_ads_media"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      db.zx_ads.dfsave2mysql(df, table, key, tag)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  def adcreative():
 | 
	
		
			
				|  |  | -    sql="""select  signature,title,article,
 | 
	
		
			
				|  |  | +    sql = """select  signature,title,article,
 | 
	
		
			
				|  |  |  sum(click_times) click_times,
 | 
	
		
			
				|  |  |  sum(view_times) view_times,
 | 
	
		
			
				|  |  | +sum(use_times) use_times,
 | 
	
		
			
				|  |  |  sum(consume_amount) consume_amount,
 | 
	
		
			
				|  |  |  group_concat(distinct novels) novels,
 | 
	
		
			
				|  |  |  min(start_date) start_date,
 | 
	
	
		
			
				|  | @@ -111,17 +141,27 @@ min(channel) channel ,
 | 
	
		
			
				|  |  |  min(type) type,
 | 
	
		
			
				|  |  |  if(locate(',',signature)>0,0,1) single_img,
 | 
	
		
			
				|  |  |  min(width) width ,
 | 
	
		
			
				|  |  | -min(height) height 
 | 
	
		
			
				|  |  | +min(height) height,
 | 
	
		
			
				|  |  | +min(media_size) media_size ,
 | 
	
		
			
				|  |  | +min(media_format) media_format,
 | 
	
		
			
				|  |  | +min(video_length) video_length,
 | 
	
		
			
				|  |  | +min(video_bit_rate) video_bit_rate,
 | 
	
		
			
				|  |  | +0 max_media_size 
 | 
	
		
			
				|  |  |  from 
 | 
	
		
			
				|  |  |   (select replace(signature ,' ,','') as signature,title,description article,
 | 
	
		
			
				|  |  |  sum(click_count) click_times,
 | 
	
		
			
				|  |  |  sum(view_count) view_times,
 | 
	
		
			
				|  |  | +sum(use_times) use_times,
 | 
	
		
			
				|  |  |  sum(cost) consume_amount,
 | 
	
		
			
				|  |  |  group_concat(distinct book) novels,
 | 
	
		
			
				|  |  |  min(dt) start_date,max(dt) end_date,
 | 
	
		
			
				|  |  |  min(replace(preview_url ,' ,','')) media,
 | 
	
		
			
				|  |  |  min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
 | 
	
		
			
				|  |  |  min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height ,
 | 
	
		
			
				|  |  | +min(replace(if(left (size ,2)='0,',substring(size ,3),size) ,',0','')) media_size ,
 | 
	
		
			
				|  |  | +min(replace(format ,' ,','')) media_format,
 | 
	
		
			
				|  |  | +min(video_length) video_length,
 | 
	
		
			
				|  |  | +min(video_bit_rate) video_bit_rate,
 | 
	
		
			
				|  |  |  type channel,
 | 
	
		
			
				|  |  |  if(is_video=1,2,1) type,
 | 
	
		
			
				|  |  |  if(locate(',',signature)>0,0,1) single_img
 | 
	
	
		
			
				|  | @@ -129,10 +169,24 @@ from dw_image_cost_day where signature is not null and signature!=''
 | 
	
		
			
				|  |  |  GROUP BY  signature,title,description,type,is_video) as foo
 | 
	
		
			
				|  |  |  group by signature ,title,article  """
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    df = db.dm.getData_pd(sql)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    key = ["signature",'title','article']
 | 
	
		
			
				|  |  | -    tag = ["view_times", "click_times", "novels", "start_date", "end_date","type","channel",'consume_amount','single_img','media','width','height']
 | 
	
		
			
				|  |  | +    # df = db.dm.getData_pd(sql)
 | 
	
		
			
				|  |  | +    df = db.dm.pd_data_sql(sql)
 | 
	
		
			
				|  |  | +    # 进行数据转换-----添加max_media_size
 | 
	
		
			
				|  |  | +    for i in range(len(df['media_size'])):
 | 
	
		
			
				|  |  | +        if not df['media_size'][i]:
 | 
	
		
			
				|  |  | +            continue
 | 
	
		
			
				|  |  | +        size_list = df['media_size'][i].split(',')
 | 
	
		
			
				|  |  | +        max_size = 0
 | 
	
		
			
				|  |  | +        for size_data in size_list:
 | 
	
		
			
				|  |  | +            if size_data != 'None':
 | 
	
		
			
				|  |  | +                if float(size_data) > max_size:
 | 
	
		
			
				|  |  | +                    max_size = str(size_data)
 | 
	
		
			
				|  |  | +        df['max_media_size'][i] = max_size
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    key = ["signature", 'title', 'article']
 | 
	
		
			
				|  |  | +    tag = ['media_size', 'media_format', 'video_length', 'video_bit_rate', 'max_media_size', 'use_times', "view_times",
 | 
	
		
			
				|  |  | +           "click_times", "novels", "start_date", "end_date", "type", "channel",
 | 
	
		
			
				|  |  | +           'consume_amount', 'single_img', 'media', 'width', 'height']
 | 
	
		
			
				|  |  |      table = "t_ads_idea"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      db.zx_ads.dfsave2mysql(df, table, key, tag)
 | 
	
	
		
			
				|  | @@ -147,3 +201,7 @@ def run():
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == '__main__':
 | 
	
		
			
				|  |  |      run()
 | 
	
		
			
				|  |  | +    # title()
 | 
	
		
			
				|  |  | +    # description()
 | 
	
		
			
				|  |  | +    # image()
 | 
	
		
			
				|  |  | +    # adcreative()
 |