|
@@ -4,57 +4,66 @@ desc : 素材库数据清洗
|
|
|
|
|
|
"""
|
|
"""
|
|
from model.DataBaseUtils import MysqlUtils
|
|
from model.DataBaseUtils import MysqlUtils
|
|
# Module-level database helper, created once at import time. The functions
# below read through `db.dm` and write through `db.zx_ads`.
db = MysqlUtils()
|
|
|
|
+
|
|
|
|
|
|
def title():
    """Aggregate per-title ad metrics and save them to ``t_ads_content``.

    Reads ``dw_image_cost_day`` through ``db.dm.pd_data_sql``, grouping rows by
    the title text with CR/LF characters stripped, and hands the result to
    ``db.zx_ads.dfsave2mysql`` keyed on ``(content, type)``.

    Every row is stamped with ``type='1'``, ``create_by='0'`` and
    ``data_type='all'`` before saving.

    NOTE(review): ``dfsave2mysql(df, table, key, tag)`` presumably upserts on
    the ``key`` columns and writes the ``tag`` columns -- confirm against
    ``MysqlUtils``.
    """
    sql = """select REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '') content,
cast(sum(cost) as float) consume_amount,
sum(click_count) click_times,
sum(view_count) view_times,
cast(count(*) as decimal(10,2)) use_times,
group_concat(distinct book) novels,
max(dt) end_date,min(dt) start_date
from dw_image_cost_day where title!='' and title is not null GROUP BY REPLACE(REPLACE(title, CHAR(10), ''), CHAR(13), '')
"""
    df = db.dm.pd_data_sql(sql)

    # Constant columns are written as strings, matching description().
    df["data_type"] = 'all'
    df['type'] = '1'
    df['create_by'] = '0'

    key = ["content", "type"]
    # 'use_times' is selected by the query above, so it must be listed here
    # to be passed on to dfsave2mysql; description() already lists it for the
    # same table.
    tag = ['use_times', "view_times", "click_times", "novels", "start_date", "end_date", "create_by", 'data_type',
           'consume_amount']
    table = "t_ads_content"
    db.zx_ads.dfsave2mysql(df, table, key, tag)
|
|
|
|
|
|
|
|
+
|
|
def description():
    """Roll up per-description ad metrics and store them in ``t_ads_content``.

    The query groups ``dw_image_cost_day`` by the description text with CR/LF
    characters removed and is executed via ``db.dm.pd_data_sql``. Each
    resulting row is tagged with ``data_type='all'``, ``type='2'`` and
    ``create_by='0'``, then passed to ``db.zx_ads.dfsave2mysql`` with
    ``(content, type)`` as the key columns.
    """
    query = """select REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '') content,
sum(cost) consume_amount,
sum(click_count) click_times,
sum(view_count) view_times,
cast(count(*) as decimal(10,2)) use_times,
group_concat(distinct book) novels,
max(dt) end_date,min(dt) start_date
from dw_image_cost_day where description!='' and description is not null GROUP BY REPLACE(REPLACE(description, CHAR(10), ''), CHAR(13), '')
"""
    frame = db.dm.pd_data_sql(query)

    # Stamp the constant columns in the same order as before.
    for column, constant in (("data_type", 'all'), ('type', '2'), ('create_by', '0')):
        frame[column] = constant

    db.zx_ads.dfsave2mysql(
        frame,
        "t_ads_content",
        ["content", "type"],
        ['use_times', "view_times", "click_times", "novels", "start_date", "end_date", "create_by", 'data_type',
         'consume_amount'],
    )
|
|
|
|
|
|
|
|
|
|
def image():
|
|
def image():
|
|
- sql="""select signature,sum(consume_amount) consume_amount,
|
|
|
|
|
|
+
|
|
|
|
+ sql = """select signature,sum(consume_amount) consume_amount,
|
|
sum(click_times) click_times,
|
|
sum(click_times) click_times,
|
|
sum(view_times) view_times,
|
|
sum(view_times) view_times,
|
|
|
|
+ sum(use_times) use_times,
|
|
group_concat(distinct novels) novels ,
|
|
group_concat(distinct novels) novels ,
|
|
max(end_date) end_date,
|
|
max(end_date) end_date,
|
|
min(start_date) start_date,
|
|
min(start_date) start_date,
|
|
@@ -62,10 +71,16 @@ def image():
|
|
min(type) type,
|
|
min(type) type,
|
|
if(locate(',',signature)>0,0,1) single_img,
|
|
if(locate(',',signature)>0,0,1) single_img,
|
|
min(width ) width ,
|
|
min(width ) width ,
|
|
- min(height ) height
|
|
|
|
|
|
+ min(height ) height ,
|
|
|
|
+ min(media_size) media_size ,
|
|
|
|
+ min(media_format) media_format,
|
|
|
|
+ min(video_length) video_length,
|
|
|
|
+ min(video_bit_rate) video_bit_rate,
|
|
|
|
+ 0 max_media_size
|
|
from (select replace(signature,' ,','') as signature ,
|
|
from (select replace(signature,' ,','') as signature ,
|
|
sum(cost) consume_amount,
|
|
sum(cost) consume_amount,
|
|
sum(click_count) click_times,
|
|
sum(click_count) click_times,
|
|
|
|
+ sum(use_times) use_times,
|
|
sum(view_count) view_times,
|
|
sum(view_count) view_times,
|
|
group_concat(distinct book) novels ,
|
|
group_concat(distinct book) novels ,
|
|
max(dt) end_date,
|
|
max(dt) end_date,
|
|
@@ -74,7 +89,11 @@ def image():
|
|
if(is_video=1,2,1) type,
|
|
if(is_video=1,2,1) type,
|
|
if(locate(',',signature)>0,0,1) single_img,
|
|
if(locate(',',signature)>0,0,1) single_img,
|
|
min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
|
|
min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
|
|
- min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height
|
|
|
|
|
|
+ min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height,
|
|
|
|
+ min(replace(if(left (size ,2)='0,',substring(size ,3),size) ,',0','')) media_size ,
|
|
|
|
+ min(replace(format ,' ,','')) media_format,
|
|
|
|
+ min(video_length) video_length,
|
|
|
|
+ min(video_bit_rate) video_bit_rate
|
|
from dw_image_cost_day
|
|
from dw_image_cost_day
|
|
where signature is not null and signature !=''
|
|
where signature is not null and signature !=''
|
|
and length (replace (replace (signature,',',''),' ',''))>0
|
|
and length (replace (replace (signature,',',''),' ',''))>0
|
|
@@ -82,26 +101,37 @@ def image():
|
|
group by signature
|
|
group by signature
|
|
"""
|
|
"""
|
|
|
|
|
|
- df = db.dm.getData_pd(sql)
|
|
|
|
|
|
+ # df = db.dm.getData_pd(sql)
|
|
|
|
+ df = db.dm.pd_data_sql(sql)
|
|
# print(df)
|
|
# print(df)
|
|
-
|
|
|
|
- df['create_by'] = 0
|
|
|
|
|
|
+ # 进行数据转换-----添加max_media_size
|
|
|
|
+ for i in range(len(df['media_size'])):
|
|
|
|
+ if not df['media_size'][i]:
|
|
|
|
+ continue
|
|
|
|
+ size_list = df['media_size'][i].split(',')
|
|
|
|
+ max_size = 0
|
|
|
|
+ for size_data in size_list:
|
|
|
|
+ if size_data != 'None':
|
|
|
|
+ if float(size_data) > max_size:
|
|
|
|
+ max_size = str(size_data)
|
|
|
|
+ df['max_media_size'][i] = max_size
|
|
|
|
+
|
|
|
|
+ df['create_by'] = '0'
|
|
df["data_type"] = 'all'
|
|
df["data_type"] = 'all'
|
|
|
|
|
|
key = ["signature"]
|
|
key = ["signature"]
|
|
- tag = ["view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img", "content",'consume_amount','type','width','height']
|
|
|
|
|
|
+ tag = ['media_size', 'media_format', 'video_length', 'video_bit_rate', 'use_times', "view_times", "click_times", "novels", "start_date", "end_date", "create_by", "single_img",
|
|
|
|
+ "content", 'consume_amount', 'type', 'width', 'height']
|
|
table = "t_ads_media"
|
|
table = "t_ads_media"
|
|
|
|
|
|
db.zx_ads.dfsave2mysql(df, table, key, tag)
|
|
db.zx_ads.dfsave2mysql(df, table, key, tag)
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def adcreative():
|
|
def adcreative():
|
|
- sql="""select signature,title,article,
|
|
|
|
|
|
+ sql = """select signature,title,article,
|
|
sum(click_times) click_times,
|
|
sum(click_times) click_times,
|
|
sum(view_times) view_times,
|
|
sum(view_times) view_times,
|
|
|
|
+sum(use_times) use_times,
|
|
sum(consume_amount) consume_amount,
|
|
sum(consume_amount) consume_amount,
|
|
group_concat(distinct novels) novels,
|
|
group_concat(distinct novels) novels,
|
|
min(start_date) start_date,
|
|
min(start_date) start_date,
|
|
@@ -111,17 +141,27 @@ min(channel) channel ,
|
|
min(type) type,
|
|
min(type) type,
|
|
if(locate(',',signature)>0,0,1) single_img,
|
|
if(locate(',',signature)>0,0,1) single_img,
|
|
min(width) width ,
|
|
min(width) width ,
|
|
-min(height) height
|
|
|
|
|
|
+min(height) height,
|
|
|
|
+min(media_size) media_size ,
|
|
|
|
+min(media_format) media_format,
|
|
|
|
+min(video_length) video_length,
|
|
|
|
+min(video_bit_rate) video_bit_rate,
|
|
|
|
+0 max_media_size
|
|
from
|
|
from
|
|
(select replace(signature ,' ,','') as signature,title,description article,
|
|
(select replace(signature ,' ,','') as signature,title,description article,
|
|
sum(click_count) click_times,
|
|
sum(click_count) click_times,
|
|
sum(view_count) view_times,
|
|
sum(view_count) view_times,
|
|
|
|
+sum(use_times) use_times,
|
|
sum(cost) consume_amount,
|
|
sum(cost) consume_amount,
|
|
group_concat(distinct book) novels,
|
|
group_concat(distinct book) novels,
|
|
min(dt) start_date,max(dt) end_date,
|
|
min(dt) start_date,max(dt) end_date,
|
|
min(replace(preview_url ,' ,','')) media,
|
|
min(replace(preview_url ,' ,','')) media,
|
|
min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
|
|
min(replace(if(left (width ,2)='0,',substring(width ,3),width) ,',0','')) width ,
|
|
min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height ,
|
|
min(replace(if(left (height ,2)='0,',substring(height ,3),height) ,',0','')) height ,
|
|
|
|
+min(replace(if(left (size ,2)='0,',substring(size ,3),size) ,',0','')) media_size ,
|
|
|
|
+min(replace(format ,' ,','')) media_format,
|
|
|
|
+min(video_length) video_length,
|
|
|
|
+min(video_bit_rate) video_bit_rate,
|
|
type channel,
|
|
type channel,
|
|
if(is_video=1,2,1) type,
|
|
if(is_video=1,2,1) type,
|
|
if(locate(',',signature)>0,0,1) single_img
|
|
if(locate(',',signature)>0,0,1) single_img
|
|
@@ -129,10 +169,24 @@ from dw_image_cost_day where signature is not null and signature!=''
|
|
GROUP BY signature,title,description,type,is_video) as foo
|
|
GROUP BY signature,title,description,type,is_video) as foo
|
|
group by signature ,title,article """
|
|
group by signature ,title,article """
|
|
|
|
|
|
- df = db.dm.getData_pd(sql)
|
|
|
|
-
|
|
|
|
- key = ["signature",'title','article']
|
|
|
|
- tag = ["view_times", "click_times", "novels", "start_date", "end_date","type","channel",'consume_amount','single_img','media','width','height']
|
|
|
|
|
|
+ # df = db.dm.getData_pd(sql)
|
|
|
|
+ df = db.dm.pd_data_sql(sql)
|
|
|
|
+ # 进行数据转换-----添加max_media_size
|
|
|
|
+ for i in range(len(df['media_size'])):
|
|
|
|
+ if not df['media_size'][i]:
|
|
|
|
+ continue
|
|
|
|
+ size_list = df['media_size'][i].split(',')
|
|
|
|
+ max_size = 0
|
|
|
|
+ for size_data in size_list:
|
|
|
|
+ if size_data != 'None':
|
|
|
|
+ if float(size_data) > max_size:
|
|
|
|
+ max_size = str(size_data)
|
|
|
|
+ df['max_media_size'][i] = max_size
|
|
|
|
+
|
|
|
|
+ key = ["signature", 'title', 'article']
|
|
|
|
+ tag = ['media_size', 'media_format', 'video_length', 'video_bit_rate', 'max_media_size', 'use_times', "view_times",
|
|
|
|
+ "click_times", "novels", "start_date", "end_date", "type", "channel",
|
|
|
|
+ 'consume_amount', 'single_img', 'media', 'width', 'height']
|
|
table = "t_ads_idea"
|
|
table = "t_ads_idea"
|
|
|
|
|
|
db.zx_ads.dfsave2mysql(df, table, key, tag)
|
|
db.zx_ads.dfsave2mysql(df, table, key, tag)
|
|
@@ -147,3 +201,7 @@ def run():
|
|
|
|
|
|
# Script entry point. run() is defined earlier in the file (outside this
# view); presumably it invokes the cleaning steps named below -- confirm
# against run().
if __name__ == '__main__':
    run()
    # Individual steps, currently disabled; presumably kept so a single step
    # can be run by hand.
    # title()
    # description()
    # image()
    # adcreative()
|