před 4 roky · bfe24a3fb3
--- a/app/etl/MaterialLibrary/MaterialDataClean.py
+++ b/app/etl/MaterialLibrary/MaterialDataClean.py
@@ -52,17 +52,31 @@ def description():
 
				 
			
 
				 
			
 
				 def image():
			
 
				-    sql="""select signature,
			
 
				+    sql="""select signature,sum(consume_amount) consume_amount,
			
 
				+            sum(click_times) click_times,
			
 
				+            sum(view_times) view_times,
			
 
				+            group_concat(distinct novels) novels ,
			
 
				+            max(end_date) end_date,
			
 
				+            min(start_date) start_date,
			
 
				+            min(content) content,
			
 
				+            min(type) type,  
			
 
				+            if(locate(',',signature)>0,0,1) single_img
			
 
				+        from (select replace(signature,' ,','') as signature ,
			
 
				             sum(cost) consume_amount,
			
 
				             sum(click_count) click_times,
			
 
				             sum(view_count) view_times,
			
 
				             group_concat(distinct book) novels ,
			
 
				             max(dt) end_date,
			
 
				             min(dt) start_date,
			
 
				-            min(preview_url) content,
			
 
				+            replace (min(preview_url),' ,','') as content,
			
 
				             if(is_video=1,2,1) type,  
			
 
				             if(locate(',',signature)>0,0,1) single_img
			
 
				-            from dw_image_cost_day  where signature is not null and signature !=''  GROUP BY  signature,is_video"""
			
 
				+            from dw_image_cost_day  
			
 
				+            where signature is not null and signature !=''  
			
 
				+            and length (replace (replace (signature,',',''),' ',''))>0
			
 
				+            GROUP BY  signature,is_video) as foo
			
 
				+            group by signature  
			
 
				+            """
			
 
				 
			
 
				     df = db.dm.getData_pd(sql)
			
 
				     # print(df)
			
--- a/app/etl/dw/dw_image_cost_day.py
+++ b/app/etl/dw/dw_image_cost_day.py
@@ -34,7 +34,8 @@ def run(dt):
 
				     for i in data:
			
 
				         # print(i)
			
 
				         li.extend(i[-1].split(','))
			
 
				-    #TODO:之后如果一天产生的图片过多,可能超过sql的字符限制
			
 
				+    #TODO:之后如果一天产生的图片过多,可能超过sql的字符限制,
			
 
				+    # 之后数据使用hive,来进行数据存储
			
 
				 
			
 
				     sql3 = f"select image_id,preview_url,signature,width,height from image_info where  image_id in ({str(set(li))[1:-1]})"
			
 
				 
			
@@ -46,23 +47,31 @@ def run(dt):
 
				     # print(image_di)
			
 
				 
			
 
				     for i in data:
			
 
				-        y = ''
			
 
				-        z = ''
			
 
				+        preview_url = ''
			
 
				+        signature = ''
			
 
				         width = ''
			
 
				         height = ''
			
 
				+        image_id = ''
			
 
				         for j in i[-1].split(','):
			
 
				             if image_di.get(j):
			
 
				-                y = y + ',' + image_di.get(j)[0]
			
 
				-                z = z + ',' + image_di.get(j)[1]
			
 
				+                image_id = image_id + ',' + j
			
 
				+                preview_url = preview_url + ',' + image_di.get(j)[0]
			
 
				+                signature = signature + ',' + image_di.get(j)[1]
			
 
				                 width = width + ',' + str(image_di.get(j)[2])
			
 
				                 height = height + ',' + str(image_di.get(j)[3])
			
 
				-        i.append(y[1:])
			
 
				-        i.append(z[1:])
			
 
				+            else:
			
 
				+                image_id = image_id + ',' +j
			
 
				+                preview_url = preview_url + ',' + ' '
			
 
				+                signature = signature + ',' + ' '
			
 
				+                width = width + ',' + '0'
			
 
				+                height = height + ',' + '0'
			
 
				+        i[-1]=image_id[1:]
			
 
				+        i.append(preview_url[1:])
			
 
				+        i.append(signature[1:])
			
 
				         i.append(0)
			
 
				         i.append(width[1:])
			
 
				         i.append(height[1:])
			
 
				 
			
 
				-    # print(data)
			
 
				     # exit(0)
			
 
				     sql_video = f"""SELECT a.dt,b.type,sum(a.cost),sum(view_count),sum(click_count),sum(follow_count),sum(order_count),sum(order_amount),
			
 
				             title,description,book,platform,stage,e.channel,pitcher,ifnull(image_id,''),g.preview_url,g.signature,1,