ck 4 년 전
부모
커밋
1b4de51af1
2개의 변경된 파일229개의 추가작업 그리고 1개의 파일을 삭제
  1. 2 1
      app/etl/dw/dw_pitcher_daily.py
  2. 227 0
      app/etl/src/book_annual_expect_profit.py

+ 2 - 1
app/etl/dw/dw_pitcher_daily.py

@@ -29,7 +29,8 @@ def dw_pitcher_trend():
        sum(da13) d13,sum(da14) d14,sum(da15) d15,sum(da16) d16,sum(da17) d17,sum(da18) d18,sum(da19) d19,
        sum(da20) d20,sum(da21) d21,sum(da22) d22,sum(da23) d23,sum(da24) d24,sum(da25) d25,
        sum(da26) d26,sum(da27) d27,sum(da28) d28,sum(da29) d29,sum(da30) d30,
-       sum(ba1),sum(ba2),sum(ba3),sum(ba4),sum(ba5),sum(ba6),sum(ba7)
+       sum(ba1),sum(ba2),sum(ba3),sum(ba4),sum(ba5),sum(ba6),sum(ba7),
+       sum(first_order_amount*annual_mult-cost) annual_expect_profit
 from dw_channel a
 left join dw_channel_amount_daily b on a.dt=b.dt and a.channel=b.channel
 left join src_book_info c on a.book=c.book and a.platform=c.platform and a.type=c.type and a.dt=c.dt 

+ 227 - 0
app/etl/src/book_annual_expect_profit.py

@@ -0,0 +1,227 @@
+
+import pandas as pd
+from model.DataBaseUtils import CkUtils,MysqlUtils
+import numpy as np
+from model.DateUtils import DateUtils
+import matplotlib.pyplot as plt
+from scipy.optimize import curve_fit
+import time
+plt.rcParams['font.sans-serif']=['SimHei']
+plt.rcParams['axes.unicode_minus'] = False
+ck=CkUtils()
+db = MysqlUtils()
+du=DateUtils()
+
+def run():
+    sql="""select
+       book,type,
+       round(sum(w1)/sum(if(dt>=subtractDays(today(),5),0,a1)),2) wa1,
+round(sum(w2)/sum(if(dt>=subtractDays(today(),12),0,a1)),2) wa2,
+round(sum(w3)/sum(if(dt>=subtractDays(today(),19),0,a1)),2) wa3,
+round(sum(w4)/sum(if(dt>=subtractDays(today(),26),0,a1)),2) wa4,
+round(sum(w5)/sum(if(dt>=subtractDays(today(),33),0,a1)),2) wa5,
+round(sum(w6)/sum(if(dt>=subtractDays(today(),40),0,a1)),2) wa6,
+round(sum(w7)/sum(if(dt>=subtractDays(today(),47),0,a1)),2) wa7,
+round(sum(w8)/sum(if(dt>=subtractDays(today(),54),0,a1)),2) wa8,
+round(sum(w9)/sum(if(dt>=subtractDays(today(),61),0,a1)),2) wa9,
+round(sum(w10)/sum(if(dt>=subtractDays(today(),68),0,a1)),2) wa10,
+round(sum(w11)/sum(if(dt>=subtractDays(today(),75),0,a1)),2) wa11,
+round(sum(w12)/sum(if(dt>=subtractDays(today(),82),0,a1)),2) wa12,
+round(sum(w13)/sum(if(dt>=subtractDays(today(),89),0,a1)),2) wa13,
+round(sum(w14)/sum(if(dt>=subtractDays(today(),96),0,a1)),2) wa14,
+round(sum(w15)/sum(if(dt>=subtractDays(today(),103),0,a1)),2) wa15,
+round(sum(w16)/sum(if(dt>=subtractDays(today(),110),0,a1)),2) wa16,
+round(sum(w17)/sum(if(dt>=subtractDays(today(),117),0,a1)),2) wa17,
+round(sum(w18)/sum(if(dt>=subtractDays(today(),124),0,a1)),2) wa18,
+round(sum(w19)/sum(if(dt>=subtractDays(today(),131),0,a1)),2) wa19,
+round(sum(w20)/sum(if(dt>=subtractDays(today(),138),0,a1)),2) wa20,
+round(sum(w21)/sum(if(dt>=subtractDays(today(),145),0,a1)),2) wa21,
+round(sum(w22)/sum(if(dt>=subtractDays(today(),152),0,a1)),2) wa22,
+round(sum(w23)/sum(if(dt>=subtractDays(today(),159),0,a1)),2) wa23,
+round(sum(w24)/sum(if(dt>=subtractDays(today(),166),0,a1)),2) wa24,
+round(sum(w25)/sum(if(dt>=subtractDays(today(),173),0,a1)),2) wa25,
+round(sum(w26)/sum(if(dt>=subtractDays(today(),180),0,a1)),2) wa26,
+round(sum(w27)/sum(if(dt>=subtractDays(today(),187),0,a1)),2) wa27,
+round(sum(w28)/sum(if(dt>=subtractDays(today(),194),0,a1)),2) wa28,
+round(sum(w29)/sum(if(dt>=subtractDays(today(),201),0,a1)),2) wa29,
+round(sum(w30)/sum(if(dt>=subtractDays(today(),208),0,a1)),2) wa30,
+round(sum(w31)/sum(if(dt>=subtractDays(today(),215),0,a1)),2) wa31,
+round(sum(w32)/sum(if(dt>=subtractDays(today(),222),0,a1)),2) wa32,
+round(sum(w33)/sum(if(dt>=subtractDays(today(),229),0,a1)),2) wa33,
+round(sum(w34)/sum(if(dt>=subtractDays(today(),236),0,a1)),2) wa34,
+round(sum(w35)/sum(if(dt>=subtractDays(today(),243),0,a1)),2) wa35,
+round(sum(w36)/sum(if(dt>=subtractDays(today(),250),0,a1)),2) wa36,
+round(sum(w37)/sum(if(dt>=subtractDays(today(),257),0,a1)),2) wa37,
+round(sum(w38)/sum(if(dt>=subtractDays(today(),264),0,a1)),2) wa38,
+round(sum(w39)/sum(if(dt>=subtractDays(today(),271),0,a1)),2) wa39,
+round(sum(w40)/sum(if(dt>=subtractDays(today(),278),0,a1)),2) wa40,
+round(sum(w41)/sum(if(dt>=subtractDays(today(),285),0,a1)),2) wa41,
+round(sum(w42)/sum(if(dt>=subtractDays(today(),292),0,a1)),2) wa42,
+round(sum(w43)/sum(if(dt>=subtractDays(today(),299),0,a1)),2) wa43,
+round(sum(w44)/sum(if(dt>=subtractDays(today(),306),0,a1)),2) wa44,
+round(sum(w45)/sum(if(dt>=subtractDays(today(),313),0,a1)),2) wa45,
+round(sum(w46)/sum(if(dt>=subtractDays(today(),320),0,a1)),2) wa46,
+round(sum(w47)/sum(if(dt>=subtractDays(today(),327),0,a1)),2) wa47,
+round(sum(w48)/sum(if(dt>=subtractDays(today(),334),0,a1)),2) wa48,
+round(sum(w49)/sum(if(dt>=subtractDays(today(),341),0,a1)),2) wa49,
+round(sum(w50)/sum(if(dt>=subtractDays(today(),348),0,a1)),2) wa50,
+round(sum(w51)/sum(if(dt>=subtractDays(today(),355),0,a1)),2) wa51,
+round(sum(w52)/sum(if(dt>=subtractDays(today(),362),0,a1)),2) wa52
+
+       from dw_daily_channel a
+left outer join
+(select toDate(formatDateTime(reg_time,'%Y-%m-%d')) as dt,
+   channel as channel,
+sum(if(subtractDays(date, 1)>=toDate(reg_time),0,amount)) as a1,
+if(dt<subtractDays(today(), 5), sum(if(subtractDays(date, 7)>=toDate(reg_time),0,amount)),null) as w1,
+if(dt<subtractDays(today(), 12), sum(if(subtractDays(date, 14)>=toDate(reg_time),0,amount)),null) as w2,
+if(dt<subtractDays(today(), 19), sum(if(subtractDays(date, 21)>=toDate(reg_time),0,amount)),null) as w3,
+if(dt<subtractDays(today(), 26), sum(if(subtractDays(date, 28)>=toDate(reg_time),0,amount)),null) as w4,
+if(dt<subtractDays(today(), 33), sum(if(subtractDays(date, 35)>=toDate(reg_time),0,amount)),null) as w5,
+if(dt<subtractDays(today(), 40), sum(if(subtractDays(date, 42)>=toDate(reg_time),0,amount)),null) as w6,
+if(dt<subtractDays(today(), 47), sum(if(subtractDays(date, 49)>=toDate(reg_time),0,amount)),null) as w7,
+if(dt<subtractDays(today(), 54), sum(if(subtractDays(date, 56)>=toDate(reg_time),0,amount)),null) as w8,
+if(dt<subtractDays(today(), 61), sum(if(subtractDays(date, 63)>=toDate(reg_time),0,amount)),null) as w9,
+if(dt<subtractDays(today(), 68), sum(if(subtractDays(date, 70)>=toDate(reg_time),0,amount)),null) as w10,
+if(dt<subtractDays(today(), 75), sum(if(subtractDays(date, 77)>=toDate(reg_time),0,amount)),null) as w11,
+if(dt<subtractDays(today(), 82), sum(if(subtractDays(date, 84)>=toDate(reg_time),0,amount)),null) as w12,
+if(dt<subtractDays(today(), 89), sum(if(subtractDays(date, 91)>=toDate(reg_time),0,amount)),null) as w13,
+if(dt<subtractDays(today(), 96), sum(if(subtractDays(date, 98)>=toDate(reg_time),0,amount)),null) as w14,
+if(dt<subtractDays(today(), 103), sum(if(subtractDays(date, 105)>=toDate(reg_time),0,amount)),null) as w15,
+if(dt<subtractDays(today(), 110), sum(if(subtractDays(date, 112)>=toDate(reg_time),0,amount)),null) as w16,
+if(dt<subtractDays(today(), 117), sum(if(subtractDays(date, 119)>=toDate(reg_time),0,amount)),null) as w17,
+if(dt<subtractDays(today(), 124), sum(if(subtractDays(date, 126)>=toDate(reg_time),0,amount)),null) as w18,
+if(dt<subtractDays(today(), 131), sum(if(subtractDays(date, 133)>=toDate(reg_time),0,amount)),null) as w19,
+if(dt<subtractDays(today(), 138), sum(if(subtractDays(date, 140)>=toDate(reg_time),0,amount)),null) as w20,
+if(dt<subtractDays(today(), 145), sum(if(subtractDays(date, 147)>=toDate(reg_time),0,amount)),null) as w21,
+if(dt<subtractDays(today(), 152), sum(if(subtractDays(date, 154)>=toDate(reg_time),0,amount)),null) as w22,
+if(dt<subtractDays(today(), 159), sum(if(subtractDays(date, 161)>=toDate(reg_time),0,amount)),null) as w23,
+if(dt<subtractDays(today(), 166), sum(if(subtractDays(date, 168)>=toDate(reg_time),0,amount)),null) as w24,
+if(dt<subtractDays(today(), 173), sum(if(subtractDays(date, 175)>=toDate(reg_time),0,amount)),null) as w25,
+if(dt<subtractDays(today(), 180), sum(if(subtractDays(date, 182)>=toDate(reg_time),0,amount)),null) as w26,
+if(dt<subtractDays(today(), 187), sum(if(subtractDays(date, 189)>=toDate(reg_time),0,amount)),null) as w27,
+if(dt<subtractDays(today(), 194), sum(if(subtractDays(date, 196)>=toDate(reg_time),0,amount)),null) as w28,
+if(dt<subtractDays(today(), 201), sum(if(subtractDays(date, 203)>=toDate(reg_time),0,amount)),null) as w29,
+if(dt<subtractDays(today(), 208), sum(if(subtractDays(date, 210)>=toDate(reg_time),0,amount)),null) as w30,
+if(dt<subtractDays(today(), 215), sum(if(subtractDays(date, 217)>=toDate(reg_time),0,amount)),null) as w31,
+if(dt<subtractDays(today(), 222), sum(if(subtractDays(date, 224)>=toDate(reg_time),0,amount)),null) as w32,
+if(dt<subtractDays(today(), 229), sum(if(subtractDays(date, 231)>=toDate(reg_time),0,amount)),null) as w33,
+if(dt<subtractDays(today(), 236), sum(if(subtractDays(date, 238)>=toDate(reg_time),0,amount)),null) as w34,
+if(dt<subtractDays(today(), 243), sum(if(subtractDays(date, 245)>=toDate(reg_time),0,amount)),null) as w35,
+if(dt<subtractDays(today(), 250), sum(if(subtractDays(date, 252)>=toDate(reg_time),0,amount)),null) as w36,
+if(dt<subtractDays(today(), 257), sum(if(subtractDays(date, 259)>=toDate(reg_time),0,amount)),null) as w37,
+if(dt<subtractDays(today(), 264), sum(if(subtractDays(date, 266)>=toDate(reg_time),0,amount)),null) as w38,
+if(dt<subtractDays(today(), 271), sum(if(subtractDays(date, 273)>=toDate(reg_time),0,amount)),null) as w39,
+if(dt<subtractDays(today(), 278), sum(if(subtractDays(date, 280)>=toDate(reg_time),0,amount)),null) as w40,
+if(dt<subtractDays(today(), 285), sum(if(subtractDays(date, 287)>=toDate(reg_time),0,amount)),null) as w41,
+if(dt<subtractDays(today(), 292), sum(if(subtractDays(date, 294)>=toDate(reg_time),0,amount)),null) as w42,
+if(dt<subtractDays(today(), 299), sum(if(subtractDays(date, 301)>=toDate(reg_time),0,amount)),null) as w43,
+if(dt<subtractDays(today(), 306), sum(if(subtractDays(date, 308)>=toDate(reg_time),0,amount)),null) as w44,
+if(dt<subtractDays(today(), 313), sum(if(subtractDays(date, 315)>=toDate(reg_time),0,amount)),null) as w45,
+if(dt<subtractDays(today(), 320), sum(if(subtractDays(date, 322)>=toDate(reg_time),0,amount)),null) as w46,
+if(dt<subtractDays(today(), 327), sum(if(subtractDays(date, 329)>=toDate(reg_time),0,amount)),null) as w47,
+if(dt<subtractDays(today(), 334), sum(if(subtractDays(date, 336)>=toDate(reg_time),0,amount)),null) as w48,
+if(dt<subtractDays(today(), 341), sum(if(subtractDays(date, 343)>=toDate(reg_time),0,amount)),null) as w49,
+if(dt<subtractDays(today(), 348), sum(if(subtractDays(date, 350)>=toDate(reg_time),0,amount)),null) as w50,
+if(dt<subtractDays(today(), 355), sum(if(subtractDays(date, 357)>=toDate(reg_time),0,amount)),null) as w51,
+if(dt<subtractDays(today(), 362), sum(if(subtractDays(date, 364)>=toDate(reg_time),0,amount)),null) as w52
+from order a
+where reg_time>'2019-03-18 00:00:00' group by toDate(formatDateTime(reg_time,'%Y-%m-%d')),channel) b using (dt,channel)
+where cost>0 and a1>0 and book!='' 
+group by book,type 
+
+"""
+    col=['book','first_amount','start','end','w1','w2','w3','w4','w5','w6','w7','w8','w9','w10','w11','w12','w13','w14','w15','w16','w17','w18','w19','w20','w21','w22','w23','w24','w25','w26','w27','w28','w29','w30','w31','w32','w33','w34','w35','w36','w37','w38','w39','w40','w41','w42','w43','w44','w45','w46','w47','w48','w49','w50','w51','w52',]
+    # df =ck.getData_pd(sql,col=col)
+    # print(df)
+    # df.to_csv('./book.csv',encoding='utf-8')
+    df = ck.execute(sql)
+    # print(df)
+    li = [list(i) for i in df]
+    # print(li)
+
+
+    li2=[]
+    for i in li:
+        w = []
+        w.append(i[0])
+        w.append(i[1])
+        x=1
+        for j in i[2:]:
+            if j is None:
+                break
+            if j>=x:
+                w.append(j)
+                x=j
+            else:
+                break
+        li2.append(w)
+    # print(li2)
+    li4=[]
+    for i in li2:
+        li3=[]
+        if len(i)<5:
+            continue
+        # print(i[0])
+        y = np.array(i[2:])
+        x = np.array([n for n in range(2,len(i))])
+        # print(x,y)
+
+        try:
+            popt, pcov = curve_fit(func, x, y)
+            # print(popt[0])
+            if popt[0]<0:
+                continue
+
+            # print(i[0])
+            # yvals = func(x, popt[0], popt[1], popt[2])  # 拟合y值
+            li3.append(i[0])
+            li3.append(i[1])
+            li3.append(round(func(52, popt[0], popt[1], popt[2]),2))
+            # di[i[0]]=round(func(52, popt[0], popt[1], popt[2]),2)
+            # print(di)
+            li4.append(li3)
+
+            # 画图
+            # plot1 = plt.plot(x, y, 's', label='original values')
+            # plot2 = plt.plot(x, yvals, 'r', label='polyfit values')
+            # plt.xlabel('x')
+            # plt.ylabel('y')
+            # plt.legend(loc=4)  # 指定legend的位置右下角
+            # plt.title(i[0])
+            # plt.show()
+            # time.sleep(1)
+
+
+        except:
+            print(i[0],i[1],'找不到解')
+
+
+    print(li4)
+    print(li4.__len__())
+    update_data(li4)
+
+
+
+def func(x,a,b,k):
+        return 1/(a*b**x+k)
+
+
+
+def update_data(data):
+    dt =du.getNow()
+    for i in data:
+        book = i[0]
+        type= i[1]
+        mult = i[2]
+        db.dm.execute(f"update src_book_info set annual_mult={mult} where book='{book}' and type ='{type}'")
+
+
+
+if __name__ == '__main__':
+    # x=""
+    # for i in range(1,53):
+    #     x+=f"'w{i}',"
+    # print(x)
+    run()
+    # print(fuc(1,2,3,4))