Jelajahi Sumber

优化阅文的订单查询接口

zwg 4 tahun lalu
induk
komit
e0c420ba53
2 file diubah dengan 181 penambahan dan 213 penghapusan
  1. 2 2
      README.md
  2. 179 211
      dgp/tests/check_order_new.py

+ 2 - 2
README.md

@@ -11,7 +11,7 @@ Data grabbing platform(DGP)数据采集平台
 #### 安装教程
 
 * 确保已安装Python3
-* 安装项目依赖,执行 `pip install -r requirements.txt`
+* 安装项目依赖,执行 ``
 
 #### 特性
 
@@ -26,7 +26,7 @@ Data grabbing platform(DGP)数据采集平台
 * 修改数据库账号配置
 * 检查account和token配置
 * 执行相关脚本:`nohup python3 -u get_data_hourly.py >output_get_data_hourly_202009251042.log 2>&1 &`
-* 执行相关脚本:`nohup python3 -u check_order.py >check_order_202009280211.log 2>&1 &`
+* 执行相关脚本:`nohup python3 -u check_order_new.py >check_order_new_202009300217.log 2>&1 &`
 
 #### 参与贡献
 

+ 179 - 211
dgp/tests/check_order_new.py

@@ -73,153 +73,119 @@ def get_yuewen_order(st, et):
 
 def get_yuewen_order_task(st, et, account):
     order_list = ()
+
+    email = account[0]
+    appsecert = account[1]
+
     url = 'https://open.yuewen.com/cpapi/wxRecharge/querychargelog'
     version = 1
+    order_status = 2 #已支付
+    page_count = 100 #每页100条数据
     start_time = st
-    email = account[0]
-    appsecert = account[1]
 
     for i in range((et - st) // 86400 + 1):
-        end_time = min(start_time + 86400, et)
-        timestamp = int(time.time())
-        s = ''
         page = 1
-        order_status = 2
-        data = {
-            'email': email,
-            'version': version,
-            'timestamp': timestamp,
-            'start_time': start_time,
-            'end_time': end_time,
-            'page': page,
-            'order_status': order_status
-            # 'last_min_id':last_min_id,
-            # 'last_max_id':last_max_id,
-            # 'total_count':total_count,
-            # 'last_page':last_page
-        }
-        sorted_data = sorted(data.items())
-        for k, v in sorted_data:
-            s = s + str(k) + str(v)
+        last_min_id = ''
+        last_max_id = ''
+        total_count = ''
+        last_page = ''
 
-        sign = md5value(appsecert + s).upper()
+        while True:
+            if start_time == et:
+                break
 
-        data1 = {
-            'email': email,
-            'version': version,
-            'timestamp': timestamp,
-            'start_time': start_time,
-            'end_time': end_time,
-            'page': page,
-            'order_status': order_status,
-            'sign': sign
-        }
-        list1 = requests.get(url=url, params=data1)
+            end_time = min(start_time + 86400, et)
+            timestamp = int(time.time())
 
-        ## 此接口有调用频率限制,相同查询条件每分钟仅能请求一次
-        ## exception: list1.json() {'code': 10408, 'msg': '调用频率超限'}
-        if list1.json()['code'] != 0:
-            print('阅文查询充值接口异常:', list1.json())
-            break
+            params = {
+                'email': email,
+                'version': version,
+                'timestamp': timestamp,
+                'start_time': start_time,
+                'end_time': end_time,
+                'page': page,
+                'order_status': order_status
+            }
 
-        total_count = list1.json()['data']['total_count']
-        last_min_id = list1.json()['data']['min_id']
-        last_max_id = list1.json()['data']['max_id']
-        last_page = list1.json()['data']['page']
+            if page > 1:
+                params['last_min_id'] = last_min_id
+                params['last_max_id'] = last_max_id
+                params['total_count'] = total_count
+                params['last_page'] = last_page
 
-        if total_count > 0:
-            for x in list1.json()['data']['list']:
-                y = {}
-                dtime = datetime.datetime.strptime(x['order_time'], "%Y-%m-%d %H:%M:%S")
-                y['date'] = ((int(time.mktime(dtime.timetuple())) + 8 * 3600) // 86400) * 86400 - 8 * 3600
-                y['platform'] = '阅文'
-                y['channel'] = x['app_name']
-                y['from_novel'] = x['book_name']
-                y['user_id'] = x['openid']
-                y['stage'] = ''
-                y['channel_id'] = 0
-                y['order_time'] = x['order_time']
-                y['amount'] = x['amount']
-                y['reg_time'] = x['reg_time']
-                y['order_id'] = x['order_id']
+            sorted_data = sorted(params.items())
+            str_params = ''
+            for k, v in sorted_data:
+                str_params = str_params + str(k) + str(v)
 
-                y = sorted(y.items(), key=lambda item: item[0])
-                y = dict(y)
-                y = tuple(y.values())
-                order_list = order_list + ((y),)
+            sign = md5value(appsecert + str_params).upper()
+
+            #放入签名
+            params['sign'] = sign
+            response_result_json = requests.get(url=url, params=params).json()
+
+            code = response_result_json['code']
+            ## 此接口有调用频率限制,相同查询条件每分钟仅能请求一次
+            if code != 0:
+                print('阅文查询充值接口异常:', response_result_json, '参数', params)
+                break
+                # if code == 10408:
+                #     if fail_count > 0:
+                #         break
+                #
+                #     sleep_seconds = random.randint(60, 70)
+                #     print('阅文获取订单数据线程休眠【{sleep_seconds}】秒,因为该接口有一分钟的限制'.format(sleep_seconds=sleep_seconds))
+                #     time.sleep(sleep_seconds)
+                #
+                #     print('重试一次')
+                #     fail_count = fail_count + 1
+                #     get_yuewen_order_task(st, et, account, fail_count)
+
+            response_data = response_result_json['data']
+            total_count = response_data['total_count']
+
+            if total_count == 0:
+                continue
+
+            last_min_id = response_data['min_id']
+            last_max_id = response_data['max_id']
+            last_page = response_data['page']
+            order_item_list = response_data['list']
+
+            for order_item in order_item_list:
+                order = {}
+                dtime = datetime.datetime.strptime(order_item['order_time'], "%Y-%m-%d %H:%M:%S")
+                order['date'] = ((int(time.mktime(dtime.timetuple())) + 8 * 3600) // 86400) * 86400 - 8 * 3600
+                order['platform'] = '阅文'
+                order['channel'] = order_item['app_name']
+                order['from_novel'] = order_item['book_name']
+                order['user_id'] = order_item['openid']
+                order['stage'] = ''
+                order['channel_id'] = 0
+                order['order_time'] = order_item['order_time']
+                order['amount'] = order_item['amount']
+                order['reg_time'] = order_item['reg_time']
+                order['order_id'] = order_item['order_id']
+
+                order = sorted(order.items(), key=lambda item: item[0])
+                order = dict(order)
+                order = tuple(order.values())
+                order_list = order_list + ((order),)
+
+            # print('阅文账号【{key}】, 查询时间【{start_time} - {end_time}】,当前页【{page}】,本次查询订单数量【{total_count}】'
+            #       .format(key=email, start_time=date_util.getSecondsToDatetime(start_time),
+            #               end_time=date_util.getSecondsToDatetime(end_time),page=page, total_count=total_count))
+
+            if int(page) >= math.ceil(total_count / int(page_count)):
+                break
+
+            page = page + 1
+
+        start_time = start_time + 86400  #天数加1
 
-        if total_count > 100:
-            page_while_count = math.ceil(total_count / 100) + 1
-            if page_while_count > 2:
-                sleep_seconds = random.randint(60, 70)
-                print('阅文获取订单数据线程休眠', sleep_seconds, '秒,因为该接口有一分钟的限制')
-                time.sleep(sleep_seconds)
-
-            for page in range(2, page_while_count):
-                timestamp = int(time.time())
-                data = {
-                    'email': email,
-                    'version': version,
-                    'timestamp': timestamp,
-                    'start_time': start_time,
-                    'end_time': end_time,
-                    'page': page,
-                    'last_min_id': last_min_id,
-                    'last_max_id': last_max_id,
-                    'total_count': total_count,
-                    'last_page': last_page,
-                    'order_status': order_status
-                }
-                sorted_data = sorted(data.items())
-                s1 = ''
-                for k, v in sorted_data:
-                    s1 = s1 + str(k) + str(v)
-                    sign = md5value(appsecert + s1).upper()
-                    data2 = {
-                        'email': email,
-                        'version': version,
-                        'timestamp': timestamp,
-                        'start_time': start_time,
-                        'end_time': end_time,
-                        'page': page,
-                        'last_min_id': last_min_id,
-                        'last_max_id': last_max_id,
-                        'total_count': total_count,
-                        'last_page': last_page,
-                        'order_status': order_status,
-                        'sign': sign
-                    }
-                list2 = requests.get(url=url, params=data2)
-                if list2.json()['code'] != 0:
-                    print('阅文查询充值接口异常:', list2.json(), timestamp, int(time.time()))
-                    break
-
-                for x in list2.json()['data']['list']:
-                    y = {}
-                    dtime = datetime.datetime.strptime(x['order_time'], "%Y-%m-%d %H:%M:%S")
-                    y['date'] = ((int(time.mktime(dtime.timetuple())) + 8 * 3600) // 86400) * 86400 - 8 * 3600
-                    y['platform'] = '阅文'
-                    y['channel'] = x['app_name']
-                    y['from_novel'] = x['book_name']
-                    y['user_id'] = x['openid']
-                    y['stage'] = ''
-                    y['channel_id'] = 0
-                    y['order_time'] = x['order_time']
-                    y['amount'] = x['amount']
-                    y['reg_time'] = x['reg_time']
-                    y['order_id'] = x['order_id']
-
-                    y = sorted(y.items(), key=lambda item: item[0])
-                    y = dict(y)
-                    y = tuple(y.values())
-                    order_list = order_list + ((y),)
-
-                total_count = list2.json()['data']['total_count']
-                last_min_id = list2.json()['data']['min_id']
-                last_max_id = list2.json()['data']['max_id']
-                last_page = list2.json()['data']['page']
-
-        start_time = start_time + 86400
+        # sleep_seconds = random.randint(60, 70)
+        # print('阅文获取订单数据线程休眠【{sleep_seconds}】秒,因为该接口有一分钟的限制'.format(sleep_seconds=sleep_seconds))
+        # time.sleep(sleep_seconds)
 
     return order_list
 
@@ -271,8 +237,8 @@ def get_zhangdu_order_task(st, et, account):
             'starttime': starttime,
             'endtime': endtime
         }
-        list1 = requests.get(url=url, params=params)
-        pageCount = list1.json()['data']['pageCount']
+        response_result_json = requests.get(url=url, params=params).json()
+        pageCount = response_result_json['data']['pageCount']
         if pageCount == 0:
             continue
 
@@ -319,7 +285,7 @@ def get_huasheng_order(st, et):
     total_order_list = ()
     account_list = platform_util.get_huasheng_account_list()
 
-    executor = ProcessPoolExecutor(max_workers=10)
+    executor = ProcessPoolExecutor(max_workers=5)
 
     futures = []
     for account in account_list:
@@ -337,7 +303,7 @@ def get_huasheng_order(st, et):
         response_result_json = requests.post(url, params).json()
 
         if 'data' not in response_result_json.keys():
-            print('花生账号【{apiKey}】本次请求数据为空,响应报文【{result}】'.format(apiKey=apiKey, result=response_result_json))
+            # print('花生账号【{apiKey}】本次请求数据为空,响应报文【{result}】'.format(apiKey=apiKey, result=response_result_json))
             continue
 
         for merchant in response_result_json['data']:
@@ -367,10 +333,10 @@ def get_huasheng_order_task(st, et, account, merchant):
     merchant_id = merchant['merchant_id']
     merchant_name = merchant['merchant_name']
     start_time = st
+    limit = 500
 
     for i in range((et - st) // 86400 + 1):
         page = 1
-        limit = 500
 
         while True:
             date = time.strftime("%Y-%m-%d", time.localtime(start_time))
@@ -388,9 +354,10 @@ def get_huasheng_order_task(st, et, account, merchant):
             response_result_json = requests.post(order_url, order_params).json()
 
             if 'data' not in response_result_json.keys() or len(response_result_json['data']) == 0:
-                print('花生账号【{key}】, 渠道【{merchant_id}:{merchant_name}】本次请求数据为空,响应报文【{result}】'
-                      .format(key=apiKey, merchant_id=merchant_id, merchant_name=merchant_name,
-                              result=response_result_json))
+                # print('花生账号【{key}】, 渠道【{merchant_id}:{merchant_name}】本次请求数据为空,响应报文【{result}】'
+                #       .format(key=apiKey, merchant_id=merchant_id, merchant_name=merchant_name,
+                #               result=response_result_json))
+                break
 
             total_count = response_result_json['count']
             order_item_list = response_result_json['data']
@@ -452,7 +419,7 @@ def get_zzy_order(st, et):
         response_result_json = requests.get(url + params).json()  # 获取子渠道列表
 
         if 'data' not in response_result_json.keys():
-            print('掌中云账号【{key}】本次请求数据为空,响应报文【{result}】'.format(key=key, result=response_result_json))
+            # print('掌中云账号【{key}】本次请求数据为空,响应报文【{result}】'.format(key=key, result=response_result_json))
             continue
 
         items = response_result_json['data']['items']
@@ -504,8 +471,8 @@ def get_zzy_order_task(st, et, account, channel):
         response_result_json = requests.get(order_url + params).json()
 
         if 'data' not in response_result_json.keys():
-            print('掌中云账号【{key}】, 渠道【{channel_id}:{channel_name}】本次请求数据为空,响应报文【{result}】'
-                  .format(key=key, channel_id=channel_id, channel_name=channel_name, result=response_result_json))
+            # print('掌中云账号【{key}】, 渠道【{channel_id}:{channel_name}】本次请求数据为空,响应报文【{result}】'
+            #       .format(key=key, channel_id=channel_id, channel_name=channel_name, result=response_result_json))
             break
 
         total_count = response_result_json['data']['count']  # 总数量
@@ -703,76 +670,77 @@ def start_all_job():
     st_unix = date_util.getYesterdayStartTime()
     et_unix = date_util.getTodayStartTime()
 
-    st_unix = 1601136000  # 2020/9/27 0:0:0
-    et_unix = 1601308800  # 2020/9/29 0:0:0
+    # st_unix = 1601136000  # 2020/9/27 0:0:0
+    # et_unix = 1601308800  # 2020/9/29 0:0:0
+    # et_unix = st_unix + 10  # 2020/9/29 0:0:0
 
     print('查询开始时间:', st_unix, date_util.getSecondsToDatetime(st_unix))
     print('查询结束时间:', et_unix, date_util.getSecondsToDatetime(et_unix))
 
-    order_list = get_yuewen_order(st_unix, et_unix)
-    mysql_insert_order(order_list)
-
-    # platform_order_num_list = mysql_select_platform_order_count(date_util.getYesterdayStartTime())
-    # if len(platform_order_num_list) == 0:
-    #     print('本地库中没有任何数据,现在全平台补全')
-    #     mysql_insert_order(get_zzy_order(st_unix, et_unix))
-    #     mysql_insert_order(get_yuewen_order(st_unix, et_unix))
-    #     mysql_insert_order(get_huasheng_order(st_unix, et_unix))
-    #     mysql_insert_order(get_ysg_order(st_unix, et_unix))
-    #     mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
-    # else:
-    #     platform_list = ['阅文','悠书阁','掌读','掌中云','花生']
-    #     for platform_order_num in platform_order_num_list:
-    #         platform = str(platform_order_num['platform'])
-    #         num = int(platform_order_num['num'])
-    #         platform_list.remove(platform)
-    #
-    #         if platform == '阅文':
-    #             order_list = get_yuewen_order(st_unix, et_unix)
-    #             if len(order_list) != num:
-    #                 print('阅文数据实际订单和已经入库数据差异:', len(order_list) - num)
-    #                 mysql_insert_order(order_list)
-    #         elif platform == '悠书阁':
-    #             order_list = get_ysg_order(st_unix, et_unix)
-    #             if len(order_list) != num:
-    #                 print('悠书阁数据实际订单和已经入库数据差异:', len(order_list) - num)
-    #                 mysql_insert_order(order_list)
-    #         elif platform == '掌读':
-    #             order_list = get_zhangdu_order(st_unix, et_unix)
-    #             if len(order_list) != num:
-    #                 print('掌读数据实际订单和已经入库数据差异:', len(order_list) - num)
-    #                 mysql_insert_order(order_list)
-    #         elif platform == '掌中云':
-    #             order_list = get_zzy_order(st_unix, et_unix)
-    #             if len(order_list) != num:
-    #                 print('掌中云数据实际订单和已经入库数据差异:', len(order_list) - num)
-    #                 mysql_insert_order(order_list)
-    #         elif platform == '花生':
-    #             order_list = get_huasheng_order(st_unix, et_unix)
-    #             if len(order_list) != num:
-    #                 print('花生数据实际订单和已经入库数据差异:', len(order_list) - num)
-    #                 mysql_insert_order(order_list)
-    #         else:
-    #             print('发现未知平台数据!', platform_order_num)
-    #
-    #     for platform in platform_list:
-    #         if platform == '阅文':
-    #             print('阅文没有数据')
-    #             mysql_insert_order(get_yuewen_order(st_unix, et_unix))
-    #         elif platform == '悠书阁':
-    #             print('悠书阁没有数据')
-    #             mysql_insert_order(get_ysg_order(st_unix, et_unix))
-    #         elif platform == '掌读':
-    #             print('掌读没有数据')
-    #             mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
-    #         elif platform == '掌中云':
-    #             print('掌中云没有数据')
-    #             mysql_insert_order(get_zzy_order(st_unix, et_unix))
-    #         elif platform == '花生':
-    #             print('花生没有数据')
-    #             mysql_insert_order(get_huasheng_order(st_unix, et_unix))
-    #         else:
-    #             print('什么鬼平台:', platform)
+    # order_list = get_yuewen_order(st_unix, et_unix)
+    # mysql_insert_order(order_list)
+
+    platform_order_num_list = mysql_select_platform_order_count(date_util.getYesterdayStartTime())
+    if len(platform_order_num_list) == 0:
+        print('本地库中没有任何数据,现在全平台补全')
+        mysql_insert_order(get_zzy_order(st_unix, et_unix))
+        mysql_insert_order(get_yuewen_order(st_unix, et_unix))
+        mysql_insert_order(get_huasheng_order(st_unix, et_unix))
+        mysql_insert_order(get_ysg_order(st_unix, et_unix))
+        mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
+    else:
+        platform_list = ['阅文','悠书阁','掌读','掌中云','花生']
+        for platform_order_num in platform_order_num_list:
+            platform = str(platform_order_num['platform'])
+            num = int(platform_order_num['num'])
+            platform_list.remove(platform)
+
+            if platform == '阅文':
+                order_list = get_yuewen_order(st_unix, et_unix)
+                if len(order_list) != num:
+                    print('阅文数据实际订单和已经入库数据差异:', len(order_list) - num)
+                    mysql_insert_order(order_list)
+            elif platform == '悠书阁':
+                order_list = get_ysg_order(st_unix, et_unix)
+                if len(order_list) != num:
+                    print('悠书阁数据实际订单和已经入库数据差异:', len(order_list) - num)
+                    mysql_insert_order(order_list)
+            elif platform == '掌读':
+                order_list = get_zhangdu_order(st_unix, et_unix)
+                if len(order_list) != num:
+                    print('掌读数据实际订单和已经入库数据差异:', len(order_list) - num)
+                    mysql_insert_order(order_list)
+            elif platform == '掌中云':
+                order_list = get_zzy_order(st_unix, et_unix)
+                if len(order_list) != num:
+                    print('掌中云数据实际订单和已经入库数据差异:', len(order_list) - num)
+                    mysql_insert_order(order_list)
+            elif platform == '花生':
+                order_list = get_huasheng_order(st_unix, et_unix)
+                if len(order_list) != num:
+                    print('花生数据实际订单和已经入库数据差异:', len(order_list) - num)
+                    mysql_insert_order(order_list)
+            else:
+                print('发现未知平台数据!', platform_order_num)
+
+        for platform in platform_list:
+            if platform == '阅文':
+                print('阅文没有数据')
+                mysql_insert_order(get_yuewen_order(st_unix, et_unix))
+            elif platform == '悠书阁':
+                print('悠书阁没有数据')
+                mysql_insert_order(get_ysg_order(st_unix, et_unix))
+            elif platform == '掌读':
+                print('掌读没有数据')
+                mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
+            elif platform == '掌中云':
+                print('掌中云没有数据')
+                mysql_insert_order(get_zzy_order(st_unix, et_unix))
+            elif platform == '花生':
+                print('花生没有数据')
+                mysql_insert_order(get_huasheng_order(st_unix, et_unix))
+            else:
+                print('什么鬼平台:', platform)
 
     print('订单检查执行时间(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)