Explorar el Código

测试代码修改:
掌中云订单爬取代码优化:原本即使只有一页数据也需要请求两次接口(先查一次总数,再按页查询),既浪费性能,又消耗接口调用次数
订单爬取bug修复:从前永远都无法爬取创建时间恰好等于查询起始时间的整点数据,包括0点0分0秒(原因:起始时间条件用的是 created_at[gt] 大于,现改为 created_at[gte] 大于等于)

zhengwangeng hace 4 años
padre
commit
e487f84213
Se han modificado 2 ficheros con 201 adiciones y 159 borrados
  1. 165 152
      dgp/tests/check_order_new.py
  2. 36 7
      dgp/tests/demo/test_read_ini.py

+ 165 - 152
dgp/tests/check_order_new.py

@@ -52,10 +52,10 @@ def md5value(s):
 ##《1》阅文
 def get_yuewen_order(st, et):
     start_exec_seconds = date_util.getCurrentSecondTime()
-    total_order = ()
+    total_order_list = ()
     account_list = platform_util.get_yuewen_account_list()
 
-    executor = ProcessPoolExecutor(max_workers=4)
+    executor = ProcessPoolExecutor(max_workers=5)
 
     futures = []
     for account in account_list:
@@ -64,11 +64,12 @@ def get_yuewen_order(st, et):
     executor.shutdown(True)
 
     for future in futures:
-        if len(future.result()) > 0:
-            total_order = future.result() + total_order
+        order_list = future.result()
+        if len(order_list) > 0:
+            total_order_list = order_list + total_order_list
 
-    print('阅文订单数量:', len(total_order), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
-    return total_order
+    print('阅文订单数量:', len(total_order_list), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
+    return total_order_list
 
 def get_yuewen_order_task(st, et, account):
     order_list = ()
@@ -226,10 +227,10 @@ def get_yuewen_order_task(st, et, account):
 ##《2》掌读
 def get_zhangdu_order(st, et):
     start_exec_seconds = date_util.getCurrentSecondTime()
-    total_order = ()
+    total_order_list = ()
     account_list = platform_util.get_zhangdu_account_list()
 
-    executor = ProcessPoolExecutor(max_workers=4)
+    executor = ProcessPoolExecutor(max_workers=5)
 
     futures = []
     for account in account_list:
@@ -238,11 +239,12 @@ def get_zhangdu_order(st, et):
     executor.shutdown(True)
 
     for future in futures:
-        if len(future.result()) > 0:
-            total_order = future.result() + total_order
+        order_list = future.result()
+        if len(order_list) > 0:
+            total_order_list = order_list + total_order_list
 
-    print('掌读订单数量:', len(total_order), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
-    return total_order
+    print('掌读订单数量:', len(total_order_list), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
+    return total_order_list
 
 def get_zhangdu_order_task(st, et, account):
     order_list = ()
@@ -313,10 +315,10 @@ def get_zhangdu_order_task(st, et, account):
 ##《3》花生
 def get_huasheng_order(st, et):
     start_exec_seconds = date_util.getCurrentSecondTime()
-    total_order = ()
+    total_order_list = ()
     account_list = platform_util.get_huasheng_account_list()
 
-    executor = ProcessPoolExecutor(max_workers=4)
+    executor = ProcessPoolExecutor(max_workers=5)
 
     futures = []
     for account in account_list:
@@ -331,19 +333,25 @@ def get_huasheng_order(st, et):
             'timestamp': timestamp,
             'sign': sign
         }
-        merchant_list = requests.post(url, params).json()
+        response_result_json = requests.post(url, params).json()
 
-        for merchant in merchant_list['data']:
+        if 'data' not in response_result_json.keys():
+            print('花生账号【{apiKey}】本次请求数据为空,响应报文【{result}】'.format(apiKey=apiKey, result=response_result_json))
+            continue
+
+        for merchant in response_result_json['data']:
             future = executor.submit(get_huasheng_order_task, st, et, account, merchant)
             futures.append(future)
+
     executor.shutdown(True)
 
     for future in futures:
-        if len(future.result()) > 0:
-            total_order = future.result() + total_order
+        order_list = future.result()
+        if len(order_list) > 0:
+            total_order_list = order_list + total_order_list
 
-    print('花生订单数量:', len(total_order), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
-    return total_order
+    print('花生订单数量:', len(total_order_list), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
+    return total_order_list
 
 def get_huasheng_order_task(st, et, account, merchant):
     order_list = ()
@@ -417,10 +425,11 @@ def get_huasheng_order_task(st, et, account, merchant):
 ##《4》掌中云
 def get_zzy_order(st, et):
     start_exec_seconds = date_util.getCurrentSecondTime()
-    total_order = ()
+    total_order_list = ()
     account_list = al.zzy_account_list
     # account_list = platform_util.get_zhangzhongyun_account_list()
     # account_list = [['1108701f1d6','0f9c0f8429d1a16a8a78c2306e7a4db3','清勇7月']]
+    # account_list = [['1109295d56c','9bb955186597882ac473e86ba4576158','趣程20期']]
 
     executor = ProcessPoolExecutor(max_workers=5)
 
@@ -429,101 +438,105 @@ def get_zzy_order(st, et):
         url = 'https://openapi.818tu.com/partners/channel/channels/list?'
         key = account[0]
         secert = account[1]
-        my_sign = md5value(secert + 'key=' + key)
-        parameter = 'key=' + key + '&sign=' + my_sign
-        channel_list = requests.get(url + parameter)  # 获取子渠道列表
-
-        if 'data' in channel_list.json().keys():
-            items = channel_list.json()['data']['items']
-        else:
-            print('掌中云本次请求数据为空', account)
-            items = []
+        sign = md5value(secert + 'key=' + key)
+        parameter = 'key=' + key + '&sign=' + sign
+        response_result_json = requests.get(url + parameter).json()# 获取子渠道列表
+
+        if 'data' not in response_result_json.keys():
+            print('掌中云账号【{key}】本次请求数据为空,响应报文【{result}】'.format(key=key, result=response_result_json))
             continue
 
-        for item in items:
+        items = response_result_json['data']['items']
+        for channel in items:
             # 获取channel_id 后逐个拉取历史orders
-            future = executor.submit(get_zzy_order_task, st, et, account, item)
+            future = executor.submit(get_zzy_order_task, st, et, account, channel)
             futures.append(future)
 
     executor.shutdown(True)
 
     for future in futures:
-        result = future.result()
-        if len(result) > 0:
-            total_order = result + total_order
+        order_list = future.result()
+        if len(order_list) > 0:
+            total_order_list = order_list + total_order_list
 
-    print('掌中云订单数量:', len(total_order), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
-    return total_order
+    print('掌中云订单数量:', len(total_order_list), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
+    return total_order_list
 
-def get_zzy_order_task(st, et, account, item):
+def get_zzy_order_task(st, et, account, channel):
     # 掌中云的时间格式比较特殊,转换下
     st = platform_util.get_zhangzhongyun_format_time(st)
     et = platform_util.get_zhangzhongyun_format_time(et)
 
     order_list = ()
 
-    my_key = account[0]
+    key = account[0]
     secert = account[1]
     stage = account[2]
 
-    channel_id = item['id']
-    channel = item['nickname']
+    order_url = 'https://openapi.818tu.com/partners/channel/orders/list?'
+    channel_id = channel['id']
+    channel_name = channel['nickname']
     status = str(1)
+    page = str(1)
     per_page = str(1000)
-    limit_time = et
     get_time = st
-    lt = parse.urlencode({'created_at[lt]': limit_time})
-    gt = parse.urlencode({'created_at[gt]': get_time})
-    url_1 = 'https://openapi.818tu.com/partners/channel/orders/list?'
-    my_sign_1 = md5value(secert + 'channel_id=' + str(
-        channel_id) + '&created_at[gt]=' + get_time + '&created_at[lt]=' + limit_time + '&key=' + my_key + '&per_page=' + per_page + '&status=' + status)
-    parameter_1 = 'channel_id=' + str(
-        channel_id) + '&' + gt + '&' + lt + '&per_page=' + per_page + '&status=' + status + '&key=' + my_key + '&sign=' + my_sign_1
-    orders = requests.get(url_1 + parameter_1)
-    t = orders.json()['data']['count'] // int(per_page) + 1
-    for page in range(1, t + 1):
-        my_sign_2 = md5value(secert + 'channel_id=' + str(
-            channel_id) + '&created_at[gt]=' + get_time + '&created_at[lt]=' + limit_time + '&key=' + my_key + '&page=' + str(
-            page) + '&per_page=' + per_page + '&status=' + status)
-        parameter_2 = 'channel_id=' + str(channel_id) + '&' + gt + '&' + lt + '&page=' + str(
-            page) + '&per_page=' + per_page + '&status=' + status + '&key=' + my_key + '&sign=' + my_sign_2
-        orders_1 = requests.get(url_1 + parameter_2)
-
-        # print(orders_1.json())
-        b = orders_1.json()['data']['items']
-
-        for a in b:
-            c = {}
-            c['user_id'] = str(a['member']['openid'])
-            c['channel'] = channel
-            c['reg_time'] = a['member']['created_at']
-            c['channel_id'] = channel_id
-            c['amount'] = round(a['price'] / 100, 2)
-            c['order_id'] = str(a['id'])
-            c['order_time'] = a['created_at']
-            c['platform'] = '掌中云'
-            c['stage'] = stage
-            dtime = datetime.datetime.strptime(a['created_at'][0:10], "%Y-%m-%d")
-            c['date'] = ((int(time.mktime(dtime.timetuple())) + 8 * 3600) // 86400) * 86400 - 8 * 3600
-
-            if str(a['from_novel_id']) != 'None':
-                c['from_novel'] = a['from_novel']['title']
+    limit_time = et
+    gte = parse.urlencode({'created_at[gte]': get_time}) #gte就是ge 大于等于开始时间
+    lt = parse.urlencode({'created_at[lt]': limit_time}) #小于 结束时间
+
+    while True:
+        sign = md5value(secert + 'channel_id=' + str(channel_id) + '&created_at[gte]=' + get_time + '&created_at[lt]=' + limit_time + '&key=' + key + '&page=' + str(page) + '&per_page=' + per_page + '&status=' + status)
+        parameter = 'channel_id=' + str(channel_id) + '&' + gte + '&' + lt + '&page=' + str(page) + '&per_page=' + per_page + '&status=' + status + '&key=' + key + '&sign=' + sign
+
+        response_result_json = requests.get(order_url + parameter).json()
+
+        if 'data' not in response_result_json.keys():
+            print('掌中云账号【{key}】, 渠道【{channel_id}:{channel_name}】本次请求数据为空,响应报文【{result}】'
+                  .format(key=key, channel_id=channel_id, channel_name=channel_name, result=response_result_json))
+            break
+
+        total_count = response_result_json['data']['count'] #总数量
+        order_item_list = response_result_json['data']['items'] #订单列表
+
+        for order_item in order_item_list:
+            order = {}
+            order['user_id'] = str(order_item['member']['openid'])
+            order['channel'] = channel_name
+            order['reg_time'] = order_item['member']['created_at']
+            order['channel_id'] = channel_id
+            order['amount'] = round(order_item['price'] / 100, 2)
+            order['order_id'] = str(order_item['id'])
+            order['order_time'] = order_item['created_at']
+            order['platform'] = '掌中云'
+            order['stage'] = stage
+            dtime = datetime.datetime.strptime(order_item['created_at'][0:10], "%Y-%m-%d")
+            order['date'] = ((int(time.mktime(dtime.timetuple())) + 8 * 3600) // 86400) * 86400 - 8 * 3600
+
+            if str(order_item['from_novel_id']) != 'None':
+                order['from_novel'] = order_item['from_novel']['title']
             else:
-                c['from_novel'] = 'None'
-            x = sorted(c.items(), key=lambda item: item[0])
+                order['from_novel'] = 'None'
+            x = sorted(order.items(), key=lambda item: item[0])
             x = dict(x)
             x = tuple(x.values())
             order_list = order_list + ((x),)
 
+        if int(page) >= math.ceil(total_count / int(per_page)):
+            break
+
+        print('掌中云账号【{key}】, 渠道【{channel_id}:{channel_name}】当前页【{page}】,本次查询订单数【{total_count}】,即将查询下一页'
+              .format(key=key, channel_id=channel_id, channel_name=channel_name, page=page, total_count=total_count))
+        page = int(page) + 1
+
     return order_list
 
 ##《5》 悠书阁
 def get_ysg_order(st, et):
     start_exec_seconds = date_util.getCurrentSecondTime()
-    total_order = ()
+    total_order_list = ()
     account_list = platform_util.get_youshuge_account_list()
 
-    executor = ProcessPoolExecutor(max_workers=4)
+    executor = ProcessPoolExecutor(max_workers=5)
 
     futures = []
     for account in account_list:
@@ -532,11 +545,12 @@ def get_ysg_order(st, et):
     executor.shutdown(True)
 
     for future in futures:
-        if len(future.result()) > 0:
-            total_order = future.result() + total_order
+        order_list = future.result()
+        if len(order_list) > 0:
+            total_order_list = order_list + total_order_list
 
-    print('悠书阁订单数量:', len(total_order), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
-    return total_order
+    print('悠书阁订单数量:', len(total_order_list), '执行时长(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)
+    return total_order_list
 
 
 def get_ysg_order_task(st, et, account):
@@ -571,6 +585,7 @@ def get_ysg_order_task(st, et, account):
     respone = requests.post(url, data)
     if respone.status_code == 400:
         print('respone', respone)
+
     result_json = respone.json()
     first_page_order = build_ysg_order_data(channel, channel_id, result_json, stage)
     order_list = order_list + first_page_order
@@ -666,7 +681,6 @@ def mysql_select_platform_order_count(date):
 
 def start_all_job():
     start_exec_seconds = date_util.getCurrentSecondTime()
-    platform_order_num_list = mysql_select_platform_order_count(date_util.getYesterdayStartTime())
 
     st_unix = date_util.getYesterdayStartTime()
     et_unix = date_util.getTodayStartTime()
@@ -674,71 +688,70 @@ def start_all_job():
     print('查询开始时间:', st_unix, date_util.getSecondsToDatetime(st_unix))
     print('查询结束时间:', et_unix, date_util.getSecondsToDatetime(et_unix))
 
-    # order_list = get_yuewen_order(st_unix, et_unix)
-    # mysql_insert_order(order_list)
-
-    if len(platform_order_num_list) != 0:
-        print('本地库中没有任何数据,现在全平台补全')
-
-        # get_zzy_order(st_unix, et_unix)
-        mysql_insert_order(get_zzy_order(st_unix, et_unix))
-        # mysql_insert_order(get_yuewen_order(st_unix, et_unix))
-        # mysql_insert_order(get_huasheng_order(st_unix, et_unix))
-        # mysql_insert_order(get_ysg_order(st_unix, et_unix))
-        # mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
-    else:
-        platform_list = ['阅文','悠书阁','掌读','掌中云','花生']
-        for platform_order_num in platform_order_num_list:
-            platform = str(platform_order_num['platform'])
-            num = int(platform_order_num['num'])
-            platform_list.remove(platform)
-
-            if platform == '阅文':
-                order_list = get_yuewen_order(st_unix, et_unix)
-                if len(order_list) != num:
-                    print('阅文数据实际订单和已经入库数据差异:', len(order_list) - num)
-                    mysql_insert_order(order_list)
-            elif platform == '悠书阁':
-                order_list = get_ysg_order(st_unix, et_unix)
-                if len(order_list) != num:
-                    print('悠书阁数据实际订单和已经入库数据差异:', len(order_list) - num)
-                    mysql_insert_order(order_list)
-            elif platform == '掌读':
-                order_list = get_zhangdu_order(st_unix, et_unix)
-                if len(order_list) != num:
-                    print('掌读数据实际订单和已经入库数据差异:', len(order_list) - num)
-                    mysql_insert_order(order_list)
-            elif platform == '掌中云':
-                order_list = get_zzy_order(st_unix, et_unix)
-                if len(order_list) != num:
-                    print('掌中云数据实际订单和已经入库数据差异:', len(order_list) - num)
-                    mysql_insert_order(order_list)
-            elif platform == '花生':
-                order_list = get_huasheng_order(st_unix, et_unix)
-                if len(order_list) != num:
-                    print('花生数据实际订单和已经入库数据差异:', len(order_list) - num)
-                    mysql_insert_order(order_list)
-            else:
-                print('发现未知平台数据!', platform_order_num)
-
-        for platform in platform_list:
-            if platform == '阅文':
-                print('阅文没有数据')
-                mysql_insert_order(get_yuewen_order(st_unix, et_unix))
-            elif platform == '悠书阁':
-                print('悠书阁没有数据')
-                mysql_insert_order(get_ysg_order(st_unix, et_unix))
-            elif platform == '掌读':
-                print('掌读没有数据')
-                mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
-            elif platform == '掌中云':
-                print('掌中云没有数据')
-                mysql_insert_order(get_zzy_order(st_unix, et_unix))
-            elif platform == '花生':
-                print('花生没有数据')
-                mysql_insert_order(get_huasheng_order(st_unix, et_unix))
-            else:
-                print('什么鬼平台:', platform)
+    order_list = get_zzy_order(st_unix, et_unix)
+    mysql_insert_order(order_list)
+
+    # platform_order_num_list = mysql_select_platform_order_count(date_util.getYesterdayStartTime())
+    # if len(platform_order_num_list) == 0:
+    #     print('本地库中没有任何数据,现在全平台补全')
+    #     mysql_insert_order(get_zzy_order(st_unix, et_unix))
+    #     mysql_insert_order(get_yuewen_order(st_unix, et_unix))
+    #     mysql_insert_order(get_huasheng_order(st_unix, et_unix))
+    #     mysql_insert_order(get_ysg_order(st_unix, et_unix))
+    #     mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
+    # else:
+    #     platform_list = ['阅文','悠书阁','掌读','掌中云','花生']
+    #     for platform_order_num in platform_order_num_list:
+    #         platform = str(platform_order_num['platform'])
+    #         num = int(platform_order_num['num'])
+    #         platform_list.remove(platform)
+    #
+    #         if platform == '阅文':
+    #             order_list = get_yuewen_order(st_unix, et_unix)
+    #             if len(order_list) != num:
+    #                 print('阅文数据实际订单和已经入库数据差异:', len(order_list) - num)
+    #                 mysql_insert_order(order_list)
+    #         elif platform == '悠书阁':
+    #             order_list = get_ysg_order(st_unix, et_unix)
+    #             if len(order_list) != num:
+    #                 print('悠书阁数据实际订单和已经入库数据差异:', len(order_list) - num)
+    #                 mysql_insert_order(order_list)
+    #         elif platform == '掌读':
+    #             order_list = get_zhangdu_order(st_unix, et_unix)
+    #             if len(order_list) != num:
+    #                 print('掌读数据实际订单和已经入库数据差异:', len(order_list) - num)
+    #                 mysql_insert_order(order_list)
+    #         elif platform == '掌中云':
+    #             order_list = get_zzy_order(st_unix, et_unix)
+    #             if len(order_list) != num:
+    #                 print('掌中云数据实际订单和已经入库数据差异:', len(order_list) - num)
+    #                 mysql_insert_order(order_list)
+    #         elif platform == '花生':
+    #             order_list = get_huasheng_order(st_unix, et_unix)
+    #             if len(order_list) != num:
+    #                 print('花生数据实际订单和已经入库数据差异:', len(order_list) - num)
+    #                 mysql_insert_order(order_list)
+    #         else:
+    #             print('发现未知平台数据!', platform_order_num)
+    #
+    #     for platform in platform_list:
+    #         if platform == '阅文':
+    #             print('阅文没有数据')
+    #             mysql_insert_order(get_yuewen_order(st_unix, et_unix))
+    #         elif platform == '悠书阁':
+    #             print('悠书阁没有数据')
+    #             mysql_insert_order(get_ysg_order(st_unix, et_unix))
+    #         elif platform == '掌读':
+    #             print('掌读没有数据')
+    #             mysql_insert_order(get_zhangdu_order(st_unix, et_unix))
+    #         elif platform == '掌中云':
+    #             print('掌中云没有数据')
+    #             mysql_insert_order(get_zzy_order(st_unix, et_unix))
+    #         elif platform == '花生':
+    #             print('花生没有数据')
+    #             mysql_insert_order(get_huasheng_order(st_unix, et_unix))
+    #         else:
+    #             print('什么鬼平台:', platform)
 
 
     print('订单检查执行时间(秒):', date_util.getCurrentSecondTime() - start_exec_seconds)

+ 36 - 7
dgp/tests/demo/test_read_ini.py

@@ -25,15 +25,44 @@ __title__ = '测试读取ini文件'
 """
 
 import configparser
+import math
 import os
 
 # 获取当前文件所在目录的上一级目录
-parent_dir_path = os.path.dirname(os.path.abspath('../..'))
-config_path = parent_dir_path + '/conf/account_list_config.ini'
+# parent_dir_path = os.path.dirname(os.path.abspath('../..'))
+# config_path = parent_dir_path + '/conf/account_list_config.ini'
+#
+# # 读取数据库配置信息
+# config = configparser.ConfigParser(allow_no_value=True) #注意参数不能省
+# config.read(config_path, encoding='UTF-8')
+#
+# print(config.items("IPS"))
+# print(type(config.items("IPS")))
 
-# 读取数据库配置信息
-config = configparser.ConfigParser(allow_no_value=True) #注意参数不能省
-config.read(config_path, encoding='UTF-8')
+# print(10 // int(1000) + 1)
+# print(1000 // int(1000) + 1)
+# print(1001 // int(1000) + 1)
 
-print(config.items("IPS"))
-print(type(config.items("IPS")))
+per_page = 1000
+total_count = 40
+page = 1
+# print(math.ceil(0 / int(per_page)))
+print(math.ceil(40 / int(per_page)))
+print(math.ceil(1000 / int(per_page)))
+parameter = int(page) > math.ceil(total_count / int(per_page))
+print(parameter)
+
+print(math.ceil(0 / 100))
+print(math.ceil(100 / 100))
+print(math.ceil(101 / 100))
+
+for i in range(5):
+    if 1 != 1:
+        items = [0,1]
+    else:
+        print('掌中云本次请求数据为空')
+        items = []
+        continue
+    print('123345')
+    for item in items:
+        print(item)