123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548 |
- import json
- import random
- import requests
- import time
- from datetime import datetime
- import pymysql
- import logging
- from concurrent.futures import ThreadPoolExecutor
- from model.DataBaseUtils import MysqlUtils
- logging.getLogger().setLevel(logging.WARNING)
- from model.ComUtils import *
- from model.DateUtils import DateUtils
# Shared helper instances; the functions in this module write through
# ``db.quchen_text``.
du, db = DateUtils(), MysqlUtils()

# Size of the thread pool created by ad_info().
max_workers = 10

# Not referenced by any function visible in this part of the file.
count = []

# Value returned by DateUtils.get_n_days(-10); presumably the date ten days
# ago -- TODO confirm against model.DateUtils.
t = du.get_n_days(-10)
def _adcreative_row(item, flag, account_id, dt):
    """Flatten one creative from the API response into an ``adcreative_info`` row."""
    # An empty/missing element mapping yields blank title, description and image.
    elements = item['adcreative_elements'] or {}
    title = elements.get('title', '')
    description = elements.get('description', '')
    # Computed per creative so a video creative cannot mark later rows as video.
    is_video = 0

    if flag == 'MP':
        if 'image' in elements:
            image = elements.get('image', '')
        elif 'image_list' in elements:
            image = ','.join(elements['image_list'])
        elif 'short_video_struct' in elements:
            image = elements['short_video_struct']["short_video1"]
            is_video = 1
        else:
            image = ''
        site_set = ''
        platform = 'MP'
    else:
        if 'image' in elements:
            image = elements['image']
        elif 'element_story' in elements:
            image = ','.join([story['image'] for story in elements['element_story']])
        else:
            image = ''
        site_set = ','.join(item['site_set'])
        platform = 'GDT'

    return (
        item['adcreative_id'], item['adcreative_name'], item['campaign_id'], image, title,
        item.get('promoted_object_type', ''), item.get('page_type', ''),
        item['page_spec'].get('page_id', ''), item.get('promoted_object_id', ''),
        site_set, description, platform, account_id, dt, is_video,
    )


def get_adcreatives(account_id, access_token, flag, adc_ids, dt):
    """Fetch creatives by id and store them in ``adcreative_info``.

    Pages through the v1.1 ``adcreatives/get`` endpoint, filtered to the given
    creative ids, and writes one row per creative with ``replace into``.

    Args:
        account_id: Advertiser account id sent to the API and stored in each row.
        access_token: API access token for the account.
        flag: ``'MP'`` selects the MP element layout; any other value is
            handled and stored as ``'GDT'``.
        adc_ids: Comma-separated creative ids used for the ``IN`` filter.
        dt: Value stored unchanged in each row's date column.
    """
    url = 'https://api.e.qq.com/v1.1/adcreatives/get'
    rows = []
    page = 1
    while True:
        parameters = {
            'access_token': access_token,
            'timestamp': int(time.time()),
            'nonce': str(time.time()) + str(random.randint(0, 999999)),
            'fields': ('campaign_id', 'adcreative_id', 'adcreative_name', 'adcreative_elements',
                       'promoted_object_type', 'page_type', 'page_spec', 'link_page_spec',
                       'universal_link_url', 'promoted_object_id', 'site_set'),
            "filtering": [{
                "field": "adcreative_id",
                "operator": "IN",
                "values": adc_ids.split(','),
            }],
            "account_id": account_id,
            "page": page,
            "page_size": 100,
            "is_deleted": False,
        }
        # Every non-string value is sent JSON-encoded in the query string.
        parameters = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in parameters.items()
        }

        # Retry until the HTTP layer answers 200.
        while True:
            response = requests.get(url, params=parameters)
            if response.status_code == 200:
                payload = response.json()
                break
            time.sleep(1)
            print("爬取失败 等待1s")

        # A payload without 'data' ends the crawl.
        if 'data' not in payload:
            break
        data = payload['data']
        rows.extend(_adcreative_row(item, flag, account_id, dt) for item in data['list'])

        if data['page_info']['total_page'] > page:
            page += 1
        else:
            break

    if rows:
        print(f"{account_id}有创意:", len(rows))
        sql = 'replace into adcreative_info values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) '
        db.quchen_text.executeMany(sql, rows)
def images_info_get(account_id, access_token, image_ids):
    """Fetch image metadata by id and store it in ``image_info``.

    Pages through the v1.3 ``images/get`` endpoint filtered to the given image
    ids, then writes ``(image_id, width, height, signature, preview_url)`` rows
    with ``replace into``. The shared ``db`` handle is closed before returning.

    Args:
        account_id: Advertiser account id sent to the API.
        access_token: API access token for the account.
        image_ids: Comma-separated image ids used for the ``IN`` filter.
    """
    fields = ('image_id', 'width', 'height', 'file_size', 'signature', 'preview_url')
    url = 'https://api.e.qq.com/v1.3/' + 'images/get'
    page = 1
    images = []
    while True:
        parameters = {
            "account_id": account_id,
            "filtering": [{
                "field": "image_id",
                "operator": "IN",
                "values": image_ids.split(','),
            }],
            "page": page,
            "page_size": 100,
            'access_token': access_token,
            'timestamp': int(time.time()),
            'nonce': str(time.time()) + str(random.randint(0, 999999)),
            'fields': fields,
        }
        # Every non-string value is sent JSON-encoded in the query string.
        parameters = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in parameters.items()
        }

        # Retry until the HTTP layer answers 200.
        while True:
            response = requests.get(url, params=parameters)
            if response.status_code == 200:
                payload = response.json()
                break
            time.sleep(1)
            print("请求出错 等待1s..")

        # Without 'data' there is no page_info to paginate on, so stop here
        # and keep whatever was collected so far.
        if 'data' not in payload:
            print(f"{account_id} images/get returned no data")
            break
        data = payload['data']
        images.extend(data['list'])
        if data['page_info']['total_page'] > page:
            page += 1
        else:
            break

    rows = [
        (img['image_id'], img['width'], img['height'], img['signature'], img['preview_url'])
        for img in images
    ]
    print(f"{account_id} 有图片:", len(images))
    if rows:
        sql = "replace into image_info value (%s,%s,%s,%s,%s)"
        db.quchen_text.executeMany(sql, rows)
    db.close()
def ad_info():
    """Run ``get_ad_info`` for every advertiser account on a thread pool.

    Accounts are read from ``advertiser_qq`` (tagged ``'GDT'``) and
    ``advertiser_vx`` (tagged ``'MP'``). Each account is submitted to a pool of
    ``max_workers`` threads; any rows placed in the shared ``total_data`` list
    are then written to ``ad_info`` with ``replace into``.

    Worker failures are logged instead of being silently discarded.
    """
    # NOTE(review): `name != '' or name is not null` is equivalent to
    # `name is not null`; `and` may have been intended -- confirm.
    accounts = db.quchen_text.getData("""
        select account_id,access_token,name channel,'GDT' type from advertiser_qq where name !='' or name is not null
        union
        select account_id,access_token,name channel,'MP' type from advertiser_vx where name !='' or name is not null

    """)
    total_data = []

    # NOTE(review): get_ad_info is declared in this file as
    # (account_id, access_token, flag, ad_ids, dt) but is called here with four
    # arguments, so each task raises TypeError. The arguments are left as they
    # were because the intended ad_ids/dt are not known here; the failure is
    # now reported below rather than hidden inside the future.
    futures = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for account in accounts:
            account_id = account[0]
            access_token = account[1]
            flag = account[3]
            future = executor.submit(get_ad_info, account_id, access_token, flag, total_data)
            futures[future] = account_id
    # Leaving the ``with`` block waits for all tasks, like executor.shutdown().

    for future, account_id in futures.items():
        try:
            future.result()
        except Exception:
            logging.exception("get_ad_info failed for account %s", account_id)

    print(len(total_data))
    if total_data:
        # NOTE(review): seven placeholders here, while get_ad_info writes eight
        # columns to the same table -- confirm the table layout.
        sql = "replace into ad_info values(%s,%s,%s,%s,%s,%s,%s) "
        db.quchen_text.executeMany(sql, total_data)
- """获取广告基础信息"""
def get_ad_info(account_id, access_token, flag, ad_ids, dt):
    """Fetch basic ad information by id and store it in ``ad_info``.

    Pages through the v1.3 ``ads/get`` endpoint filtered to the given ad ids
    and writes ``(ad_id, ad_name, adcreative_id, campaign_id, adgroup_id,
    account_id, flag, dt)`` rows with ``replace into``. The shared ``db``
    handle is closed before returning.

    Args:
        account_id: Advertiser account id sent to the API and stored in each row.
        access_token: API access token for the account.
        flag: Account type tag stored unchanged in each row.
        ad_ids: Comma-separated ad ids used for the ``IN`` filter.
        dt: Value stored unchanged in each row's date column.
    """
    url = 'https://api.e.qq.com/v1.3/' + 'ads/get'
    fields = ('ad_id', 'ad_name', 'adcreative_id', 'adgroup_id', 'campaign_id')
    rows = []
    page = 1
    while True:
        parameters = {
            'access_token': access_token,
            'timestamp': int(time.time()),
            'nonce': str(time.time()) + str(random.randint(0, 999999)),
            'fields': fields,
            "filtering": [{
                "field": "ad_id",
                "operator": "IN",
                "values": ad_ids.split(','),
            }],
            "account_id": account_id,
            "page": page,
            "page_size": 100,
            "is_deleted": False,
        }
        # Every non-string value is sent JSON-encoded in the query string.
        parameters = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in parameters.items()
        }

        # Code 11017 triggers a 61 s pause and a retry (presumably a rate
        # limit -- TODO confirm against the API error-code table).
        while True:
            r = requests.get(url, params=parameters).json()
            if r['code'] != 11017:
                break
            time.sleep(61)

        # Check for data before touching page_info: an error payload has none.
        data = r.get("data")
        if not data:
            print(f"{account_id} ads/get returned no data (code {r['code']})")
            break

        for ad in data['list']:
            rows.append((str(ad['ad_id']), ad['ad_name'], ad['adcreative_id'], ad['campaign_id'],
                         ad['adgroup_id'], account_id, flag, dt))

        # Stop on the last page instead of requesting one page past it.
        if page >= data['page_info']['total_page']:
            break
        page += 1

    if rows:
        print(f"{account_id}有广告:", len(rows))
        sql = "replace into ad_info values(%s,%s,%s,%s,%s,%s,%s,%s) "
        db.quchen_text.executeMany(sql, rows)
    db.close()
def get_ad_cost_day(account_id, access_token, flag, st, et):
    """Load daily ad cost for one account, dispatching on the account type.

    ``flag == 'MP'`` routes to ``ad_cost_day_mp``; every other value routes to
    ``ad_cost_day_gdt``. ``st`` and ``et`` are passed through as the start and
    end of the report date range.
    """
    loader = ad_cost_day_mp if flag == 'MP' else ad_cost_day_gdt
    loader(account_id, access_token, st, et)
def ad_cost_day_gdt(account_id, access_token, st, et):
    """Fetch per-ad daily report rows at ``REPORT_LEVEL_AD`` into ``ad_cost_day``.

    Pages through the v1.3 ``daily_reports/get`` endpoint grouped by ad and
    date, keeps only rows with a positive cost, and writes them with
    ``replace into`` tagged ``'GDT'``. The shared ``db`` handle is closed
    before returning.

    Args:
        account_id: Advertiser account id sent to the API and stored in each row.
        access_token: API access token for the account.
        st: Report range start date, sent as ``date_range.start_date``.
        et: Report range end date, sent as ``date_range.end_date``.
    """
    url = 'https://api.e.qq.com/v1.3/daily_reports/get'
    fields = ('date', 'ad_id', 'adgroup_id', 'cost', 'view_count', 'ctr', 'follow_count',
              'web_order_count', 'order_amount')
    rows = []
    page = 1
    while True:
        parameters = {
            'access_token': access_token,
            'timestamp': int(time.time()),
            'nonce': str(time.time()) + str(random.randint(0, 999999)),
            'fields': fields,
            "account_id": account_id,
            "group_by": ['ad_id', 'date'],
            "level": 'REPORT_LEVEL_AD',
            "page": page,
            "page_size": 1000,
            "date_range": {
                "start_date": st,
                "end_date": et,
            },
        }
        # Every non-string value is sent JSON-encoded in the query string.
        parameters = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in parameters.items()
        }

        # Code 11017 triggers a 61 s pause and a retry (presumably a rate
        # limit -- TODO confirm against the API error-code table).
        while True:
            r = requests.get(url, params=parameters).json()
            if r['code'] != 11017:
                break
            time.sleep(61)

        # Without data there is no page count to continue on; stop rather than
        # reading an unset or stale total_page.
        data = r.get("data")
        if not data:
            print(f"{account_id} daily_reports/get returned no data (code {r['code']})")
            break

        for rec in data['list']:
            if rec['cost'] > 0:
                rows.append((
                    rec['date'], rec['ad_id'], rec['adgroup_id'],
                    rec['cost'] / 100,  # presumably cents -> yuan; TODO confirm unit
                    rec['view_count'],
                    rec['ctr'] * rec['view_count'],  # click count derived from ctr
                    rec['follow_count'], rec['web_order_count'],
                    rec['order_amount'] / 100, account_id, 'GDT',
                ))

        if page >= data['page_info']['total_page']:
            break
        page += 1

    if rows:
        print(f"{account_id} have ad cost :{len(rows)} ")
        db.quchen_text.executeMany(
            'replace into ad_cost_day values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', rows)
    db.close()
def ad_cost_day_mp(account_id, access_token, st, et):
    """Fetch per-ad daily report rows at ``REPORT_LEVEL_AD_WECHAT`` into ``ad_cost_day``.

    Pages through the v1.3 ``daily_reports/get`` endpoint, keeps only rows with
    a positive cost, and writes them with ``replace into`` tagged ``'MP'``.
    The shared ``db`` handle is closed before returning.

    Args:
        account_id: Advertiser account id sent to the API and stored in each row.
        access_token: API access token for the account.
        st: Report range start date, sent as ``date_range.start_date``.
        et: Report range end date, sent as ``date_range.end_date``.
    """
    url = 'https://api.e.qq.com/v1.3/daily_reports/get'
    fields = ('date', 'ad_id', 'adgroup_id', 'cost', 'view_count', 'valid_click_count',
              'official_account_follow_count', 'order_count', 'order_amount')
    rows = []
    page = 1
    while True:
        parameters = {
            'access_token': access_token,
            'timestamp': int(time.time()),
            'nonce': str(time.time()) + str(random.randint(0, 999999)),
            'fields': fields,
            "account_id": account_id,
            "level": 'REPORT_LEVEL_AD_WECHAT',
            "page": page,
            "page_size": 1000,
            "date_range": {
                "start_date": st,
                "end_date": et,
            },
        }
        # Every non-string value is sent JSON-encoded in the query string.
        parameters = {
            key: value if isinstance(value, str) else json.dumps(value)
            for key, value in parameters.items()
        }

        # Code 11017 triggers a 61 s pause and a retry (presumably a rate
        # limit -- TODO confirm against the API error-code table).
        while True:
            r = requests.get(url, params=parameters).json()
            if r['code'] != 11017:
                break
            time.sleep(61)

        # Without data there is no page count to continue on; stop rather than
        # reading an unset or stale total_page.
        data = r.get("data")
        if not data:
            print(f"{account_id} daily_reports/get returned no data (code {r['code']})")
            break

        for rec in data['list']:
            if rec['cost'] > 0:
                rows.append((
                    rec['date'], rec['ad_id'], rec['adgroup_id'],
                    rec['cost'] / 100,  # presumably cents -> yuan; TODO confirm unit
                    rec['view_count'], rec['valid_click_count'],
                    rec['official_account_follow_count'], rec['order_count'],
                    rec['order_amount'] / 100, account_id, 'MP',
                ))

        if page >= data['page_info']['total_page']:
            break
        page += 1

    if rows:
        print(f"{account_id} have ad cost :{len(rows)} ")
        db.quchen_text.executeMany(
            'replace into ad_cost_day values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', rows)
    db.close()
def daily_reports_get(access_token, account_id, level, start_date, end_date, fields):
    """Request the first page (up to 1000 rows) of a v1.3 daily report.

    Retries once per second until the HTTP status is 200 and returns the
    decoded JSON body.
    """
    url = 'https://api.e.qq.com/v1.3/' + 'daily_reports/get'
    query = {
        "account_id": account_id,
        "level": level,
        "date_range": {"start_date": start_date, "end_date": end_date},
        "page": 1,
        "page_size": 1000,
        'fields': fields,
        'access_token': access_token,
        'timestamp': int(time.time()),
        'nonce': str(time.time()) + str(random.randint(0, 999999)),
    }
    # Anything that is not already a plain str goes over the wire as JSON.
    encoded = {
        name: value if type(value) is str else json.dumps(value)
        for name, value in query.items()
    }
    while True:
        response = requests.get(url, params=encoded)
        if response.status_code == 200:
            return response.json()
        time.sleep(1)
        print("请求出错 等待1s..")
def daily_qq_reports_get(access_token, account_id, compaign_id, level, start_date, end_date, fields):
    """Request the first page (up to 1000 rows) of a v1.1 daily report for one campaign.

    The report is filtered to ``campaign_id EQUALS compaign_id``. A single
    request is made and its decoded JSON body is returned.
    """
    url = 'https://api.e.qq.com/v1.1/' + 'daily_reports/get'
    campaign_filter = {
        "field": "campaign_id",
        "operator": "EQUALS",
        "values": [compaign_id],
    }
    query = {
        "account_id": account_id,
        "filtering": [campaign_filter],
        "level": level,
        "date_range": {"start_date": start_date, "end_date": end_date},
        "page": 1,
        "page_size": 1000,
        'fields': fields,
        'access_token': access_token,
        'timestamp': int(time.time()),
        'nonce': str(time.time()) + str(random.randint(0, 999999)),
    }
    # Anything that is not already a plain str goes over the wire as JSON.
    encoded = {
        name: value if type(value) is str else json.dumps(value)
        for name, value in query.items()
    }
    return requests.get(url, params=encoded).json()
def mysql_insert_adcreative(data):
    """Bulk ``replace into`` the ``adcreative`` table over a dedicated connection.

    Opens its own pymysql connection, writes all rows in one ``executemany``
    and commits. On failure the transaction is rolled back, the error is
    logged and the function returns normally (best effort). The connection is
    always closed.

    Args:
        data: Sequence of 10-item rows matching the column list in the
            statement below.
    """
    # NOTE(review): connection credentials are hard-coded here; they should be
    # moved to configuration.
    # Keyword arguments are used because PyMySQL 1.0+ no longer accepts
    # positional connect() arguments.
    conn = pymysql.connect(
        host='rm-bp1c9cj79872tx3aaro.mysql.rds.aliyuncs.com',
        user='superc',
        password='Cc719199895',
        database='quchen_text',
    )
    sql = 'replace into adcreative (campaign_id,adcreative_id,adcreative_name,image_id,title,promoted_object_type,page_type,page_id,link_page_id,promoted_object_id) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(sql, data)
            conn.commit()
            print('insert [adcreative] ', len(data))
        except Exception:
            conn.rollback()
            # Keep the original console message and also record the cause.
            print('insert [adcreative] defeat')
            logging.exception('insert [adcreative] failed')
        finally:
            cursor.close()
    finally:
        conn.close()
if __name__ == '__main__':
    # Manual run for a single account and a single day.
    # NOTE(review): account ids and access tokens are hard-coded here.
    account_id, access_token = 19206910, '89079ccc8db047b078a0108e36a7e276'
    #
    # Second account; defined but not used by the call below.
    account_id2, access_token2 = 14709511, 'e87f7b6f860eaeef086ddcc9c3614678'

    get_ad_cost_day(account_id, access_token, 'MP', '2021-04-09', '2021-04-09')
    # get_adcreatives(account_id,access_token,'MP','3187867673','2021-04-09')
|