# cost_util.py
import json
import random
import requests
import time
from datetime import datetime
import pymysql
import logging
from concurrent.futures import ThreadPoolExecutor
from model.DataBaseUtils import MysqlUtils
logging.getLogger().setLevel(logging.WARNING)
from model.ComUtils import *
from model.DateUtils import DateUtils

# Module-level shared state used by every fetcher below.
du = DateUtils()        # date helper (project util)
db = MysqlUtils()       # MySQL connection wrapper (project util)
max_workers = 10        # thread-pool size used by ad_info()
count = []              # NOTE(review): appears unused in this chunk — confirm before removing
t = du.get_n_days(-10)  # NOTE(review): presumably a date 10 days back; appears unused here — confirm
  18. def get_adcreatives(account_id,access_token,flag,adc_ids,dt): # 获取创意
  19. url = 'https://api.e.qq.com/v1.1/adcreatives/get'
  20. li =[]
  21. page = 1
  22. while True:
  23. parameters = {
  24. 'access_token': access_token,
  25. 'timestamp': int(time.time()),
  26. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  27. 'fields': ('campaign_id', 'adcreative_id', 'adcreative_name', 'adcreative_elements', 'promoted_object_type', 'page_type',
  28. 'page_spec', 'link_page_spec', 'universal_link_url', 'promoted_object_id','site_set'),
  29. "filtering": [{
  30. "field": "adcreative_id",
  31. "operator": "IN",
  32. "values": adc_ids.split(',')
  33. }],
  34. "account_id": account_id,
  35. "page": page,
  36. "page_size": 100,
  37. "is_deleted": False
  38. }
  39. for k in parameters:
  40. if type(parameters[k]) is not str:
  41. parameters[k] = json.dumps(parameters[k])
  42. while True:
  43. h = requests.get(url, params=parameters)
  44. # print(h.json())
  45. if h.status_code == 200:
  46. r = h.json()
  47. # print(r)
  48. break
  49. else:
  50. time.sleep(1)
  51. print("爬取失败 等待1s")
  52. if 'data' in r.keys():
  53. is_video=0
  54. for i in r['data']['list']:
  55. # print(i)
  56. if flag=='MP':
  57. if len(i['adcreative_elements'])>0:
  58. d = i['adcreative_elements']
  59. title =d.get('title','')
  60. description = d.get('description', '')
  61. if 'image' in d.keys():
  62. image=d.get('image','')
  63. elif 'image_list' in d.keys():
  64. image =','.join(d.get('image_list'))
  65. elif 'video' in d.keys():
  66. image = d['video']
  67. is_video=1
  68. else:
  69. image = ''
  70. else:
  71. title = image=''
  72. li.append((
  73. i['adcreative_id'],i['adcreative_name'],i['campaign_id'],image,title,
  74. i.get('promoted_object_type',''),i.get('page_type',''),
  75. i['page_spec'].get('page_id',''),i.get('promoted_object_id',''),
  76. '',description,'MP',account_id,dt,is_video
  77. ))
  78. else:
  79. if len(i['adcreative_elements'])>0:
  80. d =i['adcreative_elements']
  81. if 'image' in d.keys():
  82. image =d['image']
  83. elif 'element_story' in d.keys():
  84. image= ','.join([x['image'] for x in d['element_story']])
  85. else:
  86. image=''
  87. title =d.get('title','')
  88. description = d.get('description','')
  89. else:
  90. image=title=description=''
  91. li.append(
  92. (
  93. i['adcreative_id'], i['adcreative_name'], i['campaign_id'],image,title,
  94. i.get('promoted_object_type', ''), i.get('page_type', ''),
  95. i['page_spec'].get('page_id', ''), i.get('promoted_object_id', ''),
  96. ','.join(i['site_set']),description,'GDT',account_id,dt,is_video
  97. )
  98. )
  99. total_page = r['data']['page_info']['total_page']
  100. if total_page > page:
  101. page += 1
  102. else:
  103. break
  104. else:
  105. break
  106. if len(li)>0:
  107. print(f"{account_id}有创意:",len(li))
  108. sql='replace into adcreative_info values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) '
  109. db.quchen_text.executeMany(sql,li)
  110. def images_info_get(account_id,access_token,image_ids): # 获取图片信息
  111. fields = ('image_id','width','height','file_size','signature','preview_url')
  112. interface = 'images/get'
  113. url = 'https://api.e.qq.com/v1.3/' + interface
  114. page = 1
  115. li = []
  116. while True:
  117. common_parameters = {
  118. 'access_token': access_token,
  119. 'timestamp': int(time.time()),
  120. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  121. 'fields': fields
  122. }
  123. parameters = {
  124. "account_id": account_id,
  125. "filtering":[{
  126. "field": "image_id",
  127. "operator": "IN",
  128. "values": image_ids.split(',')
  129. }],
  130. "page": page,
  131. "page_size": 100
  132. }
  133. parameters.update(common_parameters)
  134. for k in parameters:
  135. if type(parameters[k]) is not str:
  136. parameters[k] = json.dumps(parameters[k])
  137. while True:
  138. h = requests.get(url, params=parameters)
  139. # print(h.text)
  140. if h.status_code == 200:
  141. r = h.json()
  142. break
  143. else:
  144. time.sleep(1)
  145. print("请求出错 等待1s..")
  146. if 'data' in r.keys():
  147. li.extend(r['data']['list'])
  148. total_page = r['data']['page_info']['total_page']
  149. if total_page > page:
  150. page += 1
  151. else:
  152. break
  153. # print(li)
  154. data = []
  155. for i in li:
  156. data.append((i['image_id'],i['width'],i['height'],i['signature'],i['preview_url']))
  157. # print(data)
  158. print(f"{account_id} 有图片:", li.__len__())
  159. if li.__len__() > 0:
  160. sql="replace into image_info value (%s,%s,%s,%s,%s)"
  161. db.quchen_text.executeMany(sql, data)
  162. db.close()
  163. def video_info_get(account_id,access_token,image_ids): # 获取视频信息
  164. fields = ('video_id','width','height','file_size','signature','preview_url')
  165. interface = 'videos/get'
  166. url = 'https://api.e.qq.com/v1.3/' + interface
  167. page = 1
  168. li = []
  169. while True:
  170. common_parameters = {
  171. 'access_token': access_token,
  172. 'timestamp': int(time.time()),
  173. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  174. 'fields': fields
  175. }
  176. parameters = {
  177. "account_id": account_id,
  178. "filtering":[{
  179. "field": "media_id",
  180. "operator": "IN",
  181. "values": image_ids.split(',')
  182. }],
  183. "page": page,
  184. "page_size": 100
  185. }
  186. parameters.update(common_parameters)
  187. for k in parameters:
  188. if type(parameters[k]) is not str:
  189. parameters[k] = json.dumps(parameters[k])
  190. while True:
  191. h = requests.get(url, params=parameters)
  192. # print(h.text)
  193. if h.status_code == 200:
  194. r = h.json()
  195. break
  196. else:
  197. time.sleep(1)
  198. print("请求出错 等待1s..")
  199. if 'data' in r.keys():
  200. li.extend(r['data']['list'])
  201. total_page = r['data']['page_info']['total_page']
  202. if total_page > page:
  203. page += 1
  204. else:
  205. break
  206. # print(li)
  207. data = []
  208. for i in li:
  209. data.append((i['video_id'],i['width'],i['height'],i['signature'],i['preview_url']))
  210. # print(data)
  211. print(f"{account_id} 有视频:", li.__len__())
  212. if li.__len__() > 0:
  213. sql="replace into video_info value (%s,%s,%s,%s,%s)"
  214. db.quchen_text.executeMany(sql, data)
  215. db.close()
def ad_info():
    """Fan out over all advertiser accounts and refresh the ad_info table.

    Reads (account_id, access_token, channel, type) rows from both the GDT
    (advertiser_qq) and MP (advertiser_vx) tables, fetches each account's
    ads on a thread pool, and bulk-upserts the collected rows.
    """
    accounts = db.quchen_text.getData("""
    select account_id,access_token,name channel,'GDT' type from advertiser_qq where name !='' or name is not null
    union
    select account_id,access_token,name channel,'MP' type from advertiser_vx where name !='' or name is not null
    """)
    total_data = []  # shared accumulator, appended to by the worker tasks
    executor = ThreadPoolExecutor(max_workers=max_workers)
    for i in accounts:
        account_id = i[0]
        access_token = i[1]
        type = i[3]  # NOTE(review): shadows the builtin `type`
        # NOTE(review): get_ad_info in this file takes 5 parameters
        # (account_id, access_token, flag, ad_ids, dt) but only 4 are passed
        # here; the resulting TypeError would be swallowed silently because
        # the returned Future is never checked. Confirm the intended
        # signature / call site before relying on this path.
        executor.submit(get_ad_info, account_id, access_token, type, total_data)
    executor.shutdown()  # blocks until all submitted tasks finish
    print(len(total_data))
    if len(total_data) > 0:
        sql = "replace into ad_info values(%s,%s,%s,%s,%s,%s,%s) "
        db.quchen_text.executeMany(sql, total_data)
  235. """获取广告基础信息"""
  236. def get_ad_info(account_id, access_token, flag,ad_ids,dt):
  237. path = 'ads/get'
  238. fields = ('ad_id', 'ad_name', 'adcreative_id', 'adgroup_id', 'campaign_id')
  239. url = 'https://api.e.qq.com/v1.3/' + path
  240. li = []
  241. page = 1
  242. while True:
  243. parameters = {
  244. 'access_token': access_token,
  245. 'timestamp': int(time.time()),
  246. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  247. 'fields': fields,
  248. "filtering": [{
  249. "field": "ad_id",
  250. "operator": "IN",
  251. "values":
  252. ad_ids.split(',')
  253. }],
  254. "account_id": account_id,
  255. "page": page,
  256. "page_size": 100,
  257. "is_deleted": False
  258. }
  259. for k in parameters:
  260. if type(parameters[k]) is not str:
  261. parameters[k] = json.dumps(parameters[k])
  262. while True:
  263. r = requests.get(url, params=parameters).json()
  264. code = r['code']
  265. if code == 11017:
  266. time.sleep(61)
  267. else:
  268. break
  269. # print(r)
  270. total_page = r['data']['page_info']['total_page']
  271. if page > total_page:
  272. break
  273. else:
  274. page += 1
  275. if r.get("data"):
  276. for i in r['data']['list']:
  277. li.append((str(i['ad_id']), i['ad_name'], i['adcreative_id'], i['campaign_id'], i['adgroup_id'],
  278. account_id, flag, dt))
  279. if li.__len__()>0:
  280. print(f"{account_id}有广告:",li.__len__())
  281. sql = "replace into ad_info values(%s,%s,%s,%s,%s,%s,%s,%s) "
  282. db.quchen_text.executeMany(sql, li)
  283. db.close()
  284. def get_ad_cost_day(account_id,access_token,flag,st,et):
  285. if flag == 'MP':
  286. ad_cost_day_mp(account_id,access_token, st, et)
  287. else:
  288. ad_cost_day_gdt(account_id,access_token, st, et)
  289. def ad_cost_day_gdt(account_id,access_token,st,et):
  290. url = 'https://api.e.qq.com/v1.3/daily_reports/get'
  291. fields = ('date', 'ad_id','adgroup_id', 'cost', 'view_count', 'ctr', 'follow_count','web_order_count','order_amount')
  292. li = []
  293. page = 1
  294. while True:
  295. parameters = {
  296. 'access_token': access_token,
  297. 'timestamp': int(time.time()),
  298. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  299. 'fields': fields,
  300. "account_id": account_id,
  301. "group_by" : ['ad_id','date'],
  302. "level": 'REPORT_LEVEL_AD',
  303. "page": page,
  304. "page_size": 1000,
  305. "date_range": {
  306. "start_date": st,
  307. "end_date": et
  308. }
  309. }
  310. for k in parameters:
  311. if type(parameters[k]) is not str:
  312. parameters[k] = json.dumps(parameters[k])
  313. while True:
  314. r = requests.get(url, params=parameters).json()
  315. # print(r)
  316. code =r['code']
  317. if code==11017:
  318. time.sleep(61)
  319. else:
  320. break
  321. if r.get("data"):
  322. for i in r['data']['list']:
  323. if i['cost']>0:
  324. li.append(
  325. (
  326. i['date'], i['ad_id'],i['adgroup_id'], i['cost']/100, i['view_count'], i['ctr']*i['view_count'],
  327. i['follow_count'],i['web_order_count'],i['order_amount']/100,account_id,'GDT'
  328. )
  329. )
  330. total_page = r['data']['page_info']['total_page']
  331. if page >= total_page:
  332. break
  333. else:
  334. page += 1
  335. # print(li)
  336. if len(li) > 0:
  337. print(f"{account_id} have ad cost :{len(li)} ")
  338. db.quchen_text.executeMany('replace into ad_cost_day values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', li)
  339. db.close()
  340. def ad_cost_day_mp(account_id,access_token,st,et):
  341. url = 'https://api.e.qq.com/v1.3/daily_reports/get'
  342. fields = ('date', 'ad_id','adgroup_id', 'cost', 'view_count', 'valid_click_count', 'official_account_follow_count','order_count','order_amount')
  343. li = []
  344. page = 1
  345. while True:
  346. parameters = {
  347. 'access_token': access_token,
  348. 'timestamp': int(time.time()),
  349. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  350. 'fields': fields,
  351. "account_id": account_id,
  352. "level": 'REPORT_LEVEL_AD_WECHAT',
  353. "page": page,
  354. "page_size": 1000,
  355. "date_range": {
  356. "start_date": st,
  357. "end_date": et
  358. }
  359. }
  360. for k in parameters:
  361. if type(parameters[k]) is not str:
  362. parameters[k] = json.dumps(parameters[k])
  363. while True:
  364. r = requests.get(url, params=parameters).json()
  365. # print(r['data']['list'])
  366. # import pandas as pd
  367. # print(pd.DataFrame(r['data']['list']))
  368. code = r['code']
  369. if code == 11017:
  370. time.sleep(61)
  371. else:
  372. break
  373. if r.get("data"):
  374. for i in r['data']['list']:
  375. if i['cost']>0:
  376. li.append(
  377. (
  378. i['date'],i['ad_id'],i['adgroup_id'],i['cost']/100,i['view_count'],i['valid_click_count'],
  379. i['official_account_follow_count'],i['order_count'],i['order_amount']/100,account_id,'MP'
  380. )
  381. )
  382. total_page = r['data']['page_info']['total_page']
  383. if page >=total_page:
  384. break
  385. else:
  386. page += 1
  387. # print(li)
  388. # exit()
  389. if len(li) > 0:
  390. print(f"{account_id} have ad cost :{len(li)} ")
  391. db.quchen_text.executeMany('replace into ad_cost_day values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', li)
  392. db.close()
  393. def daily_reports_get(access_token, account_id, level, start_date, end_date, fields): # 获取wx投放计划日报数据
  394. interface = 'daily_reports/get'
  395. url = 'https://api.e.qq.com/v1.3/' + interface
  396. common_parameters = {
  397. 'access_token': access_token,
  398. 'timestamp': int(time.time()),
  399. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  400. 'fields': fields
  401. }
  402. parameters = {
  403. "account_id": account_id,
  404. "level": level,
  405. "date_range":
  406. {
  407. "start_date": start_date,
  408. "end_date": end_date
  409. },
  410. "page": 1,
  411. "page_size": 1000,
  412. "fields":
  413. [
  414. ]
  415. }
  416. parameters.update(common_parameters)
  417. for k in parameters:
  418. if type(parameters[k]) is not str:
  419. parameters[k] = json.dumps(parameters[k])
  420. while True:
  421. r = requests.get(url, params=parameters)
  422. if r.status_code == 200:
  423. break
  424. else:
  425. time.sleep(1)
  426. print("请求出错 等待1s..")
  427. return r.json()
  428. def daily_qq_reports_get(access_token, account_id, compaign_id, level, start_date, end_date, fields): # 获取gdt投放计划日报数据
  429. interface = 'daily_reports/get'
  430. url = 'https://api.e.qq.com/v1.1/' + interface
  431. common_parameters = {
  432. 'access_token': access_token,
  433. 'timestamp': int(time.time()),
  434. 'nonce': str(time.time()) + str(random.randint(0, 999999)),
  435. 'fields': fields
  436. }
  437. parameters = {
  438. "account_id": account_id,
  439. "filtering":
  440. [
  441. {
  442. "field": "campaign_id",
  443. "operator": "EQUALS",
  444. "values":
  445. [
  446. compaign_id
  447. ]
  448. }
  449. ],
  450. "level": level,
  451. "date_range":
  452. {
  453. "start_date": start_date,
  454. "end_date": end_date
  455. },
  456. "page": 1,
  457. "page_size": 1000,
  458. "fields":
  459. [
  460. ]
  461. }
  462. parameters.update(common_parameters)
  463. for k in parameters:
  464. if type(parameters[k]) is not str:
  465. parameters[k] = json.dumps(parameters[k])
  466. r = requests.get(url, params=parameters)
  467. return r.json()
  468. def mysql_insert_adcreative(data):
  469. db = pymysql.connect('rm-bp1c9cj79872tx3aaro.mysql.rds.aliyuncs.com', 'superc', 'Cc719199895', 'quchen_text')
  470. cursor = db.cursor()
  471. sql = 'replace into adcreative (campaign_id,adcreative_id,adcreative_name,image_id,title,promoted_object_type,page_type,page_id,link_page_id,promoted_object_id) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
  472. try:
  473. cursor.executemany(sql, data)
  474. db.commit()
  475. print('insert [adcreative] ', len(data))
  476. except:
  477. db.rollback()
  478. print('insert [adcreative] defeat')
if __name__ == '__main__':
    # Ad-hoc smoke test: pull one day of MP ad cost for a single account.
    # SECURITY NOTE(review): hard-coded account credentials/tokens — move to
    # config/environment before this leaves development.
    account_id = 19206910
    access_token = '89079ccc8db047b078a0108e36a7e276'
    #
    account_id2 = 14709511
    access_token2 = 'e87f7b6f860eaeef086ddcc9c3614678'
    get_ad_cost_day(account_id, access_token, 'MP', '2021-04-09', '2021-04-09')
    # get_adcreatives(account_id,access_token,'MP','3187867673','2021-04-09')