需要统计代理商那边每月刷的好评数,手动统计太慢了,写了个Python脚本来干这活儿,每月跑一次就行了,没啥技术含量,只是为了代码备忘。
目前只能抓取OPPO和MEIZU的,小米需要手机抓包没去搞,腾讯的AJAX不好抓放弃了。
#coding=utf-8
#!/usr/local/bin/python
import requests,time,random,pycurl,json,StringIO,datetime,re,threading,urllib,Queue
import os
import sys
from urlparse import *
from lxml import etree
import hashlib
import jieba
import jieba.analyse
import codecs #为了生成gbk编码的文件
import chardet
# from lxml import *
from multiprocessing.dummy import Pool as ThreadPool
#import MySQLdb as mdb
# Python 2 only: reload(sys) makes sys.setdefaultencoding() callable again so
# the process-wide default codec can be switched to UTF-8. The implicit
# str <-> unicode conversions in the %-formatting and file writes further down
# rely on this when the scraped text contains Chinese characters.
reload(sys)
sys.setdefaultencoding('utf-8')
# Date of the current run; not referenced in the visible part of this script.
today = datetime.date.today()
#############oppo#############
#############oppo#############
#############oppo#############
# Scrape the OPPO store reviews of app 11004487 (pages 1..35) and dump them to
# oppo.txt, one tab-separated line per review:
#   page, id, createDate, userGrade, version, userNickName, source, word
# The file is opened in a `with` block so it is closed (and fully flushed) even
# if a request or a JSON lookup raises half-way through the run.
with open('oppo.txt', 'w') as f:
    for i in xrange(1, 36):
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(
            'http://store.oppomobile.com/comment/list.json?id=11004487&page=%s' % (i),
            timeout=30)
        html_json = json.loads(r.text)
        for a in html_json['commentsList']:
            # Review text may span several lines; flatten it so that every
            # review stays on exactly one line of the output file.
            word = a['word'].replace('\n', ' ')
            # Named comment_id (not id) so the builtin id() is not shadowed.
            comment_id = a['id']
            row = (i, comment_id, a['createDate'], a['userGrade'], a['version'],
                   a['userNickName'], a['source'], word)
            # Progress echo on stdout: the same fields, space-separated.
            print('%s %s %s %s %s %s %s %s' % row)
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % row)
        # Push each finished page to disk so an aborted run keeps its data.
        f.flush()
#############meizu#############
#############meizu#############
#############meizu#############
try:
    _unichr = unichr  # Python 2: chr() only covers 0-255 there
except NameError:
    _unichr = chr  # Python 3: chr() already returns a text character

# Hexadecimal numeric character reference, e.g. "&#xD7;" or "&#x4E2D;".
_HEX_ENTITY_RE = re.compile(r'&#x([0-9A-Fa-f]+);')


def _decode_hex_entity(match):
    """Return the character for one matched entity, or the entity unchanged
    when its code point cannot be represented."""
    try:
        return _unichr(int(match.group(1), 16))
    except (ValueError, OverflowError):
        return match.group(0)


def convert_to_cn(text):
    """Decode ``&#xHHHH;`` character references in *text*.

    Only hexadecimal references are translated; every other character
    (including ordinary semicolons, backslashes and text that is already
    Chinese) passes through untouched. Short forms such as ``&#xD7;`` need
    no zero padding, and hex digits may be upper or lower case.

    Args:
        text: unicode text, or a UTF-8 encoded byte string.

    Returns:
        The decoded text as a UTF-8 encoded byte string.
    """
    if isinstance(text, bytes):
        # Work on text throughout so regex and callback never mix types.
        text = text.decode('utf-8')
    # Each entity is substituted individually instead of rewriting the whole
    # string to "\uXXXX" and running it through the unicode-escape codec:
    # that approach dropped every ';' in the text and re-interpreted literal
    # backslashes and non-ASCII characters.
    return _HEX_ENTITY_RE.sub(_decode_hex_entity, text).encode('utf-8')
# Scrape the Meizu app-store reviews of app 536002 in pages of 10 and dump them
# to meizu.txt, one tab-separated line per review:
#   page, create_time, star, version_name, user_name, comment
# The file is opened in a `with` block so it is always closed and flushed.
with open('meizu.txt', 'w') as f:
    for i in xrange(1, 246):
        # NOTE(review): the first request uses start=10; if `start` is a
        # 0-based offset the first ten reviews are never fetched -- confirm
        # against the API before changing the range.
        i2 = i * 10
        try:
            # A timeout keeps one stalled connection from hanging the run.
            r = requests.get(
                'http://app.meizu.com/apps/public/evaluate/list?app_id=536002&start=%s&max=10' % (i2),
                timeout=30)
            html_json = json.loads(r.text)
            for a in html_json['value']['list']:
                # user_name and comment are passed through convert_to_cn to
                # turn "&#x....;" character references into readable text.
                row = (i, a['create_time'], a['star'], a['version_name'],
                       convert_to_cn(a['user_name']),
                       convert_to_cn(a['comment']))
                # Progress echo on stdout: the same fields, space-separated.
                print('%s %s %s %s %s %s' % row)
                f.write('%s\t%s\t%s\t%s\t%s\t%s\n' % row)
            # Push each finished page to disk so an aborted run keeps its data.
            f.flush()
        except Exception as e:
            # Best effort: a bad page must not abort the remaining pages, but
            # report it so missing data is visible. Unlike a bare `except:`,
            # this lets Ctrl-C (KeyboardInterrupt) stop the script.
            sys.stderr.write('meizu page %s (start=%s) skipped: %r\n' % (i, i2, e))

评论