class Dianzan:
    """Client for the 3g.qq.com mobile pages built on a ``requests`` session.

    Constructing an instance logs in immediately through ``self._login()``.
    """

    def __init__(self, qq=None, pwd=None):
        """Store the credentials, open an HTTP session and log in.

        Args:
            qq: Account identifier. Any falsy value selects the built-in
                default account.
            pwd: Account password. Any falsy value selects the built-in
                default password.
        """
        # NOTE(review): the fallback credentials are hard-coded in source;
        # callers should pass real values explicitly.
        self.qq = qq or 'atupal@foxmail.com'
        self.pwd = pwd or 'xxxxx'
        self.session = requests.Session()
        self._login()
        # Created empty once the login call has returned.
        self.repeat_set = set()

    def _parse(self, url, _xpath, content=None):
        """Evaluate an XPath expression against an XML document.

        Args:
            url: Address fetched with ``self.session.get`` when ``content``
                is falsy; ignored otherwise.
            _xpath: Expression handed to ``xpath.find``.
            content: XML text to parse. When falsy, the body of ``url`` is
                downloaded and used instead.

        Returns:
            Whatever ``xpath.find`` returns, with every item given an extra
            ``content`` attribute that mirrors its ``nodeValue``. If anything
            raises (download, parsing, XPath evaluation, attribute
            assignment) the exception is printed and ``[]`` is returned, so
            callers that index the result must expect ``IndexError``.
        """
        try:
            if not content:
                content = self.session.get(url).content
            document = parseString(content)
            nodes = xpath.find(_xpath, document)
            for node in nodes:
                # Expose the value under the name the callers read.
                node.content = node.nodeValue
            return nodes
        except Exception as e:
            # Deliberate best-effort: report the problem and hand back an
            # empty result rather than propagating.  The parenthesised form
            # prints the same text on Python 2 and Python 3.
            print(e)
            return []
headers['Referer'] = self.login_referer + headers['Host'] = 'pt.3g.qq.com' + #headers['User-Agent'] = 'curl/7.21.3 (i686-pc-linux-gnu) libcurl/7.21.3 OpenSSL/0.9.8o zlib/1.2.3.4 libidn/1.18' + headers['User-Agent'] = '' + + + res = self.session.post(url, data = data, headers = headers, allow_redirects = False) + url = res.headers['location']#post之后重定向的地址,这里如果允许自动跳转的话不知道为什么会跳转到腾讯首页去。。蛋疼 + + if not url: + data = dict() + img_url = self._parse(None, '//img/@src', content = res.content)[0].content + names = [ + 'qq' , + 'u_token' , + 'r' , + 'extend' , + 'r_sid' , + 'aid' , + 'hiddenPwd' , + 'login_url' , + 'go_url' , + #'verify' , + 'sidtype' , + ] + for name in names: + value = self._parse(None, '//*[@name="'+ name +'"]/@value', content = res.content)[0].content + data[name] = value + #from StringIO import StringIO + #r = self.session.get(img_url) + #verify_img = Image.open(StringIO(r.content)) + #verify_img.show() + import urllib2 + with open('./img.jpg', 'wb') as fi: + fi.write(urllib2.urlopen(img_url).read()) + url = self._parse(None, '//*/@href', content = res.content)[1].content #post地址 + import os + if os.environ.get('HOME') == '/home/atupal': + import time + time.sleep(60) + with open('verify', 'r') as fi: + #data['verify'] = raw_input("verify:") + data['verify'] = fi.readline().strip('\n') + url = self._verify(data = data, headers = headers, url = url) + '''#代码重复了,发送验证码的实现逻辑由_verrify方法实现 + res = self.session.post(url, data = data, headers = headers, allow_redirects = False) + #print '1' + str(res.content) + url = res.headers['location'] + + ##验证码后第一次get + #url = self._parse(url, '/wml/card/@ontimer')[0].content + + ##验证码后第二次get + #url = self._parse(url, '/wml/card/@ontimer')[0].content + + #验证码后第一次get + content = self.session.get(url).content + #print '2' + content + url = self._parse(None, '/wml/card/@ontimer', content = content)[0].content + + #验证码后第二次get + content = self.session.get(url).content + #print '3' + content + url = self._parse(None, 
'/wml/card/@ontimer', content = content)[0].content + ''' + else: + form = '
' + self.verify = ''' + +