爬取煎蛋妹子图:
查看网页源码
发现:
<div class="text"><span class="righttext"><a href="//jandan.net/ooxx/page-38#comment-3947190">3947190</a></span><p><img src="//img.jandan.net/img/blank.gif" onload="jandan_load_img(this)" /><span class="img-hash">Ly93dzMuc2luYWltZy5jbi9tdzYwMC8wMDczdExQR2d5MWZ1c3h0dHpma2JqMzBnZDBnaGdtbi5qcGc=</span></p>
</div>
图片通过 img 标签 onload 属性中的 jandan_load_img() 函数加载,真实地址藏在后面的 span.img-hash 里。
在页面引用的所有 js 中查找 jandan_load_img() 函数,
最后在 91798e4c623fa60181a31d543488217eGilwMCv6.29100001.js 中找到了这个函数:
// onload handler quoted from the site's script: swaps the placeholder image
// `b` for the real one, whose address is read from the sibling
// `span.img-hash` element.
function jandan_load_img(b) {
var d = $(b);
// The hash text sits in the <span class="img-hash"> right after the <img>.
var f = d.next("span.img-hash");
var e = f.text();
// The span is removed from the page once its text has been read.
f.remove();
// Decode the hash with the site's helper and a fixed key string.
var c = jd9tyaUwh3vvC3QncC1iwiApdgIzNvbiBk(e, "6NY2R9dI5vLvnEw6fBZ6jSNINhbeSakZ");
// Link to the original picture: the path segment following the sinaimg.cn
// host is replaced by "large" for gif/jpg/jpeg addresses.
var a = $('<a href="' + c.replace(/(\/\/\w+\.sinaimg\.cn\/)(\w+)(\/.+\.(gif|jpg|jpeg))/, "$1large$3") + '" target="_blank" class="view_img_link">[查看原图]</a>');
d.before(a);
d.before("<br>");
// Drop the handler attribute before `src` is changed below.
d.removeAttr("onload");
// For .gif addresses the displayed src uses the "thumb180" segment instead;
// other addresses do not match the pattern and are used unchanged.
d.attr("src", location.protocol + c.replace(/(\/\/\w+\.sinaimg\.cn\/)(\w+)(\/.+\.gif)/, "$1thumb180$3"));
if (/\.gif$/.test(c)) {
// Keep the decoded gif address in `org_src` and install a new onload
// handler that calls add_img_loading_mask (defined elsewhere on the site).
d.attr("org_src", location.protocol + c);
b.onload = function() {
add_img_loading_mask(this, load_sina_gif)
}
}
}
# 其中:f = d.next("span.img-hash"),e = f.text(),即 img-hash 的文本
# c = jd9tyaUwh3vvC3QncC1iwiApdgIzNvbiBk(e, "6NY2R9dI5vLvnEw6fBZ6jSNINhbeSakZ")
# 第二个参数(即下文函数中的 t)= "6NY2R9dI5vLvnEw6fBZ6jSNINhbeSakZ"
然后调用函数 jd9tyaUwh3vvC3QncC1iwiApdgIzNvbiBk 进行解码。
查找这个函数,发现有两个定义;根据上下文,选择后面的那个函数:
// Decoder quoted from the site's script. n is the img-hash text, t the key
// string, e an optional third argument that jandan_load_img does not pass.
// Despite the key derivation and stream cipher below, the value returned is
// simply base64_decode of the original n (see the note near the end).
var jd9tyaUwh3vvC3QncC1iwiApdgIzNvbiBk = function(n, t, e) { #n=e, t=t, e=None
// The mode is hard-coded, so only the "DECODE" branches ever run.
var f = "DECODE";
var t = t ? t : "";
var e = e ? e : 0;
var r = 4;
t = md5(t);
// d keeps the untouched input; nothing below assigns to it again.
var d = n;
// Two sub-keys derived from the two halves of md5(t)
// (presumably a 32-character hex digest -- md5 is defined elsewhere).
var p = md5(t.substr(0, 16));
var o = md5(t.substr(16, 16));
if (r) {
if (f == "DECODE") {
// The first r (= 4) characters of the input act as a per-message salt.
var m = n.substr(0, r)
}
} else {
var m = ""
}
// Cipher key built from the first sub-key and the salt.
var c = p + md5(p + m);
var l;
if (f == "DECODE") {
// Strip the salt, then base64-decode the remainder into l.
n = n.substr(r);
l = base64_decode(n)
}
// k starts as the identity permutation 0..255.
var k = new Array(256);
for (var h = 0; h < 256; h++) {
k[h] = h
}
// b repeats the key's character codes to fill 256 entries.
var b = new Array();
for (var h = 0; h < 256; h++) {
b[h] = c.charCodeAt(h % c.length)
}
// Key-dependent shuffle of k (the structure resembles RC4 key scheduling).
for (var g = h = 0; h < 256; h++) {
g = (g + k[h] + b[h]) % 256;
tmp = k[h];
k[h] = k[g];
k[g] = tmp
}
var u = "";
l = l.split("");
// XOR every character of l with a keystream byte taken from k.
for (var q = g = h = 0; h < l.length; h++) {
q = (q + 1) % 256;
g = (g + k[q]) % 256;
tmp = k[q];
k[q] = k[g];
k[g] = tmp;
u += chr(ord(l[h]) ^ (k[(k[q] + k[g]) % 256]))
}
if (f == "DECODE") {
// Checks the first 10 characters against time() and the next 16 against an
// md5-based checksum; u becomes the payload after character 26, or "".
if ((u.substr(0, 10) == 0 || u.substr(0, 10) - time() > 0) && u.substr(10, 16) == md5(u.substr(26) + o).substr(0, 16)) {
u = u.substr(26)
} else {
u = ""
}
// NOTE: this assignment runs unconditionally and overwrites everything
// computed above, so the function returns base64_decode of the original
// input and the key t never influences the result.
u = base64_decode(d)
}
return u
};
看到用这么多代码来解密 url 真是醉了。
但是看完代码后发现,函数最后 return u,而返回之前执行的是:
u = base64_decode(d)
d = n
n = img-hash 的文本
也就是说,前面那一大段运算的结果被直接覆盖了(一万个草泥马)。
中间的代码从头到尾没有修改过 d,
所以密钥参数 t 也毫无作用:直接对 img-hash 的文本做 base64 解码就能得到图片地址。
def get_img_url(m: str) -> str:
    """Decode an ``img-hash`` value into the URL of the full-size image.

    The hash is plain base64 of a protocol-relative URL such as
    ``//ww3.sinaimg.cn/mw600/<name>.jpg``.  The result is prefixed with
    ``http:`` and the ``mw<digits>`` size directory is replaced by ``large``.

    :param m: text of the ``span.img-hash`` element
    :return: absolute image URL
    """
    url = 'http:' + _base64_decode(m).decode("utf-8")
    # Match only a whole path segment (between two slashes).  An unanchored
    # pattern would also rewrite "mw..." runs that happen to occur inside the
    # host name or the file name and silently corrupt the URL.
    return re.sub(r'(?<=/)mw\d*(?=/)', 'large', url)
def _base64_decode(data):
'''base64解码,要注意字符长度'''
missing_padding = 4 - len(data) % 4
if missing_padding:
data += '=' * missing_padding
return base64.b64decode(data)