Skip to content

Commit

Permalink
Discuz验证码识别
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack-Cherish committed Jan 29, 2018
1 parent 911231f commit 732d148
Show file tree
Hide file tree
Showing 3 changed files with 445 additions and 0 deletions.
48 changes: 48 additions & 0 deletions Discuz/get_discuz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#-*- coding:utf-8 -*-
from urllib.request import urlretrieve
import time, random, os

class Discuz():
    """Downloader for labelled Discuz captcha images."""

    def __init__(self):
        # URL of the Discuz captcha image generator; the captcha text is
        # appended as the value of the `label` query parameter.
        self.url = 'http://cuijiahua.com/tutrial/discuz/index.php?label='

    def random_captcha_text(self, captcha_size = 4):
        """
        Build a random captcha string.

        Captchas are generally case-insensitive, so only digits and
        lowercase letters are used; the default length is 4 characters.

        Parameters:
            captcha_size: number of characters in the captcha
        Returns:
            captcha_text: the random captcha string
        """
        char_set = '0123456789abcdefghijklmnopqrstuvwxyz'
        return ''.join(random.choice(char_set) for _ in range(captcha_size))

    def download_discuz(self, nums = 5000):
        """
        Download captcha images into ./Discuz, one file per random label.

        Each image is saved as '<label>.jpg', so a label drawn twice
        overwrites the earlier file.

        Parameters:
            nums: number of captcha images to download
        """
        dirname = './Discuz'
        # os.listdir() yields bare names ('Discuz'), never './Discuz', so a
        # membership test against it always fails; create the directory
        # idempotently instead so repeated runs do not crash.
        os.makedirs(dirname, exist_ok=True)
        for i in range(nums):
            label = self.random_captcha_text()
            print('第%d张图片:%s下载' % (i + 1,label))
            urlretrieve(url = self.url + label, filename = dirname + '/' + label + '.jpg')
            # Keep a delay of at least 200 ms between requests so the image
            # server is not overloaded (requested by the server's owner, who
            # will otherwise disable the endpoint).
            time.sleep(0.2)
        print('恭喜图片下载完成!')

if __name__ == '__main__':
    # Script entry point: download the default number of captcha images.
    Discuz().download_discuz()
36 changes: 36 additions & 0 deletions Discuz/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#-*- coding:utf-8 -*-
import tensorflow as tf
import numpy as np
import train

def crack_captcha(captcha_image, captcha_label):
    """
    Restore the latest checkpoint and print the prediction for each captcha.

    Relies on the module-level name `dz` (bound in the `__main__` block)
    for the network, session config, placeholders and `vec2text` decoding.

    Parameters:
        captcha_image: indexable collection of image arrays; each one is
            flattened before being fed to the network
        captcha_label: collection of label vectors, decoded with dz.vec2text
    Raises:
        ValueError: if no checkpoint is found in the current directory
    """

    output = dz.crack_captcha_cnn()
    # Build the prediction op once; creating it inside the loop would add a
    # new reshape/argmax pair to the graph on every iteration.
    predict = tf.argmax(tf.reshape(output, [-1, dz.max_captcha, dz.char_set_len]), 2)
    saver = tf.train.Saver()
    with tf.Session(config=dz.config) as sess:

        checkpoint = tf.train.latest_checkpoint('.')
        if checkpoint is None:
            # latest_checkpoint returns None when nothing is found; fail with
            # an explicit message instead of passing None to restore().
            raise ValueError("no checkpoint found in the current directory")
        saver.restore(sess, checkpoint)
        for index, label in enumerate(captcha_label):
            img = captcha_image[index].flatten()
            text_list = sess.run(predict, feed_dict={dz.X: [img], dz.keep_prob: 1})
            text = text_list[0].tolist()
            # Re-encode the predicted class indices as a one-hot vector so
            # the same vec2text decoder can be used for label and prediction.
            vector = np.zeros(dz.max_captcha*dz.char_set_len)
            for position, char_index in enumerate(text):
                vector[position*dz.char_set_len + char_index] = 1
            prediction_text = dz.vec2text(vector)
            print("正确: {}  预测: {}".format(dz.vec2text(label), prediction_text))

if __name__ == '__main__':
    # `dz` must stay bound at module level: crack_captcha reads it as a global.
    dz = train.Discuz()
    # NOTE(review): the arguments (False, 5) presumably select a 5-sample
    # non-training batch; confirm against train.Discuz.get_next_batch.
    images, labels = dz.get_next_batch(False, 5)
    crack_captcha(images, labels)
Loading

0 comments on commit 732d148

Please sign in to comment.