forked from grt1st/wooyun_search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
61 lines (61 loc) · 2.06 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#coding=utf-8
import os
import re
import sys
import MySQLdb
from bs4 import BeautifulSoup
# Directory whose entries are listed and parsed by the import loop.
path = 'bugs'

# Pre-compiled patterns; each one captures a single field of a report page.
# Title: text after the title label inside the wybug_title heading, up to the
# credit icon image.
pattern0 = re.compile(
    r'<h3.*?class=\'wybug_title\'>.*?漏洞标题:(.*)'
    r'<img.*?src="/images/credit.png"'
)
# Submission date: text after the date label inside the wybug_date heading.
pattern1 = re.compile(r'<h3.*?class=\'wybug_date\'>提交时间:(.*)</h3>')
# Link text of a serialized <a> tag (applied to the corp link).
pattern2 = re.compile(r'>(.*)</a>')
# Link text of a serialized <a> tag (applied to the author link).
pattern3 = re.compile(r'>(.*)</a>')
# Everything between a colon and the closing </h3> (applied to the type heading).
pattern4 = re.compile(r':(.*)</h3>')
# Fallback title: text after the title label in the heading's plain text.
pattern5 = re.compile(r'漏洞标题:(.*)')
def _extract_bug_fields(html):
    """Pull the title, date, author, type and corp fields out of one page.

    html is the raw text of a saved report page. Returns the 5-tuple
    (title, date, author, type, corp). Raises IndexError when an expected
    heading, link or pattern match is missing from the page.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Corp and author are the first link inside their headings; the tag is
    # serialized and stripped of spaces before the link-text regex runs.
    corp_link = soup.find_all('h3', class_='wybug_corp')[0].find_all('a')[0]
    corp = str(corp_link).replace(' ', '').replace('\n', '')
    author_link = soup.find_all('h3', class_='wybug_author')[0].find_all('a')[0]
    author = str(author_link).replace(' ', '')
    type0 = str(soup.find_all('h3', class_='wybug_type')[0]).replace(' ', '')

    title = re.findall(pattern0, html)
    if title:
        # NOTE(review): the second replace is a no-op as written; it may have
        # targeted a tab or another whitespace character originally - confirm.
        title1 = title[0].replace(' ', '').replace(' ', '')
    else:
        # Fallback when the raw-HTML pattern finds nothing: take the heading's
        # plain text and read the title after its label.
        heading = soup.find_all('h3', class_='wybug_title')[0]
        title0 = re.findall(pattern5, heading.text.encode('utf-8'))
        title1 = title0[0].replace('\n', '').replace(' ', '').replace(' ', '')

    date1 = re.findall(pattern1, html)[0].replace(' ', '')
    corp1 = re.findall(pattern2, corp)
    author1 = re.findall(pattern3, author)
    type1 = re.findall(pattern4, type0)
    return (title1, date1, author1[0], type1[0], corp1[0])


# Python 2 only: switch the default str/unicode codec to UTF-8. Done once
# up front rather than on every iteration, so every file is handled under
# the same setting.
reload(sys)
sys.setdefaultencoding('utf-8')

for docs in os.listdir(path):
    # Open the file and read its content.
    doc_path = os.path.join(path, docs)
    if os.path.isdir(doc_path):
        print("目录跳过")
        continue
    with open(doc_path, 'r') as doc:
        html = doc.read()

    # Extract the fields; a page that lacks an expected element is reported
    # and skipped instead of aborting the whole run.
    try:
        fields = _extract_bug_fields(html)
    except IndexError:
        print("Parse Error: %s" % docs)
        continue
    print("%s %s %s %s %s" % fields)

    # Store the record. A fresh connection is used per file, and it is closed
    # even when the insert fails; any MySQL error is reported and the loop
    # moves on to the next file.
    try:
        conn = MySQLdb.connect(host='localhost', port=3306, user='root',
                               passwd='', db='wooyun', charset='utf8')
        try:
            cur = conn.cursor()
            tmp = fields + (docs,)
            cur.execute("INSERT INTO `bugs`(`title`,`dates`,`author`,`type`,`corp`,`doc`) VALUES(%s,%s,%s,%s,%s,%s)", tmp)
            conn.commit()
            cur.close()
        finally:
            conn.close()
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))