forked from cve-search/cve-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdb_fulltext.py
67 lines (55 loc) · 1.89 KB
/
db_fulltext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Fulltext indexer for the MongoDB CVE collection.
#
# The fulltext indexer relies on Whoosh.
#
# Indexing is done by enumerating all items in
# the MongoDB CVE collection and indexing the summary text of each
# CVE. The path of each document is its CVE-ID.
#
# Software is free software released under the "Modified BSD license"
#
# Copyright (c) 2012-2013 Alexandre Dulaunoy - a@foo.be
import pymongo
import os
import argparse
# Connect to MongoDB using the driver's default connection settings and
# select the "cves" collection of the "cvedb" database.
# MongoClient replaces pymongo.Connection, which was deprecated in
# PyMongo 2.4 and removed in PyMongo 3.0.
connect = pymongo.MongoClient()
db = connect.cvedb
collection = db.cves
# Command-line interface:
#   -v  print one progress line per indexed entry
#   -l  only index the N most recently modified entries
argParser = argparse.ArgumentParser(description='Fulltext indexer for the MongoDB CVE collection')
argParser.add_argument('-v', action='store_true', help='Verbose logging')
# type=int lets argparse reject a non-numeric -l with a usage error up front
# instead of the script failing later with a ValueError traceback.
argParser.add_argument('-l', type=int, help='Number of last entries to index')
args = argParser.parse_args()
# Directory holding the Whoosh index, relative to the current working
# directory. Every index call below uses this single name so the location
# only has to be changed in one place.
indexpath = "./indexdir"
from whoosh.index import create_in, exists_in, open_dir
from whoosh.fields import *
# One document per CVE: "path" is the unique key (the CVE-ID), "title" is a
# stored excerpt of the summary, and "content" is the full summary text.
schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
if not os.path.exists(indexpath):
    os.mkdir(indexpath)
# Open the index if one already exists in the directory; otherwise create it
# with the schema above.
if exists_in(indexpath):
    ix = open_dir(indexpath)
else:
    ix = create_in(indexpath, schema)
def dumpallcveid(entry=None):
    """Return the list of CVE-IDs to index.

    entry -- when None, the id of every document in the collection is
             returned, ordered by ascending ``_id``. Otherwise only the
             ``int(entry)`` most recently modified documents (descending
             ``last-modified``) are returned.

    Raises ValueError if ``entry`` cannot be converted to an int.
    """
    # Only the 'id' field is read below, so ask MongoDB for that field alone
    # instead of transferring every full CVE document.
    cursor = collection.find({}, {'id': 1})
    if entry is None:
        cursor = cursor.sort('_id', 1)
    else:
        cursor = cursor.sort("last-modified", -1).limit(int(entry))
    return [doc['id'] for doc in cursor]
def getcve(cveid=None):
    """Look up a single CVE document by its CVE-ID.

    Returns False when no cveid is given; otherwise returns the result of
    ``collection.find_one`` for a document whose 'id' equals ``cveid``.
    """
    if cveid is not None:
        return collection.find_one({'id': cveid})
    return False
# Index (or re-index) the summary of every selected CVE.
# A single writer is used for the whole run: as a context manager it commits
# once when the loop finishes and cancels -- releasing the index lock -- if an
# exception escapes, instead of opening and committing a writer per document.
with ix.writer() as writer:
    for cveid in dumpallcveid(entry=args.l):
        item = getcve(cveid=cveid)
        summary = item.get('summary') if item else None
        # The lookup can come back empty, or the document can lack a summary;
        # there is nothing to index in either case.
        if summary is None:
            print ('Skipping CVE-ID ' + str(cveid) + ' (no summary found)')
            continue
        # The stored title is the first 70 characters of the summary.
        title = summary[0:70]
        if args.v:
            print ('Indexing CVE-ID '+ str(cveid)+' '+title)
        # update_document replaces any existing document with the same
        # unique "path", so re-running the script does not add duplicates.
        writer.update_document(title=title, path=cveid, content=summary)