-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdb_couchdb.py
137 lines (114 loc) · 3.83 KB
/
db_couchdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# -*- coding: utf-8 -*-
"""
Script which contains all class definitions related to the CouchDB database
"""
from manage_db import *
class Connection(ManageConnection):
    """
    Remember which database server was requested and open a connection
    to it through the ``ManageConnection`` base class.
    """

    def __init__(self, db_server):
        # Store the server identifier before running the base initialiser,
        # in the same order as before.
        self.db_server = db_server
        ManageConnection.__init__(self)

    def connect(self):
        """Return the result of ``self.connection`` for the stored server."""
        # NOTE(review): ``connection`` is presumably provided by
        # ManageConnection -- it is not defined in this class.
        target = self.db_server
        return self.connection(target)
""" -------------------------------------------- """
""" ----Database connection must be done here--- """
# Pull in the backend-specific definitions selected by DB_SERVER.  The star
# imports may rebind names used below, so the statement order matters.
# NOTE(review): the 'couchdb' branch imports ``db_couchdb``; if this file is
# itself db_couchdb.py that is a self-import -- confirm it is intended.
if DB_SERVER == 'couchdb':
    from db_couchdb import *
elif DB_SERVER == 'mongodb':
    from db_mongodb import *

# Module-level database handle used by every class defined below.
connecting = Connection(DB_SERVER)
COUCHDB = connecting.connect()
""" -------------------------------------------- """
from couchdb.mapping import Document, TextField, IntegerField, DictField, ViewField
class Article(Document):
    """
    Represents articles stored in DB. Used for both reading and inserting
    article data.
    """
    # Document discriminator; _set_article() sets it to 'article'.
    type = TextField()
    title = TextField()
    text = TextField()
    # Filled from ``article.url`` in _set_article().
    source = TextField()
    # Publication date stored as text (see _set_article()).
    pub_date = TextField()
    # Cluster key assigned through update_article().
    cluster = IntegerField()

    # The JavaScript source of both views is kept verbatim.
    by_source = ViewField('source', '''\
function(doc) {
if (doc.type=='article'){emit(doc.source, doc);};
}''')
    by_article = ViewField('article', '''\
function(doc) {
if (doc.type=='article'){emit(doc);};
}''')

    @staticmethod
    def check_article_url(url):
        """
        Tell whether ``url`` is absent from the permanent
        'source/by_source' view.

        Returns True when the view holds no row keyed by ``url`` and False
        otherwise (the negative case used to fall through and return None).
        """
        rows = COUCHDB.view('source/by_source')
        return not list(rows[url])

    def _set_article(self, article):
        """
        Copy the relevant attributes of ``article`` onto this document.

        ``article`` must expose ``url``, ``title``, ``text`` and
        ``publish_date``.
        """
        self.type = 'article'
        self.source = article.url
        self.title = article.title
        self.text = article.text
        if article.publish_date:
            # NOTE(review): publish_date is indexed with [0], so it is
            # presumably a sequence of datetimes -- confirm against caller.
            self.pub_date = str(article.publish_date[0].date())
        else:
            # The publishing date could not be retrieved: store the string
            # form of the falsy value (e.g. 'None').
            self.pub_date = str(article.publish_date)

    def save_article(self, article):
        """Fill this document from ``article`` and store it in COUCHDB."""
        self._set_article(article)
        self.store(COUCHDB)

    @staticmethod
    def get_all_articles():
        """Return the rows of the 'article/by_article' view as a list."""
        rows = Article.view(COUCHDB, 'article/by_article', include_docs=True)
        return list(rows)

    def update_article(self, cluster_key):
        """Set ``cluster`` to ``cluster_key`` and store the document."""
        self.cluster = cluster_key
        self.store(COUCHDB)
class Stopword(Document):
    """
    Class for storing stopword objects: one document per (language, word).
    """
    # Document discriminator; set_stopwords() writes 'stopword'.
    type = TextField()
    lang = TextField()
    word = TextField()

    # The JavaScript source of the view is kept verbatim.
    by_stopword = ViewField('stopword', '''\
function(doc) {
if (doc.type=='stopword'){emit(doc.lang, doc);};
}''')

    def sw_exist(self):
        """
        Tell whether at least one stopword document exists.

        Runs the ``by_stopword`` map function as an ad-hoc query and returns
        True when it yields any row, False otherwise (the negative case used
        to fall through and return None).
        """
        rows = COUCHDB.query(self.by_stopword.map_fun)
        return bool(list(rows))

    def set_stopwords(self):
        """
        Build one Stopword document per word and send them to COUCHDB in a
        single ``update`` call.
        """
        # NOTE(review): stopwords_list and SW_PATH are not defined in this
        # file; presumably they come from the manage_db star import.  The
        # result is iterated as a mapping of language -> iterable of words.
        words_by_lang = stopwords_list(SW_PATH)
        sw_list = []
        # .items() works on both Python 2 and 3, unlike .iteritems().
        for lang, words in words_by_lang.items():
            sw_list.extend(
                Stopword(type='stopword', lang=lang, word=each_word)
                for each_word in words
            )
        COUCHDB.update(sw_list)

    def get_all_stopwords(self):
        """Return the result of the 'stopword/by_stopword' view query."""
        return Stopword.view(COUCHDB, 'stopword/by_stopword',
                             include_docs=True)
class ClusteringResume(Document):
    """
    Stores main useful elements for each cluster
    """
    # Document discriminator; set_dataclusters() writes 'cluster'.
    type = TextField()
    # Declared as a field so set_dataclusters() can assign the id directly.
    _id = TextField()
    keywords = DictField()
    cluster_title = TextField()

    # remove_cluster_content() reads ``self.by_cluster.map_fun``, but the
    # view was never declared on this class, so the lookup failed.  Only the
    # row id is used by that method, hence the null value.
    by_cluster = ViewField('cluster', '''\
function(doc) {
if (doc.type=='cluster'){emit(doc._id, null);};
}''')

    def set_dataclusters(self, cluster_id, keywords, title):
        """
        Fill this document with the given cluster data and store it.

        ``cluster_id`` becomes the document ``_id``; ``keywords`` and
        ``title`` are stored in ``keywords`` and ``cluster_title``.
        """
        self.type = 'cluster'
        self._id = cluster_id
        self.keywords = keywords
        self.cluster_title = title
        self.store(COUCHDB)

    def remove_cluster_content(self):
        """
        Delete every document selected by the ``by_cluster`` map function
        (documents whose ``type`` is 'cluster').
        """
        rows = COUCHDB.query(self.by_cluster.map_fun)
        for row in rows:
            doc = COUCHDB.get(row.id)
            # get() yields None for a document that no longer exists;
            # there is nothing left to delete in that case.
            if doc is not None:
                COUCHDB.delete(doc)