forked from zake7749/Chatbot
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Optimize the searching strategy of question answering.
- Loading branch information
Showing
2 changed files
with
47 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
class QuickSearcher(object):
    """Inverted index mapping each word to the ids of documents containing it.

    Every document is expected to be an iterable of already-segmented words.
    Looking a query up through the index narrows the candidate documents with
    plain set operations instead of scanning every document.
    """

    def __init__(self, docs=None):
        """Create an empty index.

        Args:
            docs: Accepted for interface compatibility but NOT indexed here;
                call ``buildInvertedIndex`` explicitly to populate the index.
        """
        # word -> set of ids (positions in ``docs``) of documents with that word
        self.inverted_word_dic = dict()

    def buildInvertedIndex(self, docs):
        """Add every word of every document to the inverted index.

        A document's id is its position in ``docs``. Entries already present
        in the index are kept; new ids are merged into the existing sets.

        Args:
            docs: Iterable of documents, each an iterable of hashable words
                (i.e. word segmentation must already have been done).
        """
        index = self.inverted_word_dic
        for doc_id, doc in enumerate(docs):
            for word in doc:
                # Direct dict access; no ``.keys()`` scan per word.
                index.setdefault(word, set()).add(doc_id)

    def quickSearch(self, query):
        """Return the ids of all documents sharing at least one word with query.

        Args:
            query: Iterable of already-segmented, hashable words.

        Returns:
            A new ``set`` of document ids (the union over all query words).
            Words missing from the index contribute nothing, so the result is
            empty when no query word is indexed.
        """
        index = self.inverted_word_dic
        result = set()
        for word in query:
            # In-place update avoids re-copying the accumulated set per word.
            result.update(index.get(word, ()))
        return result