Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions preprocess/data_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import nltk
import os
import pickle
from utils import symbol_filter, re_lemma, fully_part_header, group_header, partial_header, num2year, group_symbol, group_values, group_digital
from utils import symbol_filter, fully_part_header, group_header, partial_header, num2year, group_symbol, group_values, group_digital
from utils import AGG, wordnet_lemmatizer
from utils import load_dataSets

Expand Down Expand Up @@ -43,29 +43,19 @@ def process_datas(datas, args):
entry['question_toks'] = question_toks

table_names = []
table_names_pattern = []

for y in entry['table_names']:
x = [wordnet_lemmatizer.lemmatize(x.lower()) for x in y.split(' ')]
table_names.append(" ".join(x))
x = [re_lemma(x.lower()) for x in y.split(' ')]
table_names_pattern.append(" ".join(x))

header_toks = []
header_toks_list = []

header_toks_pattern = []
header_toks_list_pattern = []

for y in entry['col_set']:
x = [wordnet_lemmatizer.lemmatize(x.lower()) for x in y.split(' ')]
header_toks.append(" ".join(x))
header_toks_list.append(x)

x = [re_lemma(x.lower()) for x in y.split(' ')]
header_toks_pattern.append(" ".join(x))
header_toks_list_pattern.append(x)

num_toks = len(question_toks)
idx = 0
tok_concol = []
Expand Down Expand Up @@ -215,5 +205,3 @@ def get_concept_result(toks, graph):

with open(args.output, 'w') as f:
json.dump(datas, f)


9 changes: 0 additions & 9 deletions preprocess/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
# @File : utils.py
# @Software: PyCharm
"""
import os
import json
from pattern.en import lemma
from nltk.stem import WordNetLemmatizer

VALUE_FILTER = ['what', 'how', 'list', 'give', 'show', 'find', 'id', 'order', 'when']
Expand Down Expand Up @@ -164,10 +162,3 @@ def check_in(list_one, list_two):
if check_in(toks, heads):
return heads
return None

def re_lemma(string):
    """Lemmatize ``string`` with ``pattern.en.lemma`` after lowercasing it.

    Returns the lemma when it is non-empty; otherwise returns the
    lowercased input unchanged.
    """
    lowered = string.lower()
    candidate = lemma(lowered)
    # Fall back to the plain lowercased token when no lemma came back.
    if len(candidate) == 0:
        return lowered
    return candidate
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Licensed under the MIT license.

nltk==3.4
pattern
numpy==1.14.0
pytorch-pretrained-bert==0.5.1
tqdm==4.31.1
tqdm==4.31.1