-
Notifications
You must be signed in to change notification settings - Fork 70
/
negation_tag_demo.py
65 lines (44 loc) · 1.5 KB
/
negation_tag_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import pickle
import sys
import time
import pandas as pd
from multiprocessing import Pool
from itertools import product
import time
from sklearn.metrics import auc
from sklearn import metrics
import ast
from sklearn.feature_extraction.text import CountVectorizer
from negex import *
def parse_sent(text, nlp, tok, neg_words):
    """Append ``_NEG`` to the words a dependency parse marks as negated.

    The parser is only invoked when at least one token of ``text`` is a
    negation trigger, so text without a trigger is returned untouched.

    Args:
        text: Sentence to tag.
        nlp: Object whose ``parse(text)`` returns a string holding a Python
            literal shaped like ``{'sentences': [{'dependencies': [...]}]}``.
        tok: Callable that splits ``text`` into an iterable of tokens.
        neg_words: Container of negation trigger words, tested with ``in``.

    Returns:
        ``text`` with ``_NEG`` appended to every whole-word occurrence of
        each word named by a ``'neg'`` dependency of the first parsed
        sentence, or ``text`` unchanged when it holds no trigger word.
    """
    import re  # local import: this file has no top-level ``import re``

    if not any(w in neg_words for w in tok(text)):
        return text

    # literal_eval (not eval): the parser output can only yield plain literals.
    parsed = ast.literal_eval(nlp.parse(text))
    # Only the first sentence of the parse is consulted.
    for tag in parsed['sentences'][0]['dependencies']:
        if tag[0] != 'neg':
            continue
        # tag[1] is the word to mark (presumably the governor of the negation
        # -- TODO confirm against the parser's output format).
        # Match it as a whole word only: a plain str.replace would also tag
        # substrings ("like" inside "unlikely") and would append the suffix
        # once more for every further 'neg' dependency naming the same word.
        # The trailing (?!\w) also rejects an already tagged "word_NEG".
        whole_word = re.compile(r'(?<!\w)' + re.escape(tag[1]) + r'(?!\w)')
        text = whole_word.sub(lambda m: m.group(0) + '_NEG', text)
    return text
def split_sent(text):
    """Split ``text`` into fragments at runs of ':', '?' or '.'.

    Args:
        text: Value to split; it is converted with ``str()`` first, so
            non-string input is accepted.

    Returns:
        List of the fragments between delimiter runs. The delimiters are
        dropped, surrounding whitespace is kept, and text ending in a
        delimiter yields a trailing empty string.
    """
    # This file has no top-level ``import re``; the name was presumably only
    # reachable through ``from negex import *``. Import it explicitly so the
    # function does not depend on what that star import happens to expose.
    import re

    return re.split(r'[:?.]+', str(text))
def negate(text, irules, conditions):
    """Run ``negTagger`` over every sentence fragment of ``text``.

    Args:
        text: Input text; it is cut into fragments by ``split_sent``, which
            drops the ':', '?' and '.' delimiters.
        irules: Rules forwarded unchanged to ``negTagger`` as ``rules``.
        conditions: Iterable of phrases. For each fragment only the phrases
            that occur in it (substring test) are forwarded as ``phrases``.

    Returns:
        The tagged fragments, in order, joined by a single space.
    """
    tagged = []
    for fragment in split_sent(text):
        # Restrict the phrase list to what this fragment actually contains.
        present = [c for c in conditions if c in fragment]
        tagger = negTagger(sentence=fragment, phrases=present,
                           rules=irules, negP=False)
        tagged.append(tagger.getNegTaggedSentence())
    return ' '.join(tagged)
if __name__ == '__main__':
    # Demo: load the trigger table, build the rule set, tag one sentence.
    # The trigger file is tab-separated and has no header row.
    trigger_table = pd.read_csv('./negex_triggers.txt', sep='\t', header=None)

    # One rule string per row: column 0 and column 2 joined by two tabs.
    rule_lines = list(trigger_table[0] + '\t\t' + trigger_table[2])
    irules = sortRules(rule_lines)

    conditions = ['cough', 'headache']
    sentence = 'the patient is negative for cough, and headache.'

    tagged_sentence = negate(sentence, irules, conditions)
    print(tagged_sentence)