2
2
3
3
import spacy
4
4
from spacy .matcher import DependencyMatcher
5
+ from spacy .tokens import Token
5
6
from spacy .tokens .doc import Doc
6
7
7
- from ..get_api ._model import API , Action , Condition , Dependency , Parameter
8
+ from ..get_api ._model import (
9
+ API ,
10
+ Action ,
11
+ Condition ,
12
+ Dependency ,
13
+ Parameter ,
14
+ ParameterHasValue ,
15
+ ParameterIsIgnored ,
16
+ ParameterIsIllegal ,
17
+ ParameterIsOptional ,
18
+ )
8
19
from ._dependency_patterns import dependency_matcher_patterns
9
20
from ._preprocess_docstring import preprocess_docstring
10
21
11
22
PIPELINE = "en_core_web_sm"
12
23
13
24
14
- class DependencyExtractor :
15
- @staticmethod
16
- def extract_pattern_parameter_used_condition (
17
- dependent_param : Parameter ,
18
- func_parameters : List [Parameter ],
19
- match : Tuple ,
20
- param_docstring : Doc ,
21
- ) -> Union [Dependency , None ]:
22
- is_depending_on_param_index = match [1 ][2 ]
23
- is_depending_on_param_name = param_docstring [is_depending_on_param_index ].text
24
- is_depending_on_param = next (
25
- filter (
26
- lambda param : param .name == is_depending_on_param_name , func_parameters
27
- ),
28
- None ,
29
- )
30
- if is_depending_on_param is None :
31
- # Likely not a correct dependency match
32
- return None
33
-
34
- condition_verb = param_docstring [match [1 ][1 ]]
35
- condition_verb_subtree = list (condition_verb .subtree )
36
- condition_text = " " .join ([token .text for token in condition_verb_subtree ])
37
- condition = Condition (condition = condition_text )
38
-
39
- action = Action (action = "used" )
40
-
41
- return Dependency (
42
- hasDependentParameter = dependent_param ,
43
- isDependingOn = is_depending_on_param ,
44
- hasCondition = condition ,
45
- hasAction = action ,
46
- )
25
def extract_lefts_and_rights(
    curr_token: "Token", extracted: Union[List, None] = None
) -> List:
    """Collect the texts of ``curr_token`` and all of its descendants in order.

    The walk is in-order: every left child is expanded recursively first,
    then the text of ``curr_token`` itself is appended, then every right
    child is expanded recursively.

    Args:
        curr_token: Root of the subtree to flatten. Only its ``lefts``,
            ``rights`` and ``text`` attributes are read.
        extracted: Accumulator that is appended to in place. A new list is
            created when it is ``None`` (the default), so separate top-level
            calls never share state.

    Returns:
        The accumulator list holding the collected token texts. When
        ``extracted`` was passed in, the very same list object is returned.
    """
    if extracted is None:
        extracted = []

    # Iterate the children directly; copying them into a list first is not
    # needed because nothing is mutated during the walk.
    for left_child in curr_token.lefts:
        extract_lefts_and_rights(left_child, extracted)

    extracted.append(curr_token.text)

    for right_child in curr_token.rights:
        extract_lefts_and_rights(right_child, extracted)

    return extracted
40
+
41
+
42
def extract_action(action_token: Token, condition_token: Token) -> Action:
    """Build the action of a dependency from the token that heads it.

    The action text is assembled from the subtrees of the direct children of
    ``action_token`` (left children, then ``action_token`` itself, then right
    children), joined with single spaces. A direct child equal to
    ``condition_token`` is skipped so that the condition clause does not end
    up inside the action text.

    The lower-cased action text is then classified by substring search:
    the "ignored" phrases are checked first, the "illegal" phrases second.

    Args:
        action_token: Token heading the action phrase.
        condition_token: Token heading the condition clause; excluded from
            the action text when it is a direct child of ``action_token``.

    Returns:
        ``ParameterIsIgnored`` if an ignored phrase occurs in the text,
        otherwise ``ParameterIsIllegal`` if an illegal phrase occurs,
        otherwise a plain ``Action``. Each carries the assembled text in its
        ``action`` field.
    """
    action_tokens = []

    for child in action_token.lefts:
        if child != condition_token:
            action_tokens.extend(extract_lefts_and_rights(child))

    action_tokens.append(action_token.text)

    for child in action_token.rights:
        if child != condition_token:
            action_tokens.extend(extract_lefts_and_rights(child))

    action_text = " ".join(action_tokens)
    # Lower-case once instead of once per candidate phrase.
    action_text_lower = action_text.lower()

    ignored_phrases = (
        "ignored",
        "not used",
        "no impact",
        "only supported",
        "only applies",
    )
    illegal_phrases = ("raise", "exception", "must be", "must not be")

    # Order matters: a text matching both groups is reported as ignored.
    if any(phrase in action_text_lower for phrase in ignored_phrases):
        return ParameterIsIgnored(action=action_text)
    if any(phrase in action_text_lower for phrase in illegal_phrases):
        return ParameterIsIllegal(action=action_text)
    return Action(action=action_text)
71
+
72
+
73
def extract_condition(condition_token: Token) -> Condition:
    """Build the condition of a dependency from the token that heads it.

    The condition text is the text of every token in
    ``condition_token.subtree`` joined with single spaces. Its lower-cased
    form is classified by substring search: the "optional" phrases are
    checked first, the "has value" phrases second.

    Args:
        condition_token: Token heading the condition clause.

    Returns:
        ``ParameterIsOptional`` if an optional phrase occurs in the text,
        otherwise ``ParameterHasValue`` if a value phrase occurs, otherwise
        a plain ``Condition``. Each carries the assembled text in its
        ``condition`` field.
    """
    condition_text = " ".join(token.text for token in condition_token.subtree)
    # Lower-case once instead of once per candidate phrase.
    condition_text_lower = condition_text.lower()

    is_optional_phrases = (
        "is none",
        "is not set",
        "is not specified",
        "is not none",
        "if none",
        "if not none",
    )
    has_value_phrases = ("equals", "is true", "is false", "is set to")

    # Order matters: a text matching both groups is reported as optional.
    if any(phrase in condition_text_lower for phrase in is_optional_phrases):
        return ParameterIsOptional(condition=condition_text)
    if any(phrase in condition_text_lower for phrase in has_value_phrases):
        return ParameterHasValue(condition=condition_text)
    return Condition(condition=condition_text)
47
92
48
- @staticmethod
49
- def extract_pattern_parameter_ignored_condition (
50
- dependent_param : Parameter ,
51
- func_parameters : List [Parameter ],
52
- match : Tuple ,
53
- param_docstring : Doc ,
54
- ) -> Union [Dependency , None ]:
55
- is_depending_on_param_index = match [1 ][2 ]
56
- is_depending_on_param_name = param_docstring [is_depending_on_param_index ].text
57
- is_depending_on_param = next (
58
- filter (
59
- lambda param : param .name == is_depending_on_param_name , func_parameters
60
- ),
61
- None ,
62
- )
63
- if is_depending_on_param is None :
64
- # Likely not a correct dependency match
65
- return None
66
-
67
- condition_verb = param_docstring [match [1 ][1 ]]
68
- condition_verb_subtree = list (condition_verb .subtree )
69
- condition_text = " " .join ([token .text for token in condition_verb_subtree ])
70
- condition = Condition (condition = condition_text )
71
-
72
- action = Action (action = "ignored" )
73
-
74
- return Dependency (
75
- hasDependentParameter = dependent_param ,
76
- isDependingOn = is_depending_on_param ,
77
- hasCondition = condition ,
78
- hasAction = action ,
79
- )
80
93
94
+ class DependencyExtractor :
81
95
@staticmethod
82
- def extract_pattern_parameter_applies_condition (
96
+ def extract_pattern_parameter_subordinating_conjunction (
83
97
dependent_param : Parameter ,
84
98
func_parameters : List [Parameter ],
85
99
match : Tuple ,
@@ -97,12 +111,11 @@ def extract_pattern_parameter_applies_condition(
97
111
# Likely not a correct dependency match
98
112
return None
99
113
100
- condition_verb = param_docstring [match [1 ][1 ]]
101
- condition_verb_subtree = list (condition_verb .subtree )
102
- condition_text = " " .join ([token .text for token in condition_verb_subtree ])
103
- condition = Condition (condition = condition_text )
114
+ condition_token = param_docstring [match [1 ][1 ]]
115
+ condition = extract_condition (condition_token )
104
116
105
- action = Action (action = "applies" )
117
+ action_token = param_docstring [match [1 ][0 ]]
118
+ action = extract_action (action_token , condition_token )
106
119
107
120
return Dependency (
108
121
hasDependentParameter = dependent_param ,
@@ -159,14 +172,18 @@ def get_dependencies(api: API) -> Dict:
159
172
docstring = parameter .docstring .description
160
173
docstring_preprocessed = preprocess_docstring (docstring )
161
174
doc = nlp (docstring_preprocessed )
162
- dependency_matches = matcher (doc )
163
- param_dependencies = extract_dependencies_from_docstring (
164
- parameter ,
165
- parameters ,
166
- doc ,
167
- dependency_matches ,
168
- spacy_id_to_pattern_id_mapping ,
169
- )
175
+ param_dependencies = []
176
+ for sentence in doc .sents :
177
+ sentence_dependency_matches = matcher (sentence )
178
+ sentence_dependencies = extract_dependencies_from_docstring (
179
+ parameter ,
180
+ parameters ,
181
+ sentence ,
182
+ sentence_dependency_matches ,
183
+ spacy_id_to_pattern_id_mapping ,
184
+ )
185
+ if sentence_dependencies :
186
+ param_dependencies .extend (sentence_dependencies )
170
187
if param_dependencies :
171
188
all_dependencies [function_name ][parameter .name ] = param_dependencies
172
189
0 commit comments