Skip to content
This repository was archived by the owner on Jan 19, 2025. It is now read-only.

Commit 13c789c

Browse files
aserge16, Aleksandr Sergeev, Prajakta46
authored
feat: Action/Condition Classification (#48)
* #43 basic condition and action extraction functions * #43 corrected extractions, now matching occurs per sentence, removes many false positives * #43 fixed recursive action extraction function * Added new pattern * #43 classifying of ignored parameter actions * #43 conditions and actions are now dataclasses, renamed one condition * #43 phrases to categories actions/conditions * #43 mypy fixes * style: apply automatic fixes of linters * #43 renamed IDs in general SCONJ pattern * #43 more descriptive pattern name Co-authored-by: Aleksandr Sergeev <aleksandr.sergeev@tngtech.com> Co-authored-by: prajakta <prajaktabhujbal7@gmail.com> Co-authored-by: aserge16 <aserge16@users.noreply.github.com>
1 parent 3efa424 commit 13c789c

File tree

4 files changed

+121
-112
lines changed

4 files changed

+121
-112
lines changed

package_parser/package_parser/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import Any
66

77
from .commands.get_api import get_api
8+
from .commands.get_dependencies import get_dependencies
89
from .utils import ensure_file_exists
910

1011
__API_COMMAND = "api"
@@ -22,6 +23,7 @@ def cli() -> None:
2223

2324
if args.command == __API_COMMAND:
2425
public_api = get_api(args.package)
26+
get_dependencies(public_api)
2527

2628
out_dir: Path = args.out
2729
out_file = out_dir.joinpath(

package_parser/package_parser/commands/get_api/_model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import inspect
44
import re
5-
from dataclasses import asdict
5+
from dataclasses import asdict, dataclass
66
from enum import Enum, auto
77
from typing import Any, Dict, Optional, Union
88

@@ -445,6 +445,7 @@ def to_json(self) -> Any:
445445
return {"type": self.type, "description": self.description}
446446

447447

448+
@dataclass
448449
class Action:
449450
@classmethod
450451
def from_json(cls, json: Any):
@@ -477,6 +478,7 @@ def __init__(self, action: str) -> None:
477478
super().__init__(action)
478479

479480

481+
@dataclass
480482
class Condition:
481483
@classmethod
482484
def from_json(cls, json: Any):
@@ -504,7 +506,7 @@ def __init__(self, condition: str) -> None:
504506
super().__init__(condition)
505507

506508

507-
class ParameterIsSet(StaticCondition):
509+
class ParameterIsOptional(StaticCondition):
508510
def __init__(self, condition: str) -> None:
509511
super().__init__(condition)
510512

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,41 @@
11
dependency_matcher_patterns = {
2-
"pattern_parameter_used_condition": [
3-
{"RIGHT_ID": "used", "RIGHT_ATTRS": {"ORTH": {"IN": ["used", "Used"]}}},
2+
"pattern_parameter_subordinating_conjunction": [
3+
{"RIGHT_ID": "action_head", "RIGHT_ATTRS": {"POS": "VERB"}},
44
{
5-
"LEFT_ID": "used",
5+
"LEFT_ID": "action_head",
66
"REL_OP": ">",
7-
"RIGHT_ID": "condition",
7+
"RIGHT_ID": "condition_head",
88
"RIGHT_ATTRS": {"DEP": "advcl"},
99
},
1010
{
11-
"LEFT_ID": "condition",
11+
"LEFT_ID": "condition_head",
1212
"REL_OP": ">",
1313
"RIGHT_ID": "dependee_param",
1414
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
1515
},
1616
],
17-
"pattern_parameter_ignored_condition": [
17+
"pattern_parameter_": [
1818
{
19-
"RIGHT_ID": "ignored",
20-
"RIGHT_ATTRS": {"ORTH": {"IN": ["ignored", "Ignored"]}},
19+
"RIGHT_ID": "action",
20+
"RIGHT_ATTRS": {"POS": "VERB"}, # verb is set as an anchor token
2121
},
2222
{
23-
"LEFT_ID": "ignored",
23+
"LEFT_ID": "action",
2424
"REL_OP": ">",
25-
"RIGHT_ID": "condition",
26-
"RIGHT_ATTRS": {"DEP": "advcl"},
25+
"RIGHT_ID": "ActionParameterName", # verb is a direct head of subject which is a NOUN i.e. Parameter Name
26+
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubjpass", "nsubj"]}},
2727
},
2828
{
29-
"LEFT_ID": "condition",
29+
"LEFT_ID": "action",
3030
"REL_OP": ">",
31-
"RIGHT_ID": "dependee_param",
32-
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
31+
"RIGHT_ID": "ConditionalVerbModifier", # Verb is restricted by Verb Modifier
32+
"RIGHT_ATTRS": {"DEP": "advmod"},
3333
},
34-
],
35-
"pattern_parameter_applies_condition": [
3634
{
37-
"RIGHT_ID": "applies",
38-
"RIGHT_ATTRS": {"ORTH": {"IN": ["applies", "Applies"]}},
39-
},
40-
{
41-
"LEFT_ID": "applies",
42-
"REL_OP": ">",
43-
"RIGHT_ID": "condition",
44-
"RIGHT_ATTRS": {"DEP": "advcl"},
45-
},
46-
{
47-
"LEFT_ID": "condition",
48-
"REL_OP": ">",
49-
"RIGHT_ID": "dependee_param",
50-
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
35+
"LEFT_ID": "action",
36+
"REL_OP": ">>",
37+
"RIGHT_ID": "ConditionalParameterName", # verb is a head in chain of object i.e. Parameter name or value
38+
"RIGHT_ATTRS": {"DEP": {"IN": ["dobj", "pobj"]}},
5139
},
5240
],
5341
}

package_parser/package_parser/commands/get_dependencies/_get_dependency.py

Lines changed: 97 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,84 +2,98 @@
22

33
import spacy
44
from spacy.matcher import DependencyMatcher
5+
from spacy.tokens import Token
56
from spacy.tokens.doc import Doc
67

7-
from ..get_api._model import API, Action, Condition, Dependency, Parameter
8+
from ..get_api._model import (
9+
API,
10+
Action,
11+
Condition,
12+
Dependency,
13+
Parameter,
14+
ParameterHasValue,
15+
ParameterIsIgnored,
16+
ParameterIsIllegal,
17+
ParameterIsOptional,
18+
)
819
from ._dependency_patterns import dependency_matcher_patterns
920
from ._preprocess_docstring import preprocess_docstring
1021

1122
PIPELINE = "en_core_web_sm"
1223

1324

14-
class DependencyExtractor:
15-
@staticmethod
16-
def extract_pattern_parameter_used_condition(
17-
dependent_param: Parameter,
18-
func_parameters: List[Parameter],
19-
match: Tuple,
20-
param_docstring: Doc,
21-
) -> Union[Dependency, None]:
22-
is_depending_on_param_index = match[1][2]
23-
is_depending_on_param_name = param_docstring[is_depending_on_param_index].text
24-
is_depending_on_param = next(
25-
filter(
26-
lambda param: param.name == is_depending_on_param_name, func_parameters
27-
),
28-
None,
29-
)
30-
if is_depending_on_param is None:
31-
# Likely not a correct dependency match
32-
return None
33-
34-
condition_verb = param_docstring[match[1][1]]
35-
condition_verb_subtree = list(condition_verb.subtree)
36-
condition_text = " ".join([token.text for token in condition_verb_subtree])
37-
condition = Condition(condition=condition_text)
38-
39-
action = Action(action="used")
40-
41-
return Dependency(
42-
hasDependentParameter=dependent_param,
43-
isDependingOn=is_depending_on_param,
44-
hasCondition=condition,
45-
hasAction=action,
46-
)
25+
def extract_lefts_and_rights(
    curr_token: "Token", extracted: Union[List[str], None] = None
) -> List[str]:
    """Flatten the dependency subtree rooted at ``curr_token`` into token texts.

    The subtree is walked in-order: every left child (recursively), then the
    token itself, then every right child (recursively).

    :param curr_token: Root token of the subtree to flatten. Only its
        ``lefts``, ``rights`` and ``text`` attributes are read.
    :param extracted: Accumulator the texts are appended to. A fresh list is
        created when it is omitted; a list passed in is mutated in place.
    :return: The accumulator, holding one text per visited token.
    """
    if extracted is None:
        # Create the list per call so results never leak between invocations.
        extracted = []

    # ``lefts``/``rights`` are consumed exactly once, so no copy is needed.
    for left_child in curr_token.lefts:
        extract_lefts_and_rights(left_child, extracted)

    extracted.append(curr_token.text)

    for right_child in curr_token.rights:
        extract_lefts_and_rights(right_child, extracted)

    return extracted
40+
41+
42+
def extract_action(action_token: Token, condition_token: Token) -> Action:
    """Build the action phrase headed by ``action_token`` and classify it.

    The phrase is made of the flattened subtrees of every direct child of
    ``action_token`` except ``condition_token``, with the action token's own
    text placed between its left and right children. Token texts are joined
    with single spaces.

    :param action_token: Head token of the action phrase.
    :param condition_token: Direct child of ``action_token`` whose subtree is
        left out of the action phrase.
    :return: ``ParameterIsIgnored`` when the phrase contains one of the
        "ignored" phrases, otherwise ``ParameterIsIllegal`` when it contains
        one of the "illegal" phrases, otherwise a plain ``Action``. The
        "ignored" check takes precedence over the "illegal" check.
    """
    ignored_phrases = (
        "ignored",
        "not used",
        "no impact",
        "only supported",
        "only applies",
    )
    illegal_phrases = ("raise", "exception", "must be", "must not be")

    # Collect straight into one accumulator instead of building a temporary
    # list per child subtree.
    action_tokens: List[str] = []
    for child in action_token.lefts:
        if child != condition_token:
            extract_lefts_and_rights(child, action_tokens)
    action_tokens.append(action_token.text)
    for child in action_token.rights:
        if child != condition_token:
            extract_lefts_and_rights(child, action_tokens)

    action_text = " ".join(action_tokens)
    # Lower-case once: the phrase checks are case-insensitive substring tests,
    # while the returned object keeps the original casing.
    action_text_lowered = action_text.lower()

    if any(phrase in action_text_lowered for phrase in ignored_phrases):
        return ParameterIsIgnored(action=action_text)
    if any(phrase in action_text_lowered for phrase in illegal_phrases):
        return ParameterIsIllegal(action=action_text)
    return Action(action=action_text)
71+
72+
73+
def extract_condition(condition_token: Token) -> Condition:
    """Build the condition phrase rooted at ``condition_token`` and classify it.

    The phrase is the text of every token in ``condition_token.subtree``,
    joined with single spaces.

    :param condition_token: Head token of the condition clause.
    :return: ``ParameterIsOptional`` when the phrase contains one of the
        "optional" phrases, otherwise ``ParameterHasValue`` when it contains
        one of the "has value" phrases, otherwise a plain ``Condition``. The
        "optional" check takes precedence over the "has value" check.
    """
    is_optional_phrases = (
        "is none",
        "is not set",
        "is not specified",
        "is not none",
        "if none",
        "if not none",
    )
    has_value_phrases = ("equals", "is true", "is false", "is set to")

    condition_text = " ".join(token.text for token in condition_token.subtree)
    # Lower-case once: the phrase checks are case-insensitive substring tests,
    # while the returned object keeps the original casing.
    condition_text_lowered = condition_text.lower()

    if any(phrase in condition_text_lowered for phrase in is_optional_phrases):
        return ParameterIsOptional(condition=condition_text)
    if any(phrase in condition_text_lowered for phrase in has_value_phrases):
        return ParameterHasValue(condition=condition_text)
    return Condition(condition=condition_text)
4792

48-
@staticmethod
49-
def extract_pattern_parameter_ignored_condition(
50-
dependent_param: Parameter,
51-
func_parameters: List[Parameter],
52-
match: Tuple,
53-
param_docstring: Doc,
54-
) -> Union[Dependency, None]:
55-
is_depending_on_param_index = match[1][2]
56-
is_depending_on_param_name = param_docstring[is_depending_on_param_index].text
57-
is_depending_on_param = next(
58-
filter(
59-
lambda param: param.name == is_depending_on_param_name, func_parameters
60-
),
61-
None,
62-
)
63-
if is_depending_on_param is None:
64-
# Likely not a correct dependency match
65-
return None
66-
67-
condition_verb = param_docstring[match[1][1]]
68-
condition_verb_subtree = list(condition_verb.subtree)
69-
condition_text = " ".join([token.text for token in condition_verb_subtree])
70-
condition = Condition(condition=condition_text)
71-
72-
action = Action(action="ignored")
73-
74-
return Dependency(
75-
hasDependentParameter=dependent_param,
76-
isDependingOn=is_depending_on_param,
77-
hasCondition=condition,
78-
hasAction=action,
79-
)
8093

94+
class DependencyExtractor:
8195
@staticmethod
82-
def extract_pattern_parameter_applies_condition(
96+
def extract_pattern_parameter_subordinating_conjunction(
8397
dependent_param: Parameter,
8498
func_parameters: List[Parameter],
8599
match: Tuple,
@@ -97,12 +111,11 @@ def extract_pattern_parameter_applies_condition(
97111
# Likely not a correct dependency match
98112
return None
99113

100-
condition_verb = param_docstring[match[1][1]]
101-
condition_verb_subtree = list(condition_verb.subtree)
102-
condition_text = " ".join([token.text for token in condition_verb_subtree])
103-
condition = Condition(condition=condition_text)
114+
condition_token = param_docstring[match[1][1]]
115+
condition = extract_condition(condition_token)
104116

105-
action = Action(action="applies")
117+
action_token = param_docstring[match[1][0]]
118+
action = extract_action(action_token, condition_token)
106119

107120
return Dependency(
108121
hasDependentParameter=dependent_param,
@@ -159,14 +172,18 @@ def get_dependencies(api: API) -> Dict:
159172
docstring = parameter.docstring.description
160173
docstring_preprocessed = preprocess_docstring(docstring)
161174
doc = nlp(docstring_preprocessed)
162-
dependency_matches = matcher(doc)
163-
param_dependencies = extract_dependencies_from_docstring(
164-
parameter,
165-
parameters,
166-
doc,
167-
dependency_matches,
168-
spacy_id_to_pattern_id_mapping,
169-
)
175+
param_dependencies = []
176+
for sentence in doc.sents:
177+
sentence_dependency_matches = matcher(sentence)
178+
sentence_dependencies = extract_dependencies_from_docstring(
179+
parameter,
180+
parameters,
181+
sentence,
182+
sentence_dependency_matches,
183+
spacy_id_to_pattern_id_mapping,
184+
)
185+
if sentence_dependencies:
186+
param_dependencies.extend(sentence_dependencies)
170187
if param_dependencies:
171188
all_dependencies[function_name][parameter.name] = param_dependencies
172189

0 commit comments

Comments
 (0)