Skip to content

Commit fdb9304

Browse files
committed
Merge branch 'dev' into main
2 parents 9d4486e + 1411371 commit fdb9304

File tree

7 files changed

+338
-79
lines changed

7 files changed

+338
-79
lines changed

averbis/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2021 Averbis GmbH.
2+
# Copyright (c) 2022 Averbis GmbH.
33
#
44
# This file is part of Averbis Python API.
55
# See https://www.averbis.com for further info.
@@ -17,14 +17,16 @@
1717
# limitations under the License.
1818
#
1919
#
20-
from .core import Client, Project, Pipeline, Result, Terminology, DocumentCollection, Pear, Process
20+
from .core import Client, Project, Pipeline, Result, Terminology, DocumentCollection, Pear, Process, \
21+
EvaluationConfiguration
2122

2223
__all__ = [
2324
"Client",
2425
"Project",
2526
"Pipeline",
2627
"Result",
2728
"Terminology",
29+
"EvaluationConfiguration",
2830
"DocumentCollection",
2931
"Pear",
3032
"Process",

averbis/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717
# limitations under the License.
1818
#
1919
#
20-
__version__ = "0.8.0"
20+
__version__ = "0.9.0"

averbis/core/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
DocumentCollection,
3232
Pear,
3333
Process,
34+
EvaluationConfiguration,
3435
DOCUMENT_IMPORTER_CAS,
3536
DOCUMENT_IMPORTER_SOLR,
3637
DOCUMENT_IMPORTER_TEXT,
@@ -62,6 +63,7 @@
6263
"DocumentCollection",
6364
"Pear",
6465
"Process",
66+
"EvaluationConfiguration",
6567
"DOCUMENT_IMPORTER_CAS",
6668
"DOCUMENT_IMPORTER_SOLR",
6769
"DOCUMENT_IMPORTER_TEXT",

averbis/core/_rest_client.py

Lines changed: 203 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,30 @@ def create_and_run_process(
833833

834834
return document_collection.get_process(process_name=process_name)
835835

836+
@experimental_api
def evaluate_against(
    self,
    reference_process: "Process",
    process_name: str,
    evaluation_configurations: List["EvaluationConfiguration"],
    number_of_pipeline_instances: int = 1,
) -> "Process":
    """
    HIGHLY EXPERIMENTAL API - may soon change or disappear.

    Starts the evaluation of this process in comparison to the given one as a new process.
    Returns the new evaluation process.

    :param reference_process: Process holding the reference (gold) annotations to compare against.
    :param process_name: Name under which the new evaluation process is created.
    :param evaluation_configurations: One configuration per annotation type to be compared.
    :param number_of_pipeline_instances: Number of parallel pipeline instances used for the evaluation.
    """
    client = self.project.client
    # The actual REST call lives on the client; this method only supplies the context.
    # noinspection PyProtectedMember
    return client._evaluate(
        self.project,
        self,
        reference_process,
        process_name,
        evaluation_configurations,
        number_of_pipeline_instances,
    )
859+
836860
@experimental_api
837861
def rerun(self):
838862
"""
@@ -906,6 +930,7 @@ def export_text_analysis_to_cas(self, document_name: str) -> Cas:
906930
"Text analysis export is not supported for platform version 5.x, it is only supported from 6.x onwards."
907931
)
908932
document_collection = self.project.get_document_collection(self.document_source_name)
933+
# noinspection PyProtectedMember
909934
document_identifier = document_collection._get_document_identifier(document_name)
910935
# noinspection PyProtectedMember
911936
type_system = load_typesystem(
@@ -1404,6 +1429,80 @@ def upload_resources(
14041429
return self.client._upload_resources(zip_file, project_name=self.name)["files"]
14051430

14061431

1432+
class EvaluationConfiguration:
    """Configuration for the evaluation of one annotation type."""

    def __init__(
        self,
        comparison_annotation_type_name: str,
        features_to_compare: List[str],
        reference_annotation_type_name: Union[str, None] = None,
    ):
        """
        Configuration for the evaluation of one annotation type.

        :param comparison_annotation_type_name: fully qualified name of the annotation that will be compared;
               can also be a rule of format fully_qualified_name[feature1=value1&&feature2=value2...]
               can be extended by another rule of the same type, meaning that an annotation must be contained, e.g.:
               fully_qualified_name[feature1=value1&&feature2=value2...] >
               fully_qualified_name[feature1=value1&&feature2=value2...]
        :param features_to_compare: The list of features that should be used in the comparison, e.g., begin, end,
               uniqueID.
        :param reference_annotation_type_name: fully qualified name of the annotation in the reference text analysis
               result that other annotations should be compared to; can also be a rule (see
               comparison_annotation_type_name above). If not given (or empty), the comparison annotation is used.
        """
        self.partialMatchCriteria: Union[str, None] = None
        self.partialMatchArguments: List[str] = []
        # Features to be excluded from deep feature structure comparisons. These are regular
        # expressions which match against the fully qualified feature name (type:feature).
        self.excludeFeaturePatterns: List[str] = []
        # Regular expression specifying character sequences that should be ignored when
        # values of string features are compared
        self.stringFeatureComparisonIgnorePattern = None
        self.compareAnnotationRule = comparison_annotation_type_name
        # Fall back to the comparison rule when no reference rule is given. (A single
        # expression also covers the empty-string case, which previously left the
        # attribute unset.)
        self.goldAnnotationRule = reference_annotation_type_name or comparison_annotation_type_name
        # Copy the list so that add_feature() does not mutate the caller's argument.
        self.featuresToBeCompared = list(features_to_compare)
        self.allowMultipleMatches = False
        self.stringFeatureComparisonIgnoreCase = False
        self.forceComparisonWhenGoldstandardMissing = False

    def add_feature(self, feature_name: str) -> "EvaluationConfiguration":
        """Add another feature to the comparison and return self for chaining."""
        self.featuresToBeCompared.append(feature_name)
        return self

    def use_overlap_partial_match(self) -> "EvaluationConfiguration":
        """
        Overlapping annotations are used to calculate partial positives. Normally, these will replace a FalsePositive
        or FalseNegative if a partial match is identified.
        """
        self.partialMatchCriteria = "OVERLAP_MATCH"
        return self

    def use_range_variance_partial_match(
        self, range_variance: int
    ) -> "EvaluationConfiguration":
        """
        Annotations that are offset by the given variance are used to calculate partial positives.
        Normally, these will replace a FalsePositive or FalseNegative if a partial match is identified.
        """
        self.partialMatchCriteria = "RANGE_VARIANCE_MATCH"
        self.partialMatchArguments = [str(range_variance)]
        return self

    def use_enclosing_annotation_partial_match(
        self, enclosing_annotation_type_name: str
    ) -> "EvaluationConfiguration":
        """
        Annotations that are covered by the given annotation type are used to calculate partial positives.
        Normally, these will replace a FalsePositive or FalseNegative if a partial match is identified.
        """
        self.partialMatchCriteria = "ENCLOSING_ANNOTATION_MATCH"
        self.partialMatchArguments = [enclosing_annotation_type_name]
        return self
1504+
1505+
14071506
class Client:
14081507
def __init__(
14091508
self,
@@ -1414,6 +1513,8 @@ def __init__(
14141513
username: str = None,
14151514
password: str = None,
14161515
timeout: float = None,
1516+
polling_timeout: int = 30,
1517+
poll_delay: int = 5,
14171518
):
14181519
"""
14191520
A Client is the base object for all calls within the Averbis Python API.
@@ -1431,8 +1532,13 @@ def __init__(
14311532
:param username: If no API token is provided, then a username can be provided together with a password to generate a new API token
14321533
:param password: If no API token is provided, then a username can be provided together with a password to generate a new API token
14331534
:param timeout: An optional global timeout (in seconds) specifying how long the Client waits for a server response (default=None).
1535+
1536+
:param polling_timeout: Timeout (in seconds) after which polling for specific status requests is no longer tried.
1537+
:param poll_delay: Time (in seconds) between requests to server for specific status.
14341538
"""
14351539

1540+
self._polling_timeout = polling_timeout
1541+
self._poll_delay = poll_delay
14361542
self.__logger = logging.getLogger(self.__class__.__module__ + "." + self.__class__.__name__)
14371543
self._api_token = api_token
14381544
self._verify_ssl = verify_ssl
@@ -1723,16 +1829,32 @@ def get_build_info(self) -> dict:
17231829
self._build_info = response["payload"]
17241830
return self._build_info
17251831

1726-
def create_project(self, name: str, description: str = "", exist_ok=False) -> Project:
    """
    Creates a new project.

    :param name: The name of the new project
    :param description: The description of the new project
    :param exist_ok: If exist_ok is False (the default), a ValueError is raised if the project already exists. If
           exist_ok is True and the project exists, then the existing project is returned.
    :return: The project.
    """

    # Guard clause: create only when the project does not exist yet.
    if not self.exists_project(name):
        response = self.__request_with_json_response(
            "post",
            f"/v1/projects",
            params={"name": name, "description": description},
        )
        return Project(self, response["payload"]["name"])

    if not exist_ok:
        raise ValueError(
            f"This project '{name}' already exists. You can pass the flag exist_ok=True to create_project to obtain the existing project."
        )
    return self.get_project(name)
17361858

17371859
def get_project(self, name: str) -> Project:
17381860
"""
@@ -1779,15 +1901,18 @@ def upload_resources(
17791901
zip_file = self._create_zip_io(source, path_in_zip)
17801902
return self._upload_resources(zip_file)["files"]
17811903

1782-
@experimental_api
17831904
def list_projects(self) -> dict:
    """
    Returns a list of the projects.

    :return: The payload of the server response listing the projects.
    """

    try:
        response = self.__request_with_json_response("get", f"/v1/projects")
    except RequestException as e:
        # Platform HD 6 below 6.11.0 does not expose /v1/projects and answers
        # with HTTP 405; fall back to the older experimental endpoint then.
        # str(e) is safe even when e.args is empty (e.args[0] would raise IndexError).
        if "405" not in str(e):
            raise  # bare raise preserves the original traceback
        response = self.__request_with_json_response("get", f"/experimental/projects")
    return response["payload"]
17921917

17931918
@experimental_api
@@ -2249,7 +2374,7 @@ def _export_analysis_result_to_xmi(
22492374

22502375
process_name = self.__process_name(process)
22512376

2252-
if self._is_higher_equal_version(build_version, 6, 7):
2377+
try:
22532378
return str(
22542379
self.__request_with_bytes_response(
22552380
"get",
@@ -2263,25 +2388,20 @@ def _export_analysis_result_to_xmi(
22632388
ENCODING_UTF_8,
22642389
)
22652390

2266-
return str(
2267-
self.__request_with_bytes_response(
2268-
"get",
2269-
f"/experimental/textanalysis/projects/{project}/documentCollections/{collection_name}"
2270-
f"/documents/{document_id}/processes/{process_name}/exportTextAnalysisResult",
2271-
headers={
2272-
HEADER_ACCEPT: MEDIA_TYPE_APPLICATION_XMI,
2273-
},
2274-
),
2275-
ENCODING_UTF_8,
2276-
)
2391+
except RequestException as e:
2392+
# in HD 6 below version 6.7 the endpoint is called with identifiers instead
2393+
return str(
2394+
self.__request_with_bytes_response(
2395+
"get",
2396+
f"/experimental/textanalysis/projects/{project}/documentCollections/{collection_name}"
2397+
f"/documents/{document_id}/processes/{process_name}/exportTextAnalysisResult",
2398+
headers={
2399+
HEADER_ACCEPT: MEDIA_TYPE_APPLICATION_XMI,
2400+
},
2401+
),
2402+
ENCODING_UTF_8,
2403+
)
22772404

2278-
@staticmethod
2279-
def _is_higher_equal_version(version: str, compare_major: int, compare_minor: int) -> bool:
2280-
version_parts = version.split(".")
2281-
major = int(version_parts[0])
2282-
return major > compare_major or (
2283-
major == compare_major and int(version_parts[1]) >= compare_minor
2284-
)
22852405

22862406
@experimental_api
22872407
def _export_analysis_result_typesystem(
@@ -2870,3 +2990,58 @@ def _create_cas_file_request_parts(file_param_name, filename, source, mime_type,
28702990
)
28712991

28722992
return files
2993+
2994+
@experimental_api
def _evaluate(
    self,
    project: Project,
    comparison_process: Process,
    reference_process: Process,
    process_name: str,
    evaluation_configurations: List["EvaluationConfiguration"],
    number_of_pipeline_instances: int,
) -> Process:
    """
    HIGHLY EXPERIMENTAL API - may soon change or disappear.

    Use {process}.evaluate_against() instead.
    """
    collection_name = comparison_process.document_source_name
    endpoint = (
        f"/experimental/textanalysis/projects/{project.name}"
        f"/documentCollections/{collection_name}/evaluationProcesses"
    )
    query_params = {
        "comparisonProcessName": comparison_process.name,
        "referenceProcessName": reference_process.name,
        "processName": process_name,
        "numberOfPipelineInstances": number_of_pipeline_instances,
        "referenceDocumentCollectionName": reference_process.document_source_name,
    }
    # The request body is the list of configurations serialized as plain dicts.
    request_body = [vars(configuration) for configuration in evaluation_configurations]

    creation_response = self.__request_with_json_response(
        "post",
        endpoint,
        params=query_params,
        json=request_body,
        headers={
            HEADER_CONTENT_TYPE: MEDIA_TYPE_APPLICATION_JSON,
            HEADER_ACCEPT: MEDIA_TYPE_APPLICATION_JSON,
        },
    )

    if creation_response["errorMessages"]:
        raise Exception(
            f"Error during evaluation process creation {creation_response['errorMessages']}"
        )

    # The server creates the process asynchronously; wait until it is visible.
    self._ensure_process_created(project, process_name)

    return Process(
        project=project,
        name=process_name,
        document_source_name=collection_name,
    )
3036+
3037+
def _ensure_process_created(self, project: Project, process_name: str):
3038+
processes = self._list_processes(project)
3039+
total_time_slept = 0
3040+
while all(p.name != process_name for p in processes):
3041+
if total_time_slept > self._polling_timeout:
3042+
raise OperationTimeoutError(
3043+
f"Process not found for ${total_time_slept}"
3044+
)
3045+
sleep(self._poll_delay)
3046+
total_time_slept += self._poll_delay
3047+
processes = self._list_processes(project)

tests/fixtures.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ def requests_mock_id6_7(requests_mock):
8080
)
8181

8282

83+
@pytest.fixture()
84+
def requests_mock_id6_11(requests_mock):
85+
requests_mock.get(
86+
f"{URL_BASE_ID + '/rest/v1'}/buildInfo",
87+
headers={"Content-Type": "application/json"},
88+
json={"payload": {"specVersion": "6.11.0", "buildNumber": ""}, "errorMessages": []},
89+
)
90+
91+
8392
## Different clients based on the above platforms
8493

8594
# Tests that should work for all platform versions
@@ -105,7 +114,13 @@ def client_version_6(requests_mock_id6):
105114
return Client(URL_BASE_ID, api_token=TEST_API_TOKEN)
106115

107116

108-
# Tests that should work in platform version 6.7
117+
# Tests that should work in version 6.7
109118
@pytest.fixture()
110119
def client_version_6_7(requests_mock_id6_7):
111120
return Client(URL_BASE_ID, api_token=TEST_API_TOKEN)
121+
122+
123+
# Tests that should work in version 6.11
@pytest.fixture()
def client_version_6_11(requests_mock_id6_11):
    """Client fixture bound to a mocked 6.11 platform."""
    return Client(URL_BASE_ID, api_token=TEST_API_TOKEN)

0 commit comments

Comments
 (0)