@@ -833,6 +833,30 @@ def create_and_run_process(
833833
834834 return document_collection .get_process (process_name = process_name )
835835
@experimental_api
def evaluate_against(
    self,
    reference_process: "Process",
    process_name: str,
    evaluation_configurations: List["EvaluationConfiguration"],
    number_of_pipeline_instances: int = 1,
) -> "Process":
    """
    HIGHLY EXPERIMENTAL API - may soon change or disappear.

    Evaluates this process in comparison to a reference process by starting a new evaluation process.

    :param reference_process: The process that this process is compared to.
    :param process_name: The name of the new evaluation process.
    :param evaluation_configurations: The evaluation configurations, each describing the evaluation of one
                                      annotation type.
    :param number_of_pipeline_instances: Forwarded unchanged to the evaluation request (default: 1).
    :return: The new evaluation process.
    """
    owning_project = self.project
    # The client owns the REST call; this method only supplies the project and the two processes.
    # noinspection PyProtectedMember
    start_evaluation = owning_project.client._evaluate
    return start_evaluation(
        owning_project,
        self,
        reference_process,
        process_name,
        evaluation_configurations,
        number_of_pipeline_instances,
    )
859+
836860 @experimental_api
837861 def rerun (self ):
838862 """
@@ -906,6 +930,7 @@ def export_text_analysis_to_cas(self, document_name: str) -> Cas:
906930 "Text analysis export is not supported for platform version 5.x, it is only supported from 6.x onwards."
907931 )
908932 document_collection = self .project .get_document_collection (self .document_source_name )
933+ # noinspection PyProtectedMember
909934 document_identifier = document_collection ._get_document_identifier (document_name )
910935 # noinspection PyProtectedMember
911936 type_system = load_typesystem (
@@ -1404,6 +1429,80 @@ def upload_resources(
14041429 return self .client ._upload_resources (zip_file , project_name = self .name )["files" ]
14051430
14061431
class EvaluationConfiguration:
    """
    Configuration for the evaluation of one annotation type.

    The instance attributes deliberately use camelCase names: the client serializes a configuration with
    ``vars()``, so the attribute names are the keys of the JSON document sent with the evaluation request.
    Do not add, rename or remove instance attributes without considering that payload.
    """

    def __init__(
        self,
        comparison_annotation_type_name: str,
        features_to_compare: List[str],
        reference_annotation_type_name: Union[str, None] = None,
    ):
        """
        Configuration for the evaluation of one annotation type

        :param comparison_annotation_type_name: fully qualified name of the annotation that will be compared;
            can also be a rule of format fully_qualified_name[feature1=value1&&feature2=value2...]
            can be extended by another rule of the same type, meaning that an annotation must be contained, e.g:
            fully_qualified_name[feature1=value1&&feature2=value2...] >
            fully_qualified_name[feature1=value1&&feature2=value2...]
        :param features_to_compare: The list of features that should be used in the comparison, e.g., begin, end,
            uniqueID. The list is copied, so later calls to add_feature() do not modify the caller's list.
        :param reference_annotation_type_name: fully qualified name of the annotation in the reference text analysis
            result that other annotations should be compared to; can also be a rule (see
            comparison_annotation_type_name above). If not given (None or empty), the same annotation as the
            comparison annotation is used
        """
        self.partialMatchCriteria: Union[str, None] = None
        self.partialMatchArguments: List[str] = []
        # Features to be excluded from deep feature structure comparisons. These are regular
        # expressions which match against the fully qualified feature name (type:feature).
        self.excludeFeaturePatterns: List[str] = []
        # Regular expression specifying character sequences that should be ignored when
        # values of string features are compared
        self.stringFeatureComparisonIgnorePattern = None
        self.compareAnnotationRule = comparison_annotation_type_name
        # Any falsy reference name (None as well as "") falls back to the comparison rule, so the
        # attribute is always present in the serialized configuration.
        self.goldAnnotationRule = reference_annotation_type_name or comparison_annotation_type_name
        # Copy the list so that add_feature() never mutates the argument passed in by the caller.
        self.featuresToBeCompared = list(features_to_compare)
        self.allowMultipleMatches = False
        self.stringFeatureComparisonIgnoreCase = False
        self.forceComparisonWhenGoldstandardMissing = False

    def add_feature(self, feature_name: str) -> "EvaluationConfiguration":
        """
        Adds a further feature to the list of features that are used in the comparison.

        :return: This configuration, to allow chaining.
        """
        self.featuresToBeCompared.append(feature_name)
        return self

    def use_overlap_partial_match(self) -> "EvaluationConfiguration":
        """
        Overlapping annotations are used to calculate partial positives. Normally, these will replace a FalsePositive
        or FalseNegative if a partial match is identified.

        :return: This configuration, to allow chaining.
        """
        self.partialMatchCriteria = "OVERLAP_MATCH"
        # This criterion takes no arguments; drop any that were left over from a previously chosen criterion.
        self.partialMatchArguments = []
        return self

    def use_range_variance_partial_match(self, range_variance: int) -> "EvaluationConfiguration":
        """
        Annotations that are offset by the given variance are used to calculate partial positives.
        Normally, these will replace a FalsePositive or FalseNegative if a partial match is identified.

        :param range_variance: The variance; it is stored in its string form as the single match argument.
        :return: This configuration, to allow chaining.
        """
        self.partialMatchCriteria = "RANGE_VARIANCE_MATCH"
        self.partialMatchArguments = [str(range_variance)]
        return self

    def use_enclosing_annotation_partial_match(
        self, enclosing_annotation_type_name: str
    ) -> "EvaluationConfiguration":
        """
        Annotations that are covered by the given annotation type are used to calculate partial positives.
        Normally, these will replace a FalsePositive or FalseNegative if a partial match is identified.

        :param enclosing_annotation_type_name: The annotation type name; it is stored as the single match argument.
        :return: This configuration, to allow chaining.
        """
        self.partialMatchCriteria = "ENCLOSING_ANNOTATION_MATCH"
        self.partialMatchArguments = [enclosing_annotation_type_name]
        return self
1504+
1505+
14071506class Client :
14081507 def __init__ (
14091508 self ,
@@ -1414,6 +1513,8 @@ def __init__(
14141513 username : str = None ,
14151514 password : str = None ,
14161515 timeout : float = None ,
1516+ polling_timeout : int = 30 ,
1517+ poll_delay : int = 5 ,
14171518 ):
14181519 """
14191520 A Client is the base object for all calls within the Averbis Python API.
@@ -1431,8 +1532,13 @@ def __init__(
14311532 :param username: If no API token is provided, then a username can be provided together with a password to generate a new API token
14321533 :param password: If no API token is provided, then a username can be provided together with a password to generate a new API token
14331534 :param timeout: An optional global timeout (in seconds) specifiying how long the Client is waiting for a server response (default=None).
1535+
1536+ :param polling_timeout: Timeout (in seconds) after which polling for specific status requests is no longer tried.
1537+ :param poll_delay: Time (in seconds) between requests to server for specific status.
14341538 """
14351539
1540+ self ._polling_timeout = polling_timeout
1541+ self ._poll_delay = poll_delay
14361542 self .__logger = logging .getLogger (self .__class__ .__module__ + "." + self .__class__ .__name__ )
14371543 self ._api_token = api_token
14381544 self ._verify_ssl = verify_ssl
@@ -1723,16 +1829,32 @@ def get_build_info(self) -> dict:
17231829 self ._build_info = response ["payload" ]
17241830 return self ._build_info
17251831
def create_project(self, name: str, description: str = "", exist_ok=False) -> Project:
    """
    Creates a new project.

    :param name: The name of the new project
    :param description: The description of the new project
    :param exist_ok: If exist_ok is False (the default), a ValueError is raised if the project already exists. If
                     exist_ok is True and the project exists, then the existing project is returned.
    :return: The project.
    :raises ValueError: If the project already exists and exist_ok is False.
    """
    # Common case first: the project does not exist yet, so it is created on the server.
    if not self.exists_project(name):
        response = self.__request_with_json_response(
            "post",
            "/v1/projects",
            params={"name": name, "description": description},
        )
        return Project(self, response["payload"]["name"])

    # From here on the project is known to exist.
    if not exist_ok:
        raise ValueError(
            f"This project '{name}' already exists. You can pass the flag exist_ok=True to create_project to obtain the existing project."
        )
    return self.get_project(name)
17361858
17371859 def get_project (self , name : str ) -> Project :
17381860 """
@@ -1779,15 +1901,18 @@ def upload_resources(
17791901 zip_file = self ._create_zip_io (source , path_in_zip )
17801902 return self ._upload_resources (zip_file )["files" ]
17811903
def list_projects(self) -> dict:
    """
    Returns a list of the projects.

    The "/v1/projects" URL is requested first. If that request fails with an error that indicates HTTP status
    405, the request is repeated against "/experimental/projects", the URL used in HD 6 below 6.11.0.

    :return: The "payload" entry of the server response.
    :raises RequestException: If the first request fails for a reason other than status 405, or if the
                              fallback request fails.
    """
    try:
        response = self.__request_with_json_response("get", "/v1/projects")
    except RequestException as e:
        # Look for the status on a response attached to the exception (if there is one) and in the error
        # message. Tolerating missing args and converting the first argument with str() guarantees that an
        # unexpected exception shape cannot replace the real request error by an IndexError or TypeError.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        message = str(e.args[0]) if e.args else ""
        if status_code != 405 and "405" not in message:
            # Bare raise keeps the original traceback intact.
            raise
        # in HD 6 below 6.11.0 the following url is used
        response = self.__request_with_json_response("get", "/experimental/projects")
    return response["payload"]
17921917
17931918 @experimental_api
@@ -2249,7 +2374,7 @@ def _export_analysis_result_to_xmi(
22492374
22502375 process_name = self .__process_name (process )
22512376
2252- if self . _is_higher_equal_version ( build_version , 6 , 7 ) :
2377+ try :
22532378 return str (
22542379 self .__request_with_bytes_response (
22552380 "get" ,
@@ -2263,25 +2388,20 @@ def _export_analysis_result_to_xmi(
22632388 ENCODING_UTF_8 ,
22642389 )
22652390
2266- return str (
2267- self .__request_with_bytes_response (
2268- "get" ,
2269- f"/experimental/textanalysis/projects/{ project } /documentCollections/{ collection_name } "
2270- f"/documents/{ document_id } /processes/{ process_name } /exportTextAnalysisResult" ,
2271- headers = {
2272- HEADER_ACCEPT : MEDIA_TYPE_APPLICATION_XMI ,
2273- },
2274- ),
2275- ENCODING_UTF_8 ,
2276- )
2391+ except RequestException as e :
2392+ # in HD 6 below version 6.7 the endpoint is called with identifiers instead
2393+ return str (
2394+ self .__request_with_bytes_response (
2395+ "get" ,
2396+ f"/experimental/textanalysis/projects/{ project } /documentCollections/{ collection_name } "
2397+ f"/documents/{ document_id } /processes/{ process_name } /exportTextAnalysisResult" ,
2398+ headers = {
2399+ HEADER_ACCEPT : MEDIA_TYPE_APPLICATION_XMI ,
2400+ },
2401+ ),
2402+ ENCODING_UTF_8 ,
2403+ )
22772404
2278- @staticmethod
2279- def _is_higher_equal_version (version : str , compare_major : int , compare_minor : int ) -> bool :
2280- version_parts = version .split ("." )
2281- major = int (version_parts [0 ])
2282- return major > compare_major or (
2283- major == compare_major and int (version_parts [1 ]) >= compare_minor
2284- )
22852405
22862406 @experimental_api
22872407 def _export_analysis_result_typesystem (
@@ -2870,3 +2990,58 @@ def _create_cas_file_request_parts(file_param_name, filename, source, mime_type,
28702990 )
28712991
28722992 return files
2993+
@experimental_api
def _evaluate(
    self,
    project: Project,
    comparison_process: Process,
    reference_process: Process,
    process_name: str,
    evaluation_configurations: List["EvaluationConfiguration"],
    number_of_pipeline_instances: int,
) -> Process:
    """
    HIGHLY EXPERIMENTAL API - may soon change or disappear.

    Use {process}.evaluate_against() instead.

    Posts an evaluation request for the comparison process, waits until the new process is listed for the
    project and returns a handle to it.

    :param project: The project in which the evaluation process is created.
    :param comparison_process: The process to evaluate; its document source is also used for the new process.
    :param reference_process: The process that the comparison process is compared to.
    :param process_name: The name of the new evaluation process.
    :param evaluation_configurations: The configurations; each one is sent as its attribute dictionary.
    :param number_of_pipeline_instances: Sent as the "numberOfPipelineInstances" request parameter.
    :return: The new evaluation process.
    :raises Exception: If the creation response contains error messages.
    """
    collection_name = comparison_process.document_source_name
    endpoint = (
        f"/experimental/textanalysis/projects/{project.name}"
        f"/documentCollections/{collection_name}/evaluationProcesses"
    )
    query_parameters = {
        "comparisonProcessName": comparison_process.name,
        "referenceProcessName": reference_process.name,
        "processName": process_name,
        "numberOfPipelineInstances": number_of_pipeline_instances,
        "referenceDocumentCollectionName": reference_process.document_source_name,
    }
    # The request body is the list of attribute dictionaries of the configurations.
    request_body = list(map(vars, evaluation_configurations))
    request_headers = {
        HEADER_CONTENT_TYPE: MEDIA_TYPE_APPLICATION_JSON,
        HEADER_ACCEPT: MEDIA_TYPE_APPLICATION_JSON,
    }

    creation_response = self.__request_with_json_response(
        "post",
        endpoint,
        params=query_parameters,
        json=request_body,
        headers=request_headers,
    )
    reported_errors = creation_response["errorMessages"]
    if reported_errors:
        raise Exception(f"Error during evaluation process creation {reported_errors}")

    # Block until the new process is listed for the project before handing out a handle to it.
    self._ensure_process_created(project, process_name)

    return Process(
        project=project,
        name=process_name,
        document_source_name=collection_name,
    )
3036+
3037+ def _ensure_process_created (self , project : Project , process_name : str ):
3038+ processes = self ._list_processes (project )
3039+ total_time_slept = 0
3040+ while all (p .name != process_name for p in processes ):
3041+ if total_time_slept > self ._polling_timeout :
3042+ raise OperationTimeoutError (
3043+ f"Process not found for ${ total_time_slept } "
3044+ )
3045+ sleep (self ._poll_delay )
3046+ total_time_slept += self ._poll_delay
3047+ processes = self ._list_processes (project )
0 commit comments