Commit 99398ca

chg: 💄
1 parent 730259d commit 99398ca

12 files changed: +438 additions, -335 deletions

tests/common.py

Lines changed: 7 additions & 9 deletions
@@ -5,13 +5,11 @@
 
 
 def get_path(p):
-    return os.path.join(
-        os.path.dirname(__file__),
-        p
-    )
+    return os.path.join(os.path.dirname(__file__), p)
 
-TEST_CFG = get_path('test_data/test_config.ini')
-TEST_JSONL = get_path('test_data/test_jsonl.jsonl')
-TEST_REFERENCES = get_path('test_data/test_references.txt')
-TEST_TSV_PREDICT = get_path('test_data/test_tsv_predict.tsv')
-TEST_TSV_TRAIN = get_path('test_data/test_tsv_train.tsv')
+
+TEST_CFG = get_path("test_data/test_config.ini")
+TEST_JSONL = get_path("test_data/test_jsonl.jsonl")
+TEST_REFERENCES = get_path("test_data/test_references.txt")
+TEST_TSV_PREDICT = get_path("test_data/test_tsv_predict.tsv")
+TEST_TSV_TRAIN = get_path("test_data/test_tsv_train.tsv")

tests/prodigy/common.py

Lines changed: 7 additions & 9 deletions
@@ -5,13 +5,11 @@
 
 
 def get_path(p):
-    return os.path.join(
-        os.path.dirname(__file__),
-        p
-    )
+    return os.path.join(os.path.dirname(__file__), p)
 
-TEST_TOKENS = get_path('test_data/test_tokens_to_tsv_tokens.jsonl')
-TEST_SPANS = get_path('test_data/test_tokens_to_tsv_spans.jsonl')
-TEST_REF_TOKENS = get_path('test_data/test_reference_to_token_tokens.jsonl')
-TEST_REF_SPANS = get_path('test_data/test_reference_to_token_spans.jsonl')
-TEST_REF_EXPECTED_SPANS = get_path('test_data/test_reference_to_token_expected.jsonl')
+
+TEST_TOKENS = get_path("test_data/test_tokens_to_tsv_tokens.jsonl")
+TEST_SPANS = get_path("test_data/test_tokens_to_tsv_spans.jsonl")
+TEST_REF_TOKENS = get_path("test_data/test_reference_to_token_tokens.jsonl")
+TEST_REF_SPANS = get_path("test_data/test_reference_to_token_spans.jsonl")
+TEST_REF_EXPECTED_SPANS = get_path("test_data/test_reference_to_token_expected.jsonl")

tests/prodigy/test_numbered_reference_annotator.py

Lines changed: 32 additions & 13 deletions
@@ -3,7 +3,10 @@
 
 import pytest
 import spacy
-from deep_reference_parser.prodigy.numbered_reference_annotator import NumberedReferenceAnnotator
+from deep_reference_parser.prodigy.numbered_reference_annotator import (
+    NumberedReferenceAnnotator,
+)
+
 
 @pytest.fixture(scope="function")
 def nra():
@@ -111,20 +114,30 @@ def test_numbered_reference_splitter(nra):
             {"text": "\n", "start": 470, "end": 471, "id": 92},
             {"text": "3", "start": 471, "end": 472, "id": 92},
             {"text": ".", "start": 472, "end": 473, "id": 92},
-        ]
+        ],
     }
 
     docs = list(nra.run([numbered_reference]))
     text = docs[0]["text"]
     spans = docs[0]["spans"]
-    ref_1 = text[spans[0]["start"]:spans[0]["end"]]
-    ref_2 = text[spans[1]["start"]:spans[1]["end"]]
-    ref_3 = text[spans[2]["start"]:spans[2]["end"]]
+    ref_1 = text[spans[0]["start"] : spans[0]["end"]]
+    ref_2 = text[spans[1]["start"] : spans[1]["end"]]
+    ref_3 = text[spans[2]["start"] : spans[2]["end"]]
 
     assert len(spans) == 3
-    assert ref_1 == "Global update on the health sector response to HIV, 2014. Geneva: World Health Organization; \n 2014:168."
-    assert ref_2.strip() == "WHO, UNICEF, UNAIDS. Global update on HIV treatment 2013: results, impact and \n opportunities. Geneva: World Health Organization; 2013:126."
-    assert ref_3.strip() == "Consolidated guidelines on the use of antiretroviral drugs for treating and preventing HIV infection: \n recommendations for a public health approach. Geneva: World Health Organization; 2013:272."
+    assert (
+        ref_1
+        == "Global update on the health sector response to HIV, 2014. Geneva: World Health Organization; \n 2014:168."
+    )
+    assert (
+        ref_2.strip()
+        == "WHO, UNICEF, UNAIDS. Global update on HIV treatment 2013: results, impact and \n opportunities. Geneva: World Health Organization; 2013:126."
+    )
+    assert (
+        ref_3.strip()
+        == "Consolidated guidelines on the use of antiretroviral drugs for treating and preventing HIV infection: \n recommendations for a public health approach. Geneva: World Health Organization; 2013:272."
+    )
+
 
 def test_numbered_reference_splitter_line_endings(nra):
     """
@@ -196,15 +209,21 @@ def test_numbered_reference_splitter_line_endings(nra):
             {"text": "\n\n", "start": 261, "end": 263, "id": 58},
             {"text": "3", "start": 262, "end": 264, "id": 59},
             {"text": ".", "start": 263, "end": 265, "id": 60},
-        ]
+        ],
     }
 
     docs = list(nra.run([numbered_reference]))
    text = docs[0]["text"]
     spans = docs[0]["spans"]
-    ref_1 = text[spans[0]["start"]:spans[0]["end"]]
-    ref_2 = text[spans[1]["start"]:spans[1]["end"]]
+    ref_1 = text[spans[0]["start"] : spans[0]["end"]]
+    ref_2 = text[spans[1]["start"] : spans[1]["end"]]
 
     assert len(spans) == 2
-    assert ref_1.strip() == "Global update on the health sector response to HIV, 2014. Geneva: World Health Organization; \n 2014:168."
-    assert ref_2.strip() == "WHO, UNICEF, UNAIDS. Global update on HIV treatment 2013: results, impact and \n opportunities. Geneva: World Health Organization; 2013:126"
+    assert (
+        ref_1.strip()
+        == "Global update on the health sector response to HIV, 2014. Geneva: World Health Organization; \n 2014:168."
+    )
+    assert (
+        ref_2.strip()
+        == "WHO, UNICEF, UNAIDS. Global update on HIV treatment 2013: results, impact and \n opportunities. Geneva: World Health Organization; 2013:126"
+    )
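The assertions above all rely on the same pattern: each span carries character offsets into the document text, and slicing the text with those offsets recovers one reference per span. A self-contained sketch of that step, with a made-up doc dict standing in for what nra.run() yields in the tests (the text, offsets, and label value here are placeholders, not data from the repository):

# Stand-in for one document produced by the annotator: "spans" holds
# character offsets into "text", one span per numbered reference.
doc = {
    "text": "1. First reference.\n2. Second reference.",
    "spans": [
        {"start": 0, "end": 19, "label": "REFERENCE"},   # placeholder label
        {"start": 20, "end": 40, "label": "REFERENCE"},
    ],
}

# Slice each span out of the text, as the tests do for ref_1, ref_2, ref_3.
references = [doc["text"][s["start"] : s["end"]].strip() for s in doc["spans"]]
assert references == ["1. First reference.", "2. Second reference."]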

tests/prodigy/test_prodigy_to_tsv.py

Lines changed: 0 additions & 5 deletions
@@ -740,9 +740,4 @@ def test_reference_spans_real_example(doc):
 
     import pprint
 
-    #pp = pprint.PrettyPrinter()
-    #pp.pprint(actual)
-    #for token, span in zip(doc["tokens"], doc["spans"]):
-    #    print({token["text"]:span["label"]})
-
     assert actual == expected

tests/prodigy/test_reach_to_prodigy.py

Lines changed: 11 additions & 8 deletions
@@ -4,37 +4,40 @@
 import pytest
 from deep_reference_parser.prodigy.reach_to_prodigy import ReachToProdigy
 
+
 @pytest.fixture(scope="function")
 def stp():
     ref_sections = [{}, {}, {}]
     return ReachToProdigy(ref_sections)
 
+
 def test_combine_n_rows(stp):
 
     doc = list(range(100, 200))
     out = stp.combine_n_rows(doc, n=5, join_char=" ")
 
-    last_in_doc = doc[len(doc) -1]
+    last_in_doc = doc[len(doc) - 1]
     last_in_out = int(out[-1].split(" ")[-1])
 
     assert last_in_doc == last_in_out
 
-    assert out[0] == '100 101 102 103 104'
-    assert out[-2] == '190 191 192 193 194'
-    assert out[-1] == '195 196 197 198 199'
+    assert out[0] == "100 101 102 103 104"
+    assert out[-2] == "190 191 192 193 194"
+    assert out[-1] == "195 196 197 198 199"
+
 
 def test_combine_n_rows_uneven_split(stp):
 
     doc = list(range(100, 200))
     out = stp.combine_n_rows(doc, n=7, join_char=" ")
 
-    last_in_doc = doc[len(doc) -1]
+    last_in_doc = doc[len(doc) - 1]
     last_in_out = int(out[-1].split(" ")[-1])
 
     assert last_in_doc == last_in_out
     assert len(out[-1].split(" ")) == 2
    assert len(out[-2].split(" ")) == 7
 
-    assert out[0] == '100 101 102 103 104 105 106'
-    assert out[-2] == '191 192 193 194 195 196 197'
-    assert out[-1] == '198 199'
+    assert out[0] == "100 101 102 103 104 105 106"
+    assert out[-2] == "191 192 193 194 195 196 197"
+    assert out[-1] == "198 199"
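Taken together, these tests pin down the behaviour expected of combine_n_rows: chunk a list into groups of n items, join each group with join_char, and leave a shorter final group when the length does not divide evenly. A minimal sketch consistent with those assertions (not ReachToProdigy's actual implementation, just the contract the tests describe):

def combine_n_rows(doc, n=5, join_char=" "):
    # Walk the list in steps of n; each chunk becomes one joined string,
    # and the last chunk is shorter when len(doc) % n != 0.
    return [
        join_char.join(str(item) for item in doc[i : i + n])
        for i in range(0, len(doc), n)
    ]

# Matches the uneven-split test above: 100 items in chunks of 7 leaves "198 199".
assert combine_n_rows(list(range(100, 200)), n=7)[-1] == "198 199"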
