Skip to content

Commit

Permalink
Fix edge case and improve unit tests (#100)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Nov 1, 2023
1 parent 56ac5a4 commit 288b5f2
Show file tree
Hide file tree
Showing 40 changed files with 115 additions and 98 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ main_test.py
sample_test_data/
# Distribution / packaging
*.egg-info/

tests/TestData
12 changes: 10 additions & 2 deletions datasimulator/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
import rstr
import random

from cdislogging import get_logger

from .errors import UserError


logger = get_logger("data-simulator generator", log_level="info")

tried_words = False
WORDS = None

Expand All @@ -18,8 +23,11 @@ def generate_string_data(size=10, pattern=None, format=None):
try:
word_file = "/usr/share/dict/words"
WORDS = open(word_file).read().splitlines()
except Exception:
pass
logger.info("Using '/usr/share/dict/words' for string generation.")
except Exception as e:
logger.info(
f"Unable to use '/usr/share/dict/words' for string generation. Generating random strings instead. Details: {e}"
)
if pattern or not WORDS:
pattern = pattern or "^[0-9a-f]{" + str(size) + "}"
return rstr.xeger(pattern)
Expand Down
15 changes: 13 additions & 2 deletions datasimulator/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,25 @@ def generate_submission_order_path_to_node(self, node, cmc_node=None):
submission_order.append(cmc_node)
index = 0

project_node = None
while index < len(submission_order):
cur_node = submission_order[index]
index += 1
if not cur_node:
continue
for linked_node_dict in cur_node.required_links:
if linked_node_dict["node"] not in submission_order:
submission_order.append(linked_node_dict["node"])
if linked_node_dict["node"].name == "project":
project_node = linked_node_dict["node"]
continue
if linked_node_dict["node"] in submission_order:
# reorder to place the node at the beginning. eg. if this node is parent to 2 other
# nodes, we need it to be submitted before the 2 others, and not in-between.
submission_order.remove(linked_node_dict["node"])
submission_order.append(linked_node_dict["node"])

if project_node: # project should always be submitted first
submission_order.append(project_node)

submission_order.reverse()

return submission_order
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@


@pytest.fixture
def init_dictionary():
datadictionary = DataDictionary(root_dir=os.path.join(MOD_DIR, "schemas"))
def default_dictionary():
datadictionary = DataDictionary(root_dir=os.path.join(MOD_DIR, "schemas/default"))
dictionary.init(datadictionary)
1 change: 0 additions & 1 deletion tests/schema/schema.json

This file was deleted.

24 changes: 0 additions & 24 deletions tests/schemas/README.md

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ file_name:

file_size:
type: integer
term:
term:
$ref: "_terms.yaml#/file_size"

file_format:
Expand Down Expand Up @@ -105,7 +105,7 @@ data_file_error_type:

state:
term:
$ref: "_terms.yaml#/state"
$ref: "_terms.yaml#/state"
default: validated
downloadable:
- uploaded
Expand Down Expand Up @@ -136,7 +136,7 @@ state:

file_state:
term:
$ref: "_terms.yaml#/file_state"
$ref: "_terms.yaml#/file_state"
default: registered
enum:
- registered
Expand Down Expand Up @@ -177,7 +177,7 @@ data_file_properties:
file_size:
$ref: "#/file_size"
md5sum:
$ref: "#/md5sum"
$ref: "#/md5sum"
file_state:
$ref: "#/file_state"
object_id:
Expand All @@ -188,7 +188,7 @@ data_file_properties:
$ref: "#/data_file_error_type"
state_comment:
type: string
description: >
description: >
Optional comment about why the file is in the
current state, mainly for invalid state.
project_id:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ biomarker_name:
term: Biomarker Name
source: caDSR
cde_id: 5473
cde_version: 11.0
cde_version: 11.0
term_url: "https://cdebrowser.nci.nih.gov/CDEBrowser/search?elementDetails=9&FirstTimer=0&PageId=ElementDetailsGroup&publicId=5473&version=2.31"

biomarker_result:
Expand Down Expand Up @@ -618,7 +618,7 @@ dlco_ref_predictive_percent:
cde_version: 1.0
term_url: "https://cdebrowser.nci.nih.gov/CDEBrowser/search?elementDetails=9&FirstTimer=0&PageId=ElementDetailsGroup&publicId=2180255&version=1.0"

encoding:
encoding:
description: >
Version of ASCII encoding of quality values found in the file.
termDef:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
$schema: "http://json-schema.org/draft-04/schema#"

id: "acknowledgement"
title: Acknowledgement
title: Acknowledgement
type: object
namespace: http://gdc.nci.nih.gov
category: administrative
Expand All @@ -21,7 +21,7 @@ systemProperties:

links:
- name: projects
backref: acknowledgements
backref: acknowledgements
label: contribute_to
target_type: project
multiplicity: many_to_many
Expand Down
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions tests/schemas/case.yaml → tests/schemas/default/case.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ program: '*'
project: '*'
description: >
The collection of all data related to a specific subject in the
context of a specific experiment.
context of a specific experiment.
additionalProperties: false
submittable: true
validators: null
Expand All @@ -22,7 +22,7 @@ systemProperties:
- state

links:
- name: experiments
- name: experiments
backref: cases
label: member_of
target_type: experiment
Expand All @@ -31,7 +31,7 @@ links:

required:
- submitter_id
- experiments
- experiments

uniqueKeys:
- [id]
Expand All @@ -56,7 +56,7 @@ properties:
disease_type:
description: "Name of the disease for the case."
type: string
experiments:
experiments:
$ref: "_definitions.yaml#/to_one"
project_id:
$ref: "_definitions.yaml#/project_id"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
$schema: "http://json-schema.org/draft-04/schema#"

id: "clinical_test"
title: Clinical Test
title: Clinical Test
type: object
namespace: http://gdc.nci.nih.gov
category: clinical
category: clinical
project: '*'
program: '*'
description: >
Metadata concerning any clinical tests used in relation to a case diagnosis.
description: >
Metadata concerning any clinical tests used in relation to a case diagnosis.
additionalProperties: false
submittable: true
submittable: true
validators: null

systemProperties:
Expand All @@ -21,9 +21,9 @@ systemProperties:
- state

links:
- name: cases
- name: cases
backref: clinical_tests
label: performed_for
label: performed_for
target_type: case
multiplicity: many_to_one
required: true
Expand Down Expand Up @@ -172,7 +172,7 @@ properties:
$ref: "_terms.yaml#/her2_erbb2_result_fish"
enum:
- Negative
- Not Performed
- Not Performed
- Positive
- Unknown

Expand Down Expand Up @@ -233,7 +233,7 @@ properties:
diagnoses:
$ref: "_definitions.yaml#/to_many"
project_id:
$ref: "_definitions.yaml#/project_id"
$ref: "_definitions.yaml#/project_id"
created_datetime:
$ref: "_definitions.yaml#/datetime"
updated_datetime:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ properties:

format:
description: >
The file format, physical medium, or dimensions of the resource. Examples of dimensions include size and duration. Recommended best practice is to use a controlled vocabulary such as the list of Internet Media Types [MIME] (http://www.iana.org/assignments/media-types/).
The file format, physical medium, or dimensions of the resource. Examples of dimensions include size and duration. Recommended best practice is to use a controlled vocabulary such as the list of Internet Media Types [MIME] (http://www.iana.org/assignments/media-types/).
type: string

language:
Expand Down Expand Up @@ -110,4 +110,3 @@ properties:

projects:
$ref: "_definitions.yaml#/to_one"

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,8 @@ properties:
term:
$ref: "_terms.yaml#/cause_of_death"
enum:
- Cancer Related
- Not Cancer Related
- Cancer Related
- Not Cancer Related
- Unknown

circumferential_resection_margin:
Expand Down Expand Up @@ -500,7 +500,7 @@ properties:
$ref: "_terms.yaml#/hiv_positive"
enum:
- "Yes"
- "No"
- "No"
- Unknown

hpv_positive_type:
Expand Down Expand Up @@ -559,7 +559,7 @@ properties:
lymph_nodes_positive:
term:
$ref: "_terms.yaml#/lymph_nodes_positive"
type: integer
type: integer

lymphatic_invasion_present:
term:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ links:
- name: projects
backref: experiments
label: performed_for
target_type: project
target_type: project
multiplicity: many_to_one
required: true

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ description: >
Any specifically defined piece of work that is undertaken or attempted to meet a single
requirement. (NCIt C47885)
additionalProperties: false
submittable: true
submittable: true
validators: null

systemProperties:
Expand All @@ -21,8 +21,8 @@ systemProperties:
- intended_release_date

required:
- code
- name
- code
- name
- programs

uniqueKeys:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ required:

uniqueKeys:
- [ id ]
- [ project_id, submitter_id ]
- [ project_id, submitter_id ]

properties:
type:
enum: [ "publication" ]
enum: [ "publication" ]
id:
$ref: "_definitions.yaml#/UUID"
systemAlias: node_id
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ properties:
- Cytoplasmic
- Both
- None
- Not Determined
- Not Determined
frame_identifier:
description: "Name, number, or other identifier given to the frame of the slide from which this image was taken."
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ properties:
data_format:
term:
$ref: "_terms.yaml#/data_format"
type: string
type: string
experimental_strategy:
description: "Classification of the slide type with respect to its experimental use."
enum:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ properties:
data_type:
term:
$ref: "_terms.yaml#/data_type"
type: string
type: string
data_format:
term:
$ref: "_terms.yaml#/data_format"
type: string
type: string
experimental_strategy:
term:
$ref: "_terms.yaml#/experimental_strategy"
type: string
type: string
aliquots:
$ref: "_definitions.yaml#/to_one"
read_groups:
Expand Down
Loading

0 comments on commit 288b5f2

Please sign in to comment.