Skip to content

Commit

Permalink
Merge pull request #1178 from mild-blue/fix-generate-patients-for-pyt…
Browse files Browse the repository at this point in the history
…hon-3_11

Generate new patients for large DB + fix generation for python 3.11
  • Loading branch information
abragtim authored May 9, 2023
2 parents 22613b4 + e804ada commit 00a2c8b
Show file tree
Hide file tree
Showing 11 changed files with 6,877 additions and 2,844 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2.16_amd64.deb
You need to have Docker installed, and the environment from the previous step must be activated.

After that, simply run `make setup-small-non-empty-db`, or `make setup-non-empty-db` for a larger one.
If you want to remove the majority of patients with errors that often do not occur in real data,
then use the script [remove_inconsistent_patients.sql](local_testing_utilities/remove_inconsistent_patients.sql).

This runs a PostgreSQL database in Docker that already has some data inside.

Expand Down
5 changes: 3 additions & 2 deletions local_testing_utilities/generate_patients.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@

BRIDGING_PROBABILITY = 0.8
NON_DIRECTED_PROBABILITY = 0.9
GENERATED_TXM_EVENT_NAME = 'high_res_example_data'
GENERATED_TXM_EVENT_NAME = 'high_res_example_data' # generated with generate_patients.py
THEORETICAL_DOUBLE_TXM_EVENT_NAME = 'theoretical_double_small_event'
CROSSMATCH_TXM_EVENT_NAME = 'mixed_resolution_with_crossmatch_types'
LARGE_DATA_FOLDER = get_absolute_path(f'tests/resources/{GENERATED_TXM_EVENT_NAME}/')
SMALL_DATA_FOLDER = get_absolute_path('tests/resources/high_res_example_small_data/')
Expand Down Expand Up @@ -107,7 +108,7 @@ def random_acceptable() -> List[BloodGroup]:
return []
num_of_acceptable = random.randint(1, 4)
blood_groups = {BloodGroup.ZERO, BloodGroup.A, BloodGroup.B, BloodGroup.AB}
acceptable = random.sample(blood_groups, num_of_acceptable)
acceptable = random.sample(list(blood_groups), num_of_acceptable)
return acceptable


Expand Down
18 changes: 15 additions & 3 deletions local_testing_utilities/populate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from typing import List

from local_testing_utilities.generate_patients import (
CROSSMATCH_TXM_EVENT_NAME, GENERATED_TXM_EVENT_NAME, SMALL_DATA_FOLDER,
CROSSMATCH_TXM_EVENT_NAME, GENERATED_TXM_EVENT_NAME,
THEORETICAL_DOUBLE_TXM_EVENT_NAME, SMALL_DATA_FOLDER,
SMALL_DATA_FOLDER_MULTIPLE_DONORS, SMALL_DATA_FOLDER_THEORETICAL,
SMALL_DATA_FOLDER_WITH_CROSSMATCH, store_generated_patients_from_folder)
from local_testing_utilities.utils import create_or_overwrite_txm_event
Expand Down Expand Up @@ -181,13 +182,24 @@ def populate_db_multiple_recipients():


def populate_db_theoretical_double_crossmach():
create_or_overwrite_txm_event(name=CROSSMATCH_TXM_EVENT_NAME)
create_or_overwrite_txm_event(name=THEORETICAL_DOUBLE_TXM_EVENT_NAME)
add_users()
store_generated_patients_from_folder(SMALL_DATA_FOLDER_THEORETICAL, GENERATED_TXM_EVENT_NAME)
store_generated_patients_from_folder(SMALL_DATA_FOLDER_THEORETICAL, THEORETICAL_DOUBLE_TXM_EVENT_NAME)


def populate_large_db():
    """Populate the database with the large example data set.

    Creates (or overwrites) the 'test', theoretical-double and crossmatch
    TXM events, adds users, fills in data for those users, and then stores
    generated patients: once with the helper's default arguments, once from
    the crossmatch folder and once from the theoretical folder.
    """
    # Events are created in this order: 'test', theoretical double, crossmatch.
    for event_name in ('test', THEORETICAL_DOUBLE_TXM_EVENT_NAME, CROSSMATCH_TXM_EVENT_NAME):
        create_or_overwrite_txm_event(name=event_name)

    populate_db_with_data(add_users())

    # Called without arguments, so the helper's own defaults decide which
    # folder and event are used (presumably the large generated data set --
    # TODO confirm against store_generated_patients_from_folder).
    store_generated_patients_from_folder()

    small_data_sets = (
        (SMALL_DATA_FOLDER_WITH_CROSSMATCH, CROSSMATCH_TXM_EVENT_NAME),
        (SMALL_DATA_FOLDER_THEORETICAL, THEORETICAL_DOUBLE_TXM_EVENT_NAME),
    )
    for data_folder, event_name in small_data_sets:
        store_generated_patients_from_folder(txm_event_name=event_name,
                                             folder=data_folder)

    # Hint: Use local_testing_utilities/remove_inconsistent_patients.sql script
    # to remove inconsistent patients from DB.
16 changes: 16 additions & 0 deletions local_testing_utilities/remove_inconsistent_patients.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
  Removes inconsistent patients.
  Most often, this is related to parsing_issue BASIC_HLA_GROUP_IS_EMPTY,
  so every recipient and every donor that has a parsing issue with this
  detail is deleted.
*/

-- Recipients referenced by at least one BASIC_HLA_GROUP_IS_EMPTY parsing issue.
DELETE FROM recipient
WHERE id IN (
    SELECT parsing_issue.recipient_id
    FROM parsing_issue
    WHERE parsing_issue.parsing_issue_detail = 'BASIC_HLA_GROUP_IS_EMPTY'
);

-- Donors referenced by at least one BASIC_HLA_GROUP_IS_EMPTY parsing issue.
DELETE FROM donor
WHERE id IN (
    SELECT parsing_issue.donor_id
    FROM parsing_issue
    WHERE parsing_issue.parsing_issue_detail = 'BASIC_HLA_GROUP_IS_EMPTY'
);

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading

0 comments on commit 00a2c8b

Please sign in to comment.