Skip to content

Commit 09d7631

Browse files
committed
Automatically output statistics
- Output file with statistics of dataset when write_and_check_output is called.
1 parent a9baeb7 commit 09d7631

File tree

2 files changed

+41
-88
lines changed

2 files changed

+41
-88
lines changed

src/get_dataset.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,5 @@ def get_ct_pair_dataset(
8383
logging.info("write_full_dataset_to_file")
8484
output.write_full_dataset_to_file(dataset, args, out)
8585

86-
logging.info("output_stats")
87-
output.output_all_stats(dataset, args, out)
88-
8986
if logging.DEBUG >= logging.root.level:
9087
output.write_debug_sizes(dataset, out)

src/output.py

Lines changed: 41 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -49,61 +49,6 @@ def write_output(
4949
return file_type_list
5050

5151

52-
def write_and_check_output(
53-
df: pd.DataFrame,
54-
filename: str,
55-
assay_type: str,
56-
args: CalculationArgs,
57-
out: OutputArgs,
58-
):
59-
"""
60-
Write df to file and check that writing was successful.
61-
62-
:param df: Pandas Dataframe to write to output file.
63-
:type df: pd.DataFrame
64-
:param filename: Filename to write the output to
65-
:type filename: bool
66-
:param assay_type: Types of assays current_df contains information about. \
67-
Options: "BF" (binding+functional),
68-
"B" (binding),
69-
"all" (contains both BF and B information)
70-
:type assay_type: str
71-
:param args: Arguments related to how to calculate the dataset
72-
:type args: CalculationArgs
73-
:param out: Arguments related to how to output the dataset
74-
:type out: OutputArgs
75-
"""
76-
file_type_list = write_output(df, filename, out)
77-
sanity_checks.test_equality(
78-
df, filename, assay_type, file_type_list, args.calculate_rdkit
79-
)
80-
81-
82-
##### Output Specific Results #####
83-
def write_full_dataset_to_file(
84-
dataset: Dataset,
85-
args: CalculationArgs,
86-
out: OutputArgs,
87-
):
88-
"""
89-
If write_full_dataset, write df_combined with filtering columns to output_path.
90-
91-
:param dataset: Dataset with compound-target pairs.
92-
:type dataset: Dataset
93-
:param args: Arguments related to how to calculate the dataset
94-
:type args: CalculationArgs
95-
:param out: Arguments related to how to output the dataset
96-
:type out: OutputArgs
97-
"""
98-
desc = "all"
99-
if out.write_full_dataset:
100-
name_all = os.path.join(
101-
out.output_path,
102-
f"ChEMBL{args.chembl_version}_CTI_{args.limited_flag}_full_dataset",
103-
)
104-
write_and_check_output(dataset.df_result, name_all, desc, args, out)
105-
106-
10752
def output_stats(
10853
df: pd.DataFrame,
10954
output_file: str,
@@ -145,49 +90,60 @@ def output_stats(
14590
)
14691

14792

148-
def output_all_stats(dataset: Dataset, args: CalculationArgs, out: OutputArgs):
93+
def write_and_check_output(
94+
df: pd.DataFrame,
95+
filename: str,
96+
assay_type: str,
97+
args: CalculationArgs,
98+
out: OutputArgs,
99+
):
149100
"""
150-
Output stats for all datasets and subsets calculated.
101+
Write df to file and check that writing was successful.
151102
152-
:param dataset: Dataset with compound-target pairs.
153-
:type dataset: Dataset
103+
:param df: Pandas Dataframe to write to output file.
104+
:type df: pd.DataFrame
105+
:param filename: Filename to write the output to (should not include the file extension)
106+
:type filename: str
107+
:param assay_type: Types of assays current_df contains information about. \
108+
Options: "BF" (binding+functional),
109+
"B" (binding),
110+
"all" (contains both BF and B information)
111+
:type assay_type: str
154112
:param args: Arguments related to how to calculate the dataset
155113
:type args: CalculationArgs
156114
:param out: Arguments related to how to output the dataset
157115
:type out: OutputArgs
158116
"""
159-
output_file = os.path.join(
160-
out.output_path,
161-
f"ChEMBL{args.chembl_version}_CTI_{args.limited_flag}_full_dataset_stats",
117+
file_type_list = write_output(df, filename, out)
118+
sanity_checks.test_equality(
119+
df, filename, assay_type, file_type_list, args.calculate_rdkit
162120
)
121+
output_stats(df, f"{filename}_stats", out)
163122

164-
output_stats(dataset.df_result, output_file, out)
165123

166-
if out.write_bf:
167-
output_file = os.path.join(
168-
out.output_path,
169-
f"ChEMBL{args.chembl_version}_"
170-
f"CTI_{args.limited_flag}_"
171-
f"BF_{args.min_nof_cpds_bf}_c_dt_d_dt_stats",
172-
)
173-
output_stats(
174-
dataset.df_result[dataset.df_result["BF_100_c_dt_d_dt"]],
175-
output_file,
176-
out,
177-
)
124+
##### Output Specific Results #####
125+
def write_full_dataset_to_file(
126+
dataset: Dataset,
127+
args: CalculationArgs,
128+
out: OutputArgs,
129+
):
130+
"""
131+
If out.write_full_dataset is set, write dataset.df_result with filtering columns to out.output_path.
178132
179-
if out.write_b:
180-
output_file = os.path.join(
133+
:param dataset: Dataset with compound-target pairs.
134+
:type dataset: Dataset
135+
:param args: Arguments related to how to calculate the dataset
136+
:type args: CalculationArgs
137+
:param out: Arguments related to how to output the dataset
138+
:type out: OutputArgs
139+
"""
140+
desc = "all"
141+
if out.write_full_dataset:
142+
name_all = os.path.join(
181143
out.output_path,
182-
f"ChEMBL{args.chembl_version}_"
183-
f"CTI_{args.limited_flag}_"
184-
f"B_{args.min_nof_cpds_b}_c_dt_d_dt_stats",
185-
)
186-
output_stats(
187-
dataset.df_result[dataset.df_result["B_100_c_dt_d_dt"]],
188-
output_file,
189-
out,
144+
f"ChEMBL{args.chembl_version}_CTI_{args.limited_flag}_full_dataset",
190145
)
146+
write_and_check_output(dataset.df_result, name_all, desc, args, out)
191147

192148

193149
def write_debug_sizes(

0 commit comments

Comments
 (0)