Skip to content

Commit

Permalink
Rechecked dataset configs
Browse files Browse the repository at this point in the history
  • Loading branch information
denysgerasymuk799 committed Sep 17, 2024
1 parent 10cab77 commit cded508
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 63 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ results/*
.pt_tmp
lightning_logs
saved_models
cluster/run_baselines/sbatch_files
cluster/run_exp1/sbatch_files
cluster/run_exp2_3/sbatch_files
cluster/run_mixed_exp/sbatch_files

# Created by https://www.gitignore.io/api/python,pycharm+all
# Edit at https://www.gitignore.io/?templates=python,pycharm+all
Expand Down
5 changes: 2 additions & 3 deletions cluster/run_baselines/run-baselines.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# Define the list of tuples (dataset_name, model_name, run_nums)
declare -a job_configs=(
"bank gandalf_clf 6"
"diabetes gandalf_clf 1,2,3,4,5,6"
"folk gandalf_clf 1,2"
"folk gandalf_clf 3,4"
"folk gandalf_clf 5,6"
"law_school gandalf_clf 6"
"bank gandalf_clf 2,4"
"bank gandalf_clf 6"
)

TEMPLATE_FILE="../cluster/run_baselines/run-baselines-template.sbatch"
Expand Down
21 changes: 3 additions & 18 deletions cluster/run_exp1/run-exp1-models-gpu.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
# Define the list of tuples (dataset_name, null_imputer_name, evaluation_scenario, model_name, run_nums)
declare -a job_configs=(
"heart deletion exp1_mcar3 gandalf_clf 1,2,3"
"heart deletion exp1_mcar3 gandalf_clf 4,5,6"
"heart deletion exp1_mar3 gandalf_clf 1,2,3"
"heart deletion exp1_mar3 gandalf_clf 4,5,6"
"heart deletion exp1_mnar3 gandalf_clf 1,2,3"
"heart deletion exp1_mnar3 gandalf_clf 4,5,6"
"heart median-mode exp1_mcar3 gandalf_clf 1,2,3"
"heart median-mode exp1_mcar3 gandalf_clf 4,5,6"
"heart median-mode exp1_mar3 gandalf_clf 1,2,3"
"heart median-mode exp1_mar3 gandalf_clf 4,5,6"
"heart median-mode exp1_mnar3 gandalf_clf 1,2,3"
"heart median-mode exp1_mnar3 gandalf_clf 4,5,6"
"heart median-dummy exp1_mcar3 gandalf_clf 1,2,3"
"heart median-dummy exp1_mcar3 gandalf_clf 4,5,6"
"heart median-dummy exp1_mar3 gandalf_clf 1,2,3"
"heart median-dummy exp1_mar3 gandalf_clf 4,5,6"
"heart median-dummy exp1_mnar3 gandalf_clf 1,2,3"
"heart median-dummy exp1_mnar3 gandalf_clf 4,5,6"
"heart deletion exp1_mar3 gandalf_clf 3"
"heart deletion exp1_mcar3 gandalf_clf 6"
"heart deletion exp1_mnar3 gandalf_clf 3,6"
)

TEMPLATE_FILE="../cluster/run_exp1/run-exp1-models-gpu-template.sbatch"
Expand Down
44 changes: 4 additions & 40 deletions cluster/run_exp1/run-exp1-models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,9 @@ declare -a job_configs=(
"diabetes median-dummy exp1_mcar3 gandalf_clf 1,2,3,4,5,6"
"diabetes median-dummy exp1_mar3 gandalf_clf 1,2,3,4,5,6"
"diabetes median-dummy exp1_mnar3 gandalf_clf 1,2,3,4,5,6"
"german deletion exp1_mcar3 gandalf_clf 1,2,3,4,5,6"
"german deletion exp1_mar3 gandalf_clf 1,2,3,4,5,6"
"german deletion exp1_mnar3 gandalf_clf 1,2,3,4,5,6"
"german median-mode exp1_mcar3 gandalf_clf 1,2,3,4,5,6"
"german median-mode exp1_mar3 gandalf_clf 1,2,3,4,5,6"
"german median-mode exp1_mnar3 gandalf_clf 1,2,3,4,5,6"
"german median-dummy exp1_mcar3 gandalf_clf 1,2,3,4,5,6"
"german median-dummy exp1_mar3 gandalf_clf 1,2,3,4,5,6"
"german deletion exp1_mcar3 gandalf_clf 2,3,4,5,6"
"german median-mode exp1_mnar3 gandalf_clf 3,4,5,6"
"german median-dummy exp1_mcar3 gandalf_clf 4,5,6"
"german median-dummy exp1_mnar3 gandalf_clf 1,2,3,4,5,6"
"folk deletion exp1_mcar3 gandalf_clf 1,2"
"folk deletion exp1_mcar3 gandalf_clf 3,4"
Expand Down Expand Up @@ -45,41 +40,10 @@ declare -a job_configs=(
"folk median-dummy exp1_mnar3 gandalf_clf 1,2"
"folk median-dummy exp1_mnar3 gandalf_clf 3,4"
"folk median-dummy exp1_mnar3 gandalf_clf 5,6"
"law_school deletion exp1_mcar3 gandalf_clf 1,2"
"law_school deletion exp1_mcar3 gandalf_clf 3,4"
"law_school deletion exp1_mcar3 gandalf_clf 5,6"
"law_school deletion exp1_mar3 gandalf_clf 1,2"
"law_school deletion exp1_mar3 gandalf_clf 3,4"
"law_school deletion exp1_mar3 gandalf_clf 5,6"
"law_school deletion exp1_mnar3 gandalf_clf 1,2"
"law_school deletion exp1_mnar3 gandalf_clf 3,4"
"law_school deletion exp1_mnar3 gandalf_clf 5,6"
"law_school median-mode exp1_mcar3 gandalf_clf 1,2"
"law_school median-mode exp1_mcar3 gandalf_clf 3,4"
"law_school median-mode exp1_mcar3 gandalf_clf 5,6"
"law_school median-mode exp1_mar3 gandalf_clf 1,2"
"law_school median-mode exp1_mar3 gandalf_clf 3,4"
"law_school median-mode exp1_mar3 gandalf_clf 5,6"
"law_school median-mode exp1_mnar3 gandalf_clf 1,2"
"law_school median-mode exp1_mnar3 gandalf_clf 3,4"
"law_school median-mode exp1_mnar3 gandalf_clf 5,6"
"law_school median-dummy exp1_mcar3 gandalf_clf 1,2"
"law_school median-dummy exp1_mcar3 gandalf_clf 3,4"
"law_school median-dummy exp1_mcar3 gandalf_clf 5,6"
"law_school median-dummy exp1_mar3 gandalf_clf 1,2"
"law_school median-dummy exp1_mar3 gandalf_clf 3,4"
"law_school median-dummy exp1_mar3 gandalf_clf 5,6"
"law_school median-dummy exp1_mnar3 gandalf_clf 1,2"
"law_school median-dummy exp1_mnar3 gandalf_clf 3,4"
"law_school median-dummy exp1_mnar3 gandalf_clf 5,6"
"bank deletion exp1_mcar3 gandalf_clf 1,2"
"bank deletion exp1_mcar3 gandalf_clf 3,4"
"bank deletion exp1_mcar3 gandalf_clf 5,6"
"bank deletion exp1_mar3 gandalf_clf 1,2"
"bank deletion exp1_mar3 gandalf_clf 3,4"
"bank deletion exp1_mar3 gandalf_clf 5,6"
"bank deletion exp1_mnar3 gandalf_clf 1,2"
"bank deletion exp1_mnar3 gandalf_clf 3,4"
"bank deletion exp1_mar3 gandalf_clf 2"
"bank deletion exp1_mnar3 gandalf_clf 5,6"
"bank median-mode exp1_mcar3 gandalf_clf 1,2"
"bank median-mode exp1_mcar3 gandalf_clf 3,4"
Expand Down
89 changes: 89 additions & 0 deletions cluster/useful_mongo_queries.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// ==============================================================
// Find all successfully executed experiments for specific model
// --------------------------------------------------------------
// Lists every distinct (dataset, null imputer, evaluation scenario,
// model, experiment iteration) combination that has at least one
// record tagged "OK" for the model named in the $match stage.
// ==============================================================
db.exp_nulls_data_cleaning.aggregate([
    // 1. Keep only records of the model of interest whose tag is "OK".
    { $match: { model_name: "gandalf_clf", tag: "OK" } },

    // 2. Collapse the records to one row per distinct combination.
    //    No accumulators are needed: the compound _id is the result.
    {
        $group: {
            _id: {
                dataset_name: "$dataset_name",
                null_imputer_name: "$null_imputer_name",
                evaluation_scenario: "$evaluation_scenario",
                model_name: "$model_name",
                experiment_iteration: "$experiment_iteration"
            }
        }
    },

    // 3. Order the combinations so gaps in the iterations are easy to spot.
    {
        $sort: {
            "_id.dataset_name": 1,
            "_id.null_imputer_name": 1,
            "_id.evaluation_scenario": 1,
            "_id.experiment_iteration": 1
        }
    },

    // 4. Flatten the compound _id back into top-level fields.
    {
        $project: {
            _id: 0,
            dataset_name: "$_id.dataset_name",
            null_imputer_name: "$_id.null_imputer_name",
            evaluation_scenario: "$_id.evaluation_scenario",
            model_name: "$_id.model_name",
            experiment_iteration: "$_id.experiment_iteration"
        }
    }
]);


// ==========================================================================================================
// Find best models based on F1 for specific dataset and each experiment iteration.
// If model results are the same, two models will be displayed per one experiment iteration as the best ones.
// ----------------------------------------------------------------------------------------------------------
// The dataset / scenario / metric / subgroup / tag / excluded-model filters appear twice on purpose:
// once in the leading $match and once inside the $lookup sub-pipeline, because the self-join reads
// the unfiltered collection again. Keep both places in sync when changing a filter value.
// ==========================================================================================================
db.exp_nulls_data_cleaning.aggregate([
    // 1. Restrict to the records that take part in the comparison.
    {
        $match: {
            dataset_name: "law_school",
            evaluation_scenario: "baseline",
            model_name: { $ne: "tabpfn_clf" },
            metric: "F1",
            subgroup: "overall",
            tag: "OK"
        }
    },

    // 2. One group per experiment iteration, keeping the highest metric_value seen in it.
    {
        $group: {
            _id: "$experiment_iteration",
            maxTotalValue: { $max: "$metric_value" }
        }
    },

    // 3. Self-join: fetch the original documents that reach that maximum in the same iteration.
    {
        $lookup: {
            from: "exp_nulls_data_cleaning",
            let: { groupId: "$_id", maxValue: "$maxTotalValue" },
            pipeline: [
                {
                    $match: {
                        $expr: {
                            $and: [
                                // Same iteration and the per-iteration maximum ...
                                { $eq: ["$experiment_iteration", "$$groupId"] },
                                { $eq: ["$metric_value", "$$maxValue"] },
                                // ... and the same filters as the leading $match.
                                { $eq: ["$dataset_name", "law_school"] },
                                { $eq: ["$evaluation_scenario", "baseline"] },
                                { $eq: ["$metric", "F1"] },
                                { $eq: ["$subgroup", "overall"] },
                                { $eq: ["$tag", "OK"] },
                                { $ne: ["$model_name", "tabpfn_clf"] }
                            ]
                        }
                    }
                }
            ],
            as: "maxValueDocs"
        }
    },

    // 4. Emit one output row per matching document (ties yield several rows per iteration).
    { $unwind: "$maxValueDocs" },

    // 5. Return the original documents instead of the {_id, maxTotalValue, maxValueDocs} wrapper.
    { $replaceRoot: { newRoot: "$maxValueDocs" } },

    // 6. Exclude the listed fields from the output.
    {
        $project: {
            _id: 0,
            dataset_split_seed: 0,
            exp_pipeline_guid: 0,
            model_params: 0,
            model_init_seed: 0,
            null_imputer_name: 0
        }
    }
]);
3 changes: 1 addition & 2 deletions configs/yaml_files/diabetes_config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
dataset_name: diabetes
bootstrap_fraction: 0.8
n_estimators: 5
#n_estimators: 50
n_estimators: 50
computation_mode: error_analysis
sensitive_attributes_dct: {'Gender': 'Female'}

0 comments on commit cded508

Please sign in to comment.