From ef03d6587bebb9b3a447e6761edb64b8ebb705a4 Mon Sep 17 00:00:00 2001
From: Chimezie Iwuanyanwu <mr.chrisopher@yahoo.com>
Date: Sat, 2 Dec 2023 16:55:28 -0600
Subject: [PATCH] Created training.py

Flattens the stepcovnet/training package into a single stepcovnet/training.py module.
---
 stepcovnet/config.py                          |  5 +-
 stepcovnet/executor.py                        |  7 ++-
 stepcovnet/inputs.py                          |  9 ++--
 ...rainingFeatureGenerator.py => training.py} | 48 +++++++++++++++++++
 .../training/TrainingHyperparameters.py       | 36 --------------
 stepcovnet/training/__init__.py               |  0
 train.py                                      |  5 +-
 7 files changed, 59 insertions(+), 51 deletions(-)
 rename stepcovnet/{training/TrainingFeatureGenerator.py => training.py} (86%)
 delete mode 100644 stepcovnet/training/TrainingHyperparameters.py
 delete mode 100644 stepcovnet/training/__init__.py

diff --git a/stepcovnet/config.py b/stepcovnet/config.py
index 7f4eb16..d16c4b1 100644
--- a/stepcovnet/config.py
+++ b/stepcovnet/config.py
@@ -4,10 +4,9 @@
 import numpy as np
 from sklearn.model_selection import train_test_split
 
-from stepcovnet import dataset
+from stepcovnet import dataset, training
 from stepcovnet.common.constants import NUM_ARROW_COMBS
 from stepcovnet.common.utils import get_channel_scalers
-from stepcovnet.training.TrainingHyperparameters import TrainingHyperparameters
 
 
 class AbstractConfig(ABC, object):
@@ -56,7 +55,7 @@ def __init__(
         dataset_path: str,
         dataset_type: Type[dataset.ModelDataset],
         dataset_config,
-        hyperparameters: TrainingHyperparameters,
+        hyperparameters: training.TrainingHyperparameters,
         all_scalers=None,
         limit: int = -1,
         lookback: int = 1,
diff --git a/stepcovnet/executor.py b/stepcovnet/executor.py
index d345d03..b9351be 100644
--- a/stepcovnet/executor.py
+++ b/stepcovnet/executor.py
@@ -8,12 +8,11 @@
 from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
 from tensorflow.python.keras.callbacks import ModelCheckpoint
 
-from stepcovnet import config, encoder, inputs
+from stepcovnet import config, encoder, inputs, training
 from stepcovnet.common.constants import NUM_ARROWS, NUM_ARROW_TYPES
 from stepcovnet.common.tf_config import tf_init
 from stepcovnet.common.utils import apply_scalers, get_samples_ngram_with_mask
 from stepcovnet.model.StepCOVNetModel import StepCOVNetModel
-from stepcovnet.training.TrainingHyperparameters import TrainingHyperparameters
 
 
 class AbstractExecutor(ABC, object):
@@ -121,7 +120,7 @@ def execute(self, input_data: inputs.TrainingInput):
             )
         return self.stepcovnet_model
 
-    def get_training_callbacks(self, hyperparameters: TrainingHyperparameters):
+    def get_training_callbacks(self, hyperparameters: training.TrainingHyperparameters):
         model_out_path = self.stepcovnet_model.model_root_path
         model_name = self.stepcovnet_model.model_name
         log_path = hyperparameters.log_path
@@ -151,7 +150,7 @@ def get_training_callbacks(self, hyperparameters: TrainingHyperparameters):
         return callbacks
 
     @staticmethod
-    def get_retraining_callbacks(hyperparameters: TrainingHyperparameters):
+    def get_retraining_callbacks(hyperparameters: training.TrainingHyperparameters):
         log_path = hyperparameters.log_path
         callbacks = []
 
diff --git a/stepcovnet/inputs.py b/stepcovnet/inputs.py
index 719bc2f..63cec17 100644
--- a/stepcovnet/inputs.py
+++ b/stepcovnet/inputs.py
@@ -3,10 +3,9 @@
 import numpy as np
 import tensorflow as tf
 
-from stepcovnet import config
+from stepcovnet import config, training
 from stepcovnet.common.utils import get_samples_ngram_with_mask
 from stepcovnet.data_collection.sample_collection_helper import get_audio_features
-from stepcovnet.training.TrainingFeatureGenerator import TrainingFeatureGenerator
 
 
 class AbstractInput(ABC, object):
@@ -53,7 +52,7 @@ def __init__(self, training_config: config.TrainingConfig):
             tf.TensorShape((None,) + self.config.label_shape),  # labels
             tf.TensorShape((None,)),  # sample weights
         )
-        self.train_feature_generator = TrainingFeatureGenerator(
+        self.train_feature_generator = training.TrainingFeatureGenerator(
             dataset_path=self.config.dataset_path,
             dataset_type=self.config.dataset_type,
             lookback=self.config.lookback,
@@ -65,7 +64,7 @@ def __init__(self, training_config: config.TrainingConfig):
             warmup=True,
             tokenizer_name=self.config.tokenizer_name,
         )
-        self.val_feature_generator = TrainingFeatureGenerator(
+        self.val_feature_generator = training.TrainingFeatureGenerator(
             dataset_path=self.config.dataset_path,
             dataset_type=self.config.dataset_type,
             lookback=self.config.lookback,
@@ -77,7 +76,7 @@ def __init__(self, training_config: config.TrainingConfig):
             shuffle=False,
             tokenizer_name=self.config.tokenizer_name,
         )
-        self.all_feature_generator = TrainingFeatureGenerator(
+        self.all_feature_generator = training.TrainingFeatureGenerator(
             dataset_path=self.config.dataset_path,
             dataset_type=self.config.dataset_type,
             lookback=self.config.lookback,
diff --git a/stepcovnet/training/TrainingFeatureGenerator.py b/stepcovnet/training.py
similarity index 86%
rename from stepcovnet/training/TrainingFeatureGenerator.py
rename to stepcovnet/training.py
index 46a3928..e4f3f4d 100644
--- a/stepcovnet/training/TrainingFeatureGenerator.py
+++ b/stepcovnet/training.py
@@ -9,6 +9,54 @@
     normalize_tokenized_arrows,
 )
 
+# Configurable training hyperparameters
+# TODO(https://github.com/cpuguy96/StepCOVNet/issues/2):
+#  Move all training hyperparameters into config file
+import tensorflow as tf
+
+
+class TrainingHyperparameters(object):
+    """Training settings holder; unset options fall back to DEFAULT_* values."""
+    # NOTE(review): the default objects below are created once, when this class
+    # body runs, and are shared by every instance that falls back to them.
+    DEFAULT_METRICS = [
+        tf.keras.metrics.CategoricalAccuracy(name="acc"),
+        tf.keras.metrics.Precision(name="pre"),
+        tf.keras.metrics.Recall(name="rec"),
+        tf.keras.metrics.AUC(curve="PR", name="pr_auc"),
+        tf.keras.metrics.AUC(name="auc"),
+    ]
+    DEFAULT_LOSS = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05)
+    DEFAULT_OPTIMIZER = tf.keras.optimizers.Nadam(beta_1=0.99)
+    DEFAULT_EPOCHS = 15
+    DEFAULT_PATIENCE = 3
+    DEFAULT_BATCH_SIZE = 32
+
+    def __init__(
+        self,
+        optimizer=None,
+        loss=None,
+        metrics=None,
+        batch_size=None,
+        epochs=None,
+        patience=None,
+        log_path=None,
+        retrain=None,
+    ):
+        """None picks DEFAULT_* (retrain: True); log_path is stored as given."""
+        self.optimizer = self.DEFAULT_OPTIMIZER if optimizer is None else optimizer
+        self.loss = self.DEFAULT_LOSS if loss is None else loss
+        self.metrics = self.DEFAULT_METRICS if metrics is None else metrics
+        self.patience = self.DEFAULT_PATIENCE if patience is None else patience
+        self.epochs = self.DEFAULT_EPOCHS if epochs is None else epochs
+        self.batch_size = self.DEFAULT_BATCH_SIZE if batch_size is None else batch_size
+        self.retrain = True if retrain is None else retrain
+        self.log_path = log_path
+
+    def __str__(self):
+        """Return the str() of a dict mapping each attribute name to str(value)."""
+        return str({name: str(attr) for name, attr in vars(self).items()})
+
 
 class TrainingFeatureGenerator(object):
     def __init__(
diff --git a/stepcovnet/training/TrainingHyperparameters.py b/stepcovnet/training/TrainingHyperparameters.py
deleted file mode 100644
index 37a31a3..0000000
--- a/stepcovnet/training/TrainingHyperparameters.py
+++ /dev/null
@@ -1,36 +0,0 @@
-"""Configurable training hyperparameters"""
-# TODO(https://github.com/cpuguy96/StepCOVNet/issues/2):
-#  Move all training hyperparameters into config file
-import tensorflow as tf
-
-
-class TrainingHyperparameters(object):
-    DEFAULT_METRICS = [
-        tf.keras.metrics.CategoricalAccuracy(name='acc'),
-        tf.keras.metrics.Precision(name='pre'),
-        tf.keras.metrics.Recall(name='rec'),
-        tf.keras.metrics.AUC(curve="PR", name='pr_auc'),
-        tf.keras.metrics.AUC(name='auc')
-    ]
-    DEFAULT_LOSS = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05)
-    DEFAULT_OPTIMIZER = tf.keras.optimizers.Nadam(beta_1=0.99)
-    DEFAULT_EPOCHS = 15
-    DEFAULT_PATIENCE = 3
-    DEFAULT_BATCH_SIZE = 32
-
-    def __init__(self, optimizer=None, loss=None, metrics=None, batch_size=None, epochs=None, patience=None,
-                 log_path=None, retrain=None):
-        self.optimizer = optimizer if optimizer is not None else self.DEFAULT_OPTIMIZER
-        self.loss = loss if loss is not None else self.DEFAULT_LOSS
-        self.metrics = metrics if metrics is not None else self.DEFAULT_METRICS
-        self.patience = patience if patience is not None else self.DEFAULT_PATIENCE
-        self.epochs = epochs if epochs is not None else self.DEFAULT_EPOCHS
-        self.batch_size = batch_size if batch_size is not None else self.DEFAULT_BATCH_SIZE
-        self.retrain = retrain if retrain is not None else True
-        self.log_path = log_path
-
-    def __str__(self):
-        str_dict = {}
-        for key, value in self.__dict__.items():
-            str_dict[key] = str(value)
-        return str_dict.__str__()
diff --git a/stepcovnet/training/__init__.py b/stepcovnet/training/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/train.py b/train.py
index fdd5f2f..a842df1 100644
--- a/train.py
+++ b/train.py
@@ -4,12 +4,11 @@
 
 import joblib
 
-from stepcovnet import config, data, executor, inputs
+from stepcovnet import config, data, executor, inputs, training
 from stepcovnet.model.ClassifierModel import ClassifierModel
 from stepcovnet.model.GPT2ArrowModel import GPT2ArrowModel
 from stepcovnet.model.StepCOVNetModel import StepCOVNetModel
 from stepcovnet.model.VggishAudioModel import VggishAudioModel
-from stepcovnet.training.TrainingHyperparameters import TrainingHyperparameters
 
 
 def load_training_data(input_path: str):
@@ -35,7 +34,7 @@ def run_training(
 ):
     dataset_path, dataset_type, scalers, dataset_config = load_training_data(input_path)
 
-    hyperparameters = TrainingHyperparameters(log_path=log_path)
+    hyperparameters = training.TrainingHyperparameters(log_path=log_path)
     training_config = config.TrainingConfig(
         dataset_path=dataset_path,
         dataset_type=dataset_type,