Commit b5c3c9b (1 parent: e296dc6)
Showing 7 changed files with 291 additions and 181 deletions.
@@ -0,0 +1,6 @@
---
layout: post
title: "espnet1_skill"
date: 2019-7-17 12:52:48 +0900
categories: jekyll update
---
@@ -0,0 +1,62 @@
## args usage in big project
- Input arg types:
  - yaml
    - XX_conf, e.g. tts_conf
  - cmd
    - .sh scripts
- Where arg names are added to the parser:
  - task.tts.abs_task.AbsTask
    - Abs-related (grouped):
      - general
      - distributed setting
      - hardware (cudnn)
      - dataset (where the data comes from): train_path_type_name, etc.
      - batch sampler (how to generate batches): sequence_iter_setting, chunk_iter_setting
      - pretraining
      - training (how to control the training process): epoch, patience, early-stop settings, grad_clip, etc.
      - optimizer
    - trainer-related: none
  - specific_task-related:
    - task-related
    - preprocess-related
    - sub-model-related <= the `*_conf` args are added by `ClassChoices`, as in the snippet below

```python
class_choices_list = [
    # --feats_extractor and --feats_extractor_conf
    feats_extractor_choices,
    ...
]
...
for class_choices in cls.class_choices_list:
    # Append --<name> and --<name>_conf,
    # e.g. --encoder and --encoder_conf.
    class_choices.add_arguments(group)
...

class ClassChoices:
    ...
    def add_arguments(self, parser):
        # Register the paired options for this component.
        parser.add_argument(
            f"--{self.name}",
        )
        parser.add_argument(
            f"--{self.name}_conf",
        )
```
- Using the parsed args in the model:
  - `XX_conf` is unpacked into the model constructor as keyword arguments:
    - `tts_class(idim=vocab_size, odim=odim, **args.tts_conf)`
  - The concrete class is resolved from the choices object first:
    - `tts_class = tts_choices.get_class(args.tts)`
    - `tts = tts_class(idim=vocab_size, odim=odim, **args.tts_conf)`
  - which effectively amounts to `Tacotron(**tts_conf)`
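
To see the whole `--<name>` / `--<name>_conf` pattern end to end, here is a minimal, self-contained sketch. It is not ESPnet's actual implementation: `SimpleClassChoices`, `Tacotron2`, and `FastSpeech` are hypothetical stand-ins used only for illustration.

```python
import argparse

import yaml


# Hypothetical stand-ins for real TTS model classes.
class Tacotron2:
    def __init__(self, idim, odim, embed_dim=512, atype="location"):
        self.config = dict(idim=idim, odim=odim, embed_dim=embed_dim, atype=atype)


class FastSpeech:
    def __init__(self, idim, odim, adim=384):
        self.config = dict(idim=idim, odim=odim, adim=adim)


class SimpleClassChoices:
    """Simplified stand-in for the ClassChoices helper described above."""

    def __init__(self, name, classes, default):
        self.name = name
        self.classes = classes
        self.default = default

    def add_arguments(self, parser):
        # Append --<name> and --<name>_conf, e.g. --tts and --tts_conf.
        parser.add_argument(f"--{self.name}", default=self.default, choices=list(self.classes))
        parser.add_argument(f"--{self.name}_conf", type=yaml.safe_load, default=dict())

    def get_class(self, name):
        return self.classes[name]


tts_choices = SimpleClassChoices(
    "tts", {"tacotron2": Tacotron2, "fastspeech": FastSpeech}, default="tacotron2"
)

parser = argparse.ArgumentParser()
tts_choices.add_arguments(parser)
args = parser.parse_args(["--tts", "tacotron2", "--tts_conf", "{embed_dim: 256}"])

# XX_conf is unpacked into the chosen class, exactly as in the notes above.
tts_class = tts_choices.get_class(args.tts)
tts = tts_class(idim=100, odim=80, **args.tts_conf)
print(tts.config)  # {'idim': 100, 'odim': 80, 'embed_dim': 256, 'atype': 'location'}
```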
@@ -0,0 +1,113 @@
## system architecture

### The overall process of training on speech data
- Prepare every type of feed data as files in (name, data) format; (name, raw_file) is required.
- Create a feed-data iterator that reads the specific data selected by args and yields it in batches (a minimal sketch follows this list):
  - Read each data element in key-value format.
  - Organize the data elements into batches with the specified method.
  - Build the batch iterator.
- Build mid-level data-processing modules that compute the intermediate values used by the final model:
  - Gather the mid-level data processors together.
- Create a trainer that specifies the training process:
  - Build the core model.
  - Build the other modules (optimizer, etc.) used by the core model.

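The sketch below makes the (name, data) feed format and the key-value / batching steps concrete with plain `torch.utils.data`. It is only an illustration: `KeyValueSpeechDataset` and `collate` are hypothetical helpers rather than ESPnet's `ESPnetDataset` or its batch samplers, and the in-memory dicts stand in for files on disk.

```python
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset


class KeyValueSpeechDataset(Dataset):
    """Each utterance id maps to a dict of named arrays, e.g. {"speech": ..., "text": ...}."""

    def __init__(self, sources):
        # sources: {data_name: {utt_id: np.ndarray}}
        self.sources = sources
        self.utt_ids = sorted(next(iter(sources.values())).keys())

    def __len__(self):
        return len(self.utt_ids)

    def __getitem__(self, index):
        uid = self.utt_ids[index]
        # Return the element in key-value form: (utt_id, {name: array}).
        return uid, {name: data[uid] for name, data in self.sources.items()}


def collate(samples):
    """Organize elements into a padded batch, plus per-item lengths."""
    uids = [uid for uid, _ in samples]
    batch = {}
    for name in samples[0][1]:
        arrays = [torch.as_tensor(item[name]) for _, item in samples]
        batch[name] = torch.nn.utils.rnn.pad_sequence(arrays, batch_first=True)
        batch[f"{name}_lengths"] = torch.tensor([a.shape[0] for a in arrays])
    return uids, batch


# Two fake utterances with raw speech samples and token ids.
sources = {
    "speech": {"utt1": np.random.randn(16000), "utt2": np.random.randn(12000)},
    "text": {"utt1": np.array([3, 7, 9]), "utt2": np.array([4, 2])},
}
loader = DataLoader(KeyValueSpeechDataset(sources), batch_size=2, collate_fn=collate)
uids, batch = next(iter(loader))
print(uids, batch["speech"].shape, batch["text_lengths"])
```
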
### training process
```python
-> main(cmd): [TTSTask => AbsTask]  # build the whole train/eval environment
  -> main_worker(args): [AbsTask]
    -> build_model(args): model [TTSTask]  # <= unfolded below
    -> build_optimizer(args, model=model): [AbsTask]
    -> scheduler_classes.get(name): scheduler
    -> schedulers.append(scheduler)
    -> build_iter_factory(args): train_iter_factory []
      -> build_sequence_iter_factory(args, iter_option): SequenceIterFactory []
        -> ESPnetDataset(iter_option.data_path_and_name_and_type, iter_option.preprocess): dataset [torch.utils.data]
        -> build_batch_sampler(iter_option.batch_type, iter_option.shape_files, batch_size): batch_sampler [samplers]
          -> NumElementsBatchSampler()
        -> SequenceIterFactory(dataset, batches, args.seeds): seqFactor []
          -> self.build_iter(epoch): DataLoader []
    -> build_iter_factory(args): valid_iter_factory []
    # dump all args to config.yaml
    # load the pretrained model
    -> trainer.run(model,
                   optimizers,
                   schedulers,
                   train_iter_factory,
                   valid_iter_factory,
                   seeds)  # overview of the training process
      -> train_one_epoch(
             model,
             optimizers,
             schedulers,
             train_iter_factory.build_iter(iepoch),
         )  # detailed training control within each epoch
        -> model(**batch)
          -> forward(text, text_len, speech, speech_len, spkem): [Tacotron2_controllable]
      # report results
      # save checkpoints
      # training error handling
```
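
The trace above compresses the whole control flow; the skeleton below spells out the same run / train_one_epoch loop in plain PyTorch. It is a generic sketch rather than ESPnet's actual Trainer, assumes the model's forward returns a tuple of (loss, stats, weight), and `max_epoch`, `grad_clip`, and `patience` are illustrative parameters.

```python
import torch


def run(model, optimizers, schedulers, train_iter_factory, valid_iter_factory,
        max_epoch=100, grad_clip=1.0, patience=10):
    """Generic skeleton of the trainer.run() control flow traced above."""
    best_loss, bad_epochs = float("inf"), 0
    for iepoch in range(1, max_epoch + 1):
        # Each epoch gets a freshly built iterator (reshuffled with the epoch seed).
        train_one_epoch(model, optimizers, schedulers,
                        train_iter_factory.build_iter(iepoch), grad_clip)
        valid_loss = validate(model, valid_iter_factory.build_iter(iepoch))
        torch.save(model.state_dict(), f"checkpoint.{iepoch}.pth")  # save checkpoints
        # Early stopping on the validation loss (patience / early-stop control).
        if valid_loss < best_loss:
            best_loss, bad_epochs = valid_loss, 0
        else:
            bad_epochs += 1
            if bad_epochs >= patience:
                break


def train_one_epoch(model, optimizers, schedulers, iterator, grad_clip):
    model.train()
    for batch in iterator:
        # Assumed return convention: (loss, stats, weight).
        loss, stats, weight = model(**batch)
        for opt in optimizers:
            opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        for opt in optimizers:
            opt.step()
        for sch in schedulers:
            sch.step()


def validate(model, iterator):
    model.eval()
    total, n = 0.0, 0
    with torch.no_grad():
        for batch in iterator:
            loss, stats, weight = model(**batch)
            total, n = total + float(loss), n + 1
    return total / max(n, 1)
```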
### build model
```python
[AbsTask] -> main(): None []
  -> main_worker(args): None [ESPnetTTSModel]
    -> build_model(args): model [TTSTask]
      # feats_extract
      -> feats_extractor_choices.get_class(args.feats_extract): feats_extract_class
      -> feats_extract_class(**args.feats_extract_conf): feats_extract
      # emo_extract
      ...
      -> emofeats_extract_class(): emo_extract
      ...
      -> emo_classifier_class(): emo_classifier
      # normalizer
      ...
      # TTS
      ...
      -> tts_class = tts_choices.get_class(args.tts)
      -> tts_class(idim, odim, **args.tts_conf): tts
      # extract components
      ...
      -> pitch_extract_class(**args.pitch_extract_conf): pitch_extract

      # assemble everything into the wrapper model
      -> ESPnetTTSModel(feats_extract, emo_extract, emo_classifier, tts): model
```
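
What build_model ultimately produces is a wrapper that owns the feature extractor, normalizer, and core TTS module and wires them together in its forward pass. The class below is a minimal illustrative stand-in for such a wrapper, not the real ESPnetTTSModel:

```python
import torch


class CompositeTTSModel(torch.nn.Module):
    """Illustrative stand-in for the wrapper assembled by build_model()."""

    def __init__(self, feats_extract, normalize, tts):
        super().__init__()
        self.feats_extract = feats_extract
        self.normalize = normalize
        self.tts = tts

    def forward(self, text, text_lengths, speech, speech_lengths, **kwargs):
        # 1. Mid-level processing: raw waveform -> features (e.g. log-mel).
        feats, feats_lengths = self.feats_extract(speech, speech_lengths)
        # 2. Optional normalization of the extracted features.
        if self.normalize is not None:
            feats, feats_lengths = self.normalize(feats, feats_lengths)
        # 3. The core TTS module consumes text and target features.
        return self.tts(text=text, text_lengths=text_lengths,
                        feats=feats, feats_lengths=feats_lengths, **kwargs)
```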
### inference process
```python
-> ExpStyleTTS(model_f, model_conf, vocoder_conf, text): exp
  -> TTSTask.build_model_from_file(model_f, model_conf, device): (model, args)
    -> yaml.safe_load(model_conf): args
    -> cls.build_model(args): model
    -> model.to(device)
    -> model.load_state_dict(torch.load(model_f, map_location=device))

# inference 1
-> exp.exp_ref_audio(ref_audio):
  -> Text2Speech(model, model_conf, vocoder_conf, forward_window=1): t2s  # text -> mels, mels -> mags -> wavs
  -> t2s.__call__(text, ref_audio=None, emo_labs=None, emo_feats=None):
    -> batch["text"] = text          # required
    -> batch["speech"] = ref_audio   # optional
    -> batch["emo_labs"] = emo_labs  # optional
    ...
    -> decode_config.update({"forward_window": forward_window}, ...)  # model-specific input
    -> model(**batch, **decode_config): mels
  -> utils.grifflim.Spectrogram2wav(vocoder_conf): spectrogram2wav  # handles both mel and linear spectrograms
  -> spectrogram2wav(mels)
    -> utils.logmel2linear(mels): mags_pred
      -> np.linalg.pinv(mels_basis) @ mels
    -> utils.grifflim(mags_pred): wav

# inference 2
-> exp.exp_emo_labs(ref_audio):
```
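
The last few calls (logmel2linear followed by Griffin-Lim) correspond to the standard log-mel -> linear-magnitude -> waveform reconstruction. Below is a rough numpy/librosa sketch of that idea; it is not the repository's utils.grifflim code, and SR, N_FFT, HOP, and N_MELS are placeholder values that would really come from vocoder_conf.

```python
import librosa
import numpy as np

# Placeholder STFT/mel settings; the real values live in vocoder_conf.
SR, N_FFT, HOP, N_MELS = 22050, 1024, 256, 80


def logmel2linear(log_mels):
    """Invert a log-mel spectrogram (n_mels, frames) to a linear magnitude
    spectrogram via the pseudo-inverse of the mel filterbank."""
    mel_basis = librosa.filters.mel(sr=SR, n_fft=N_FFT, n_mels=N_MELS)  # (n_mels, 1 + n_fft // 2)
    mels = np.exp(log_mels)                  # undo the log (natural log assumed)
    mags = np.linalg.pinv(mel_basis) @ mels  # (1 + n_fft // 2, frames)
    return np.maximum(1e-10, mags)


def spectrogram2wav(log_mels):
    """mels -> mags -> wav, as in the inference trace above."""
    mags = logmel2linear(log_mels)
    return librosa.griffinlim(mags, n_iter=60, hop_length=HOP, win_length=N_FFT)
```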