fix ctc online

PaddlePaddle · zh794390558 · Jan 24, 2022 · Jan 19, 2022 · Jan 19, 2022 · Jan 20, 2022
commit 624e86d07d585db5e7a0e6735f1e706657105e6d
diff --git a/paddlespeech/s2t/exps/deepspeech2/model.py b/paddlespeech/s2t/exps/deepspeech2/model.py
@@ -417,13 +417,15 @@ def compute_result_transcripts(self, audio, audio_len):
                 audio, audio_len, decoder_chunk_size=1)
             result_transcripts = trans[-1:]
         elif self.args.model_type == "offline":
+            batch_size = output_probs.shape[0]
+            self.model.decoder.reset_decoder(batch_size = batch_size)
             output_probs, output_lens = self.static_forward_offline(audio,
                                                                     audio_len)
 
             self.model.decoder.next(output_probs, output_lens)
 
             trans_best, trans_beam = self.model.decoder.decode()
-            self.model.decoder.reset_decoder()
+
             result_transcripts = trans_best
 
         else:
@@ -522,7 +524,8 @@ def static_forward_online(self, audio, audio_len,
             if self.args.enable_auto_log is True:
                 # record the model preprocessing time
                 self.autolog.times.stamp()
-
+
+            self.model.decoder.reset_decoder(batch_size = 1)
             for i in range(0, num_chunk):
                 start = i * chunk_stride
                 end = start + chunk_size
@@ -566,7 +569,7 @@ def static_forward_online(self, audio, audio_len,
                 probs_chunk_lens_list.append(output_chunk_lens)
                 trans_best, trans_beam = self.model.decoder.decode()
                 trans.append(trans_best[0])
-            self.model.decoder.reset_decoder()
+
 
             output_probs = np.concatenate(probs_chunk_list, axis=1)
             output_lens = np.sum(probs_chunk_lens_list, axis=0)

diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -170,9 +170,11 @@ def decode(self, audio, audio_len):
         # Make sure the decoder has been initialized
         eouts, eouts_len = self.encoder(audio, audio_len)
         probs = self.decoder.softmax(eouts)
+        batch_size = probs.shape[0]
+        self.decoder.reset_decoder(batch_size=batch_size)
         self.decoder.next(probs, eouts_len)
         trans_best, trans_beam = self.decoder.decode()
-        self.decoder.reset_decoder()
+
         return trans_best
 
     @classmethod

diff --git a/paddlespeech/s2t/models/ds2_online/deepspeech2.py b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
@@ -299,9 +299,10 @@ def decode(self, audio, audio_len):
         eouts, eouts_len, final_state_h_box, final_state_c_box = self.encoder(
             audio, audio_len, None, None)
         probs = self.decoder.softmax(eouts)
+        batch_size = probs.shape[0]
+        self.decoder.reset_decoder(batch_size=batch_size)
         self.decoder.next(probs, eouts_len)
         trans_best, trans_beam = self.decoder.decode()
-        self.decoder.reset_decoder()
         return trans_best
 
     @classmethod

diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py
@@ -268,7 +268,13 @@ def init_decoder(self, batch_size, vocab_list, decoding_method,
         Returns:
             CTCBeamSearchDecoder
         """
+        self.batch_size = batch_size
+        self.vocab_list = vocab_list
         self.decoding_method = decoding_method
+        self.beam_size = beam_size
+        self.cutoff_prob = cutoff_prob
+        self.cutoff_top_n = cutoff_top_n
+        self.num_processes = num_processes
         if decoding_method == "ctc_beam_search":
             self._init_ext_scorer(beam_alpha, beam_beta, lang_model_path,
                                   vocab_list)
@@ -420,7 +426,17 @@ def decode(self):
 
         return results_best, results_beam
 
-    def reset_decoder(self):
+    def reset_decoder(self, batch_size=-1, beam_size=-1, num_processes=-1, cutoff_prob=-1.0, cutoff_top_n=-1):
+        if batch_size > 0:
+            self.batch_size = batch_size
+        if beam_size > 0:
+            self.beam_size = beam_size
+        if num_processes > 0:
+            self.num_processes = num_processes
+        if cutoff_prob > 0:
+            self.cutoff_prob = cutoff_prob
+        if cutoff_top_n > 0:
+            self.cutoff_top_n = cutoff_top_n
         """
         Reset the decoder state
         Raises:
@@ -429,7 +445,7 @@ def reset_decoder(self):
         if self.beam_search_decoder is None:
             raise Exception(
                 "You need to initialize the beam_search_decoder firstly")
-        self.beam_search_decoder.reset_state()
+        self.beam_search_decoder.reset_state(self.batch_size, self.beam_size, self.num_processes, self.cutoff_prob, self.cutoff_top_n)
 
     def del_decoder(self):
         """

diff --git a/third_party/ctc_decoders/ctc_beam_search_decoder.cpp b/third_party/ctc_decoders/ctc_beam_search_decoder.cpp
@@ -478,6 +478,10 @@ CtcBeamSearchDecoderBatch::CtcBeamSearchDecoderBatch(
       cutoff_top_n(cutoff_top_n),
       ext_scorer(ext_scorer),
       blank_id(blank_id) {
+    VALID_CHECK_GT(
+        this->beam_size, 0, "beam_size must be greater than 0!");
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be greater than 0!");
     this->vocabulary = vocabulary;
     for (size_t i = 0; i < batch_size; i++) {
         this->decoder_storage_vector.push_back(
@@ -560,11 +564,26 @@ CtcBeamSearchDecoderBatch::decode() {
     }
     return batch_results;
 }
+
+
 /**
  * reset the state of ctcBeamSearchDecoderBatch
  */
+void CtcBeamSearchDecoderBatch::reset_state(
+    size_t batch_size,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob,
+    size_t cutoff_top_n){
 
-void CtcBeamSearchDecoderBatch::reset_state() {
+    this->batch_size = batch_size;
+    this->beam_size = beam_size;
+    this->num_processes = num_processes;
+    this->cutoff_prob = cutoff_prob;
+    this->cutoff_top_n = cutoff_top_n;
+
+    VALID_CHECK_GT(
+        this->beam_size, 0, "beam_size must be greater than 0!");
     VALID_CHECK_GT(
         this->num_processes, 0, "num_processes must be greater than 0!");
     // thread pool

diff --git a/third_party/ctc_decoders/ctc_beam_search_decoder.h b/third_party/ctc_decoders/ctc_beam_search_decoder.h
@@ -123,7 +123,12 @@ class CtcBeamSearchDecoderBatch {
 
     std::vector<std::vector<std::pair<double, std::string>>> decode();
 
-    void reset_state();
+    void reset_state(
+        size_t batch_size,
+        size_t beam_size,
+        size_t num_processes,
+        double cutoff_prob,
+        size_t cutoff_top_n);
 
   private:
     std::vector<std::string> vocabulary;