From ad057305f718f51d9b05c344b80bcbeff2ba4c21 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Thu, 27 Jul 2023 19:51:43 -0400 Subject: [PATCH] Make RDOVAE encoder use LinearLayer directly --- autogen.bat | 4 +--- autogen.sh | 2 +- dnn/download_model.bat | 2 -- dnn/download_model.sh | 4 +--- dnn/dred_rdovae_enc.c | 23 +++++++++++------------ dnn/nnet.h | 2 +- dnn/write_lpcnet_weights.c | 2 +- silk/dred_encoder.c | 2 +- 8 files changed, 17 insertions(+), 24 deletions(-) diff --git a/autogen.bat b/autogen.bat index 998a8fb6d..098a5aa56 100644 --- a/autogen.bat +++ b/autogen.bat @@ -10,8 +10,6 @@ for /F "tokens=4 delims= " %%A in ('findstr "download_model.sh" autogen.sh') do REM Remove trailing ")" character from the model variable set "model=%model:~0,-1%" -cd dnn -call download_model.bat %model% -cd .. +call dnn\download_model.bat %model% echo Updating build configuration files, please wait.... diff --git a/autogen.sh b/autogen.sh index f2f3841d1..559067b0c 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,7 +9,7 @@ set -e srcdir=`dirname $0` test -n "$srcdir" && cd "$srcdir" -(cd dnn; ./download_model.sh 2ddc476) +dnn/download_model.sh eb72d29 echo "Updating build configuration files, please wait...." diff --git a/dnn/download_model.bat b/dnn/download_model.bat index ba16f0f0d..32d03ff88 100644 --- a/dnn/download_model.bat +++ b/dnn/download_model.bat @@ -7,5 +7,3 @@ if not exist %model% ( ) tar -xvzf %model% -move .\src\*.c . -move .\src\*.h . diff --git a/dnn/download_model.sh b/dnn/download_model.sh index b5fbf0002..aceeee9dc 100755 --- a/dnn/download_model.sh +++ b/dnn/download_model.sh @@ -8,6 +8,4 @@ if [ ! -f $model ]; then wget https://media.xiph.org/lpcnet/data/$model fi tar xvof $model -touch src/nnet_data.[ch] -touch src/plc_data.[ch] -mv src/*.[ch] . +touch *_data.[ch] diff --git a/dnn/dred_rdovae_enc.c b/dnn/dred_rdovae_enc.c index d1bee1eeb..9361af17b 100644 --- a/dnn/dred_rdovae_enc.c +++ b/dnn/dred_rdovae_enc.c @@ -46,50 +46,49 @@ void dred_rdovae_encode_dframe( float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE]; int output_index = 0; int input_index = 0; - float zero_vector[1024] = {0}; /* run encoder stack and concatenate output in buffer*/ - _lpcnet_compute_dense(&model->enc_dense1, &buffer[output_index], input); + compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH); input_index = output_index; output_index += ENC_DENSE1_OUT_SIZE; - compute_gruB(&model->enc_dense2, zero_vector, enc_state->dense2_state, &buffer[input_index]); + compute_generic_gru(&model->enc_dense2_input, &model->enc_dense2_recurrent, enc_state->dense2_state, &buffer[input_index]); OPUS_COPY(&buffer[output_index], enc_state->dense2_state, ENC_DENSE2_OUT_SIZE); input_index = output_index; output_index += ENC_DENSE2_OUT_SIZE; - _lpcnet_compute_dense(&model->enc_dense3, &buffer[output_index], &buffer[input_index]); + compute_generic_dense(&model->enc_dense3, &buffer[output_index], &buffer[input_index], ACTIVATION_TANH); input_index = output_index; output_index += ENC_DENSE3_OUT_SIZE; - compute_gruB(&model->enc_dense4, zero_vector, enc_state->dense4_state, &buffer[input_index]); + compute_generic_gru(&model->enc_dense4_input, &model->enc_dense4_recurrent, enc_state->dense4_state, &buffer[input_index]); OPUS_COPY(&buffer[output_index], enc_state->dense4_state, ENC_DENSE4_OUT_SIZE); input_index = output_index; output_index += ENC_DENSE4_OUT_SIZE; - _lpcnet_compute_dense(&model->enc_dense5, &buffer[output_index], &buffer[input_index]); + compute_generic_dense(&model->enc_dense5, &buffer[output_index], &buffer[input_index], ACTIVATION_TANH); input_index = output_index; output_index += ENC_DENSE5_OUT_SIZE; - compute_gruB(&model->enc_dense6, zero_vector, enc_state->dense6_state, &buffer[input_index]); + compute_generic_gru(&model->enc_dense6_input, &model->enc_dense6_recurrent, enc_state->dense6_state, &buffer[input_index]); OPUS_COPY(&buffer[output_index], enc_state->dense6_state, ENC_DENSE6_OUT_SIZE); input_index = output_index; output_index += ENC_DENSE6_OUT_SIZE; - _lpcnet_compute_dense(&model->enc_dense7, &buffer[output_index], &buffer[input_index]); + compute_generic_dense(&model->enc_dense7, &buffer[output_index], &buffer[input_index], ACTIVATION_TANH); input_index = output_index; output_index += ENC_DENSE7_OUT_SIZE; - _lpcnet_compute_dense(&model->enc_dense8, &buffer[output_index], &buffer[input_index]); + compute_generic_dense(&model->enc_dense8, &buffer[output_index], &buffer[input_index], ACTIVATION_TANH); output_index += ENC_DENSE8_OUT_SIZE; /* compute latents from concatenated input buffer */ - compute_conv1d(&model->bits_dense, latents, enc_state->bits_dense_state, buffer); + compute_generic_conv1d(&model->bits_dense, latents, enc_state->bits_dense_state, buffer, BITS_DENSE_IN_SIZE, ACTIVATION_LINEAR); /* next, calculate initial state */ - _lpcnet_compute_dense(&model->gdense1, &buffer[output_index], buffer); + compute_generic_dense(&model->gdense1, &buffer[output_index], buffer, ACTIVATION_TANH); input_index = output_index; - _lpcnet_compute_dense(&model->gdense2, initial_state, &buffer[input_index]); + compute_generic_dense(&model->gdense2, initial_state, &buffer[input_index], ACTIVATION_TANH); } diff --git a/dnn/nnet.h b/dnn/nnet.h index 71c91ca37..0e7cd6dc3 100644 --- a/dnn/nnet.h +++ b/dnn/nnet.h @@ -161,7 +161,7 @@ int sample_from_pdf(const float *pdf, int N, float exp_boost, float pdf_floor); extern const WeightArray lpcnet_arrays[]; extern const WeightArray lpcnet_plc_arrays[]; -extern const WeightArray rdovae_enc_arrays[]; +extern const WeightArray rdovaeenc_arrays[]; extern const WeightArray rdovae_dec_arrays[]; int linear_init(LinearLayer *layer, const WeightArray *arrays, diff --git a/dnn/write_lpcnet_weights.c b/dnn/write_lpcnet_weights.c index b1760161b..aa36db1f6 100644 --- a/dnn/write_lpcnet_weights.c +++ b/dnn/write_lpcnet_weights.c @@ -72,7 +72,7 @@ int main(void) FILE *fout = fopen("weights_blob.bin", "w"); write_weights(lpcnet_arrays, fout); write_weights(lpcnet_plc_arrays, fout); - write_weights(rdovae_enc_arrays, fout); + write_weights(rdovaeenc_arrays, fout); write_weights(rdovae_dec_arrays, fout); fclose(fout); return 0; diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c index 89e0f9000..5bae39e9d 100644 --- a/silk/dred_encoder.c +++ b/silk/dred_encoder.c @@ -69,7 +69,7 @@ void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels) enc->Fs = Fs; enc->channels = channels; #ifndef USE_WEIGHTS_FILE - init_rdovaeenc(&enc->model, rdovae_enc_arrays); + init_rdovaeenc(&enc->model, rdovaeenc_arrays); #endif dred_encoder_reset(enc); }