From fe20bfd952b1dbfbd8bba4b2cd5227754f91a9c8 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Thu, 25 Aug 2016 01:24:37 +0900 Subject: [PATCH 01/20] dnn load --- julius/main.c | 27 ++ libsent/Makefile.in | 1 + libsent/include/sent/dnn.h | 44 ++++ libsent/include/sent/hmm_calc.h | 7 +- libsent/src/phmm/calc_dnn.c | 315 ++++++++++++++++++++++++ libsent/src/phmm/outprob_dnn.c | 420 ++++++++++++++++++++++++++++++++ 6 files changed, 813 insertions(+), 1 deletion(-) create mode 100644 libsent/include/sent/dnn.h create mode 100644 libsent/src/phmm/calc_dnn.c create mode 100644 libsent/src/phmm/outprob_dnn.c diff --git a/julius/main.c b/julius/main.c index 598d28d0..84fdfc91 100644 --- a/julius/main.c +++ b/julius/main.c @@ -92,6 +92,33 @@ main(int argc, char *argv[]) return -1; } + { + HMMWork wrk; + char *wfile[5] = { + "/home/ri/dictation-kit/model/dnn/W_l1.npy", + "/home/ri/dictation-kit/model/dnn/W_l2.npy", + "/home/ri/dictation-kit/model/dnn/W_l3.npy", + "/home/ri/dictation-kit/model/dnn/W_l4.npy", + "/home/ri/dictation-kit/model/dnn/W_l5.npy"}; + char *bfile[5] = { + "/home/ri/dictation-kit/model/dnn/bias_l1.npy", + "/home/ri/dictation-kit/model/dnn/bias_l2.npy", + "/home/ri/dictation-kit/model/dnn/bias_l3.npy", + "/home/ri/dictation-kit/model/dnn/bias_l4.npy", + "/home/ri/dictation-kit/model/dnn/bias_l5.npy"}; + + dnn_init(&(wrk.dnn), 120, 11, 1320, 2004, 2048, 5, wfile, bfile, "/home/ri/dictation-kit/model/dnn/W_output.npy", "/home/ri/dictation-kit/model/dnn/bias_output.npy", "/home/ri/dictation-kit/model/dnn/prior.dnn", 1.0f, 64); + exit(1); + } + + + + + + + + + /* add application options */ record_add_option(); module_add_option(); diff --git a/libsent/Makefile.in b/libsent/Makefile.in index 1b0c3ee6..76c14234 100644 --- a/libsent/Makefile.in +++ b/libsent/Makefile.in @@ -106,6 +106,7 @@ src/phmm/gprune_beam.o \ src/phmm/addlog.o \ src/phmm/mkwhmm.o \ src/phmm/vsegment.o \ +src/phmm/calc_dnn.o \ src/util/endian.o \ src/util/jlog.o \ src/util/mymalloc.o \ diff 
--git a/libsent/include/sent/dnn.h b/libsent/include/sent/dnn.h new file mode 100644 index 00000000..1b504270 --- /dev/null +++ b/libsent/include/sent/dnn.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1991-2016 Kawahara Lab., Kyoto University + * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology + * Copyright (c) 2005-2016 Julius project team, Nagoya Institute of Technology + * All rights reserved + */ + +#ifndef __SENT_DNN_H__ +#define __SENT_DNN_H__ + +#include +#include +#include +#include + +typedef struct { + float *w; /* w [out * in]*/ + float *b; /* b [out] */ + int in; + int out; +} DNNLayer; + +typedef struct { + DNNLayer o; /* output layer */ + DNNLayer *h; /* hidden layer */ + int hnum; /* number of hidden layers */ + + float *state_prior; /* state priors [id] */ + int state_prior_num; /* num of above (= output layer length) */ + + int batch_size; /* batch size */ + + int veclen; /* input vector length (before expansion) */ + int contextlen; /* context length */ + + int inputnodenum; /* input layer node number */ + int hiddennodenum; /* hidden layer node number */ + int outputnodenum; /* output layer node number */ + + float *work[2]; + +} DNNData; + +#endif /* __SENT_DNN_H__ */ diff --git a/libsent/include/sent/hmm_calc.h b/libsent/include/sent/hmm_calc.h index bda950a6..8651d24e 100644 --- a/libsent/include/sent/hmm_calc.h +++ b/libsent/include/sent/hmm_calc.h @@ -29,6 +29,7 @@ #include #include #include +#include /** * @brief Symbols to specify which Gaussian pruning algorithm to use. 
@@ -156,8 +157,9 @@ typedef struct __hmmwork__{ boolean batch_computation; -} HMMWork; + DNNData dnn; ///< DNN definitions +} HMMWork; #ifdef __cplusplus extern "C" { @@ -228,6 +230,9 @@ boolean gprune_beam_init(HMMWork *wrk); void gprune_beam_free(HMMWork *wrk); void gprune_beam(HMMWork *wrk, HTK_HMM_Dens **g, int gnum, int *last_id, int lnum); +boolean dnn_init(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize); + +boolean dnn_calc_outprob(HMMWork *wrk); #ifdef __cplusplus } diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c new file mode 100644 index 00000000..f8d2f525 --- /dev/null +++ b/libsent/src/phmm/calc_dnn.c @@ -0,0 +1,315 @@ +/* + * Copyright (c) 1991-2013 Kawahara Lab., Kyoto University + * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology + * Copyright (c) 2005-2013 Julius project team, Nagoya Institute of Technology + * All rights reserved + */ + +#include +#include +#include +#include +#include + +/************************************************************************/ +/* .npy file load */ + +boolean load_npy(float *array, char *filename, int x, int y) +{ + FILE *fp; + unsigned char code; + char magic[6]; + unsigned char major_version; + unsigned char minor_version; + unsigned short header_len; + char *header; + size_t len; + boolean fortran_order; + int i, j; + + if ((fp = fopen_readfile(filename)) == NULL) { + jlog("Error: load_npy: unable to open: %s\n", filename); + return FALSE; + } + if ((len = myfread(&code, 1, 1, fp)) < 1) { + jlog("Error: load_npy: failed to read header: %s\n", filename); + fclose_readfile(fp); + return FALSE; + } + if (code != 0x93) { + jlog("Error: load_npy: wrong magic number, not an npy file: %s\n", filename); + return FALSE; + } + if ((len = myfread(magic, 1, 5, fp)) < 5) { + jlog("Error: 
load_npy: failed to read header: %s\n", filename); + fclose_readfile(fp); + return FALSE; + } + magic[5] = '\0'; + if (strmatch(magic, "NUMPY") == FALSE) { + jlog("Error: load_npy: wrong magic header, not an npy file: %s\n", filename); + return FALSE; + } + if ((len = myfread(&major_version, 1, 1, fp)) < 1) { + jlog("Error: load_npy: failed to read header: %s\n", filename); + fclose_readfile(fp); + return FALSE; + } + /* we only assume Version 1.x format */ + /* not check subversion x */ + if (major_version != 1) { + jlog("Error: load_npy: can read only Version 1.0 but this file is Version %d\n", major_version); + fclose_readfile(fp); + return FALSE; + } + if ((len = myfread(&minor_version, 1, 1, fp)) < 1) { + jlog("Error: load_npy: failed to read header: %s\n", filename); + fclose_readfile(fp); + return FALSE; + } + + /* currently not support all conversion */ + /* accept only littlen endian 4byte float, with fortran order */ + /* produce error if the file has other format */ + if ((len = myfread(&header_len, 2, 1, fp)) < 1) { + jlog("Error: load_npy: failed to read header length: %s\n", filename); + fclose_readfile(fp); + return FALSE; + } +#ifdef WORDS_BIGENDIAN + swap_bytes(&header_len, 2, 1); +#endif + header = (char *)mymalloc(header_len + 1); + if ((len = myfread(header, 1, header_len, fp)) < header_len) { + jlog("Error: load_npy: failed to read header (%d bytes): %s\n", header_len, filename); + free(header); + fclose_readfile(fp); + return FALSE; + } + header[header_len] = '\0'; + if (strstr(header, "'descr': '= 6.0f) return 1.0f; + return logistic_table[(int)((x + 6.0f) * LOGISTIC_TABLE_FACTOR)]; +} + +/* initialize dnn layer */ +void dnn_layer_init(DNNLayer *l) +{ + l->w = NULL; + l->b = NULL; + l->in = 0; + l->out = 0; +} + +/* load dnn layer parameter from files */ +boolean dnn_layer_load(DNNLayer *l, int in, int out, char *wfile, char *bfile) +{ + l->in = in; + l->out = out; + l->w = (float *)mymalloc(sizeof(float) * l->out * l->in); + l->b = (float 
*)mymalloc(sizeof(float) * l->out); + if (! load_npy(l->w, wfile, l->in, l->out)) return FALSE; + jlog("Stat: dnn_layer_load: loaded %s\n", wfile); + if (! load_npy(l->b, bfile, l->out, 1)) return FALSE; + jlog("Stat: dnn_layer_load: loaded %s\n", bfile); + return TRUE; +} + +/* clear dnn layer */ +void dnn_layer_clear(DNNLayer *l) +{ + if (l->w != NULL) free(l->w); + if (l->b != NULL) free(l->b); + dnn_layer_init(l); +} + +/* initialize dnn */ +boolean dnn_init(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize) +{ + int i; + + /* build logistic table */ + logistic_table_build(); + + /* check for input length */ + int inputlen = veclen * contextlen; + if (inputnodes != inputlen) { + jlog("Error: dnn_init: veclen(%d) * contextlen(%d) != inputnodes(%d)\n", veclen, contextlen, inputnodes); + return FALSE; + } + + jlog("Stat: dnn_init: input: vec %d * context %d = %d dim\n", veclen, contextlen, inputlen); + jlog("Stat: dnn_init: input layer: %d dim\n", inputnodes); + jlog("Stat: dnn_init: %d hidden layer(s): %d dim\n", hiddenlayernum, hiddennodes); + jlog("Stat: dnn_init: output layer: %d dim\n", outputnodes); + + /* initialize layers */ + dnn->hnum = hiddenlayernum; + dnn->h = (DNNLayer *)mymalloc(sizeof(DNNLayer) * dnn->hnum); + for (i = 0; i < dnn->hnum; i++) { + dnn_layer_init(&(dnn->h[i])); + } + dnn_layer_init(&(dnn->o)); + + /* load layer parameters */ + if (dnn_layer_load(&(dnn->h[0]), inputnodes, hiddennodes, wfile[0], bfile[0]) == FALSE) return FALSE; + for (i = 1; i < dnn->hnum; i++) { + if (dnn_layer_load(&(dnn->h[i]), hiddennodes, hiddennodes, wfile[i], bfile[i]) == FALSE) return FALSE; + } + if (dnn_layer_load(&(dnn->o), hiddennodes, outputnodes, output_wfile, output_bfile) == FALSE) return FALSE; + + /* load state prior */ + { + FILE *fp; + int id; + float val; + + 
dnn->state_prior_num = outputnodes; + dnn->state_prior = (float *)mymalloc(sizeof(float) * dnn->state_prior_num); + for (i = 0; i < dnn->state_prior_num; i++) { + dnn->state_prior[i] = 0.0f; + } + if ((fp = fopen(priorfile, "rb")) == NULL) { + jlog("Error: cannot open %s\n", priorfile); + return FALSE; + } + while (fscanf(fp, "%d %e", &id, &val) != EOF){ + if (id < 0 || id >= dnn->state_prior_num) { + jlog("Error: wrong state id in prior file (%d)\n", id); + fclose_readfile(fp); + return FALSE; + } + dnn->state_prior[id] = val * prior_factor; + } + fclose(fp); + jlog("Stat: dnn_init: state prior loaded: %s\n", priorfile); + } + + /* allocate work area */ + dnn->work[0] = (float *)mymalloc(sizeof(float) * dnn->hiddennodenum); + dnn->work[1] = (float *)mymalloc(sizeof(float) * dnn->hiddennodenum); + + return TRUE; +} + +void dnn_ff(DNNData *dnn, float *in, float *out_ret) +{ + int n; + float *src, *dst; + + /* feed forward by standard logistic function */ + src = in; + n = 0; + for (int i = 0; i < dnn->hnum; i++) { + dnn->work[n] = logistic_func(dnn->h[i].w * src + dnn->h[i].b); + src = dnn->work[n]; + if (++n > 1) n = 0; + } + out_ret = dnn->o.w * src + dnn->o.b; + /* do soft max */ +#if 0 + /* log10( (exp(x)/sum(exp(x))) / state_prior) */ + out_ret = exp(out_ret); + allsum = sum(out_ret); + out_ret /= allsum; + out_ret /= dnn->state_prior; + out_ret = log10(out_ret); +#else + /* INV_LOG_TEN * (x - addlogarray(x) - log(state_prior)) */ +#endif + +} + +/* compute outprob by DNN for the current state and parameter */ +boolean dnn_calc_outprob(HMMWork *wrk) +{ +} + diff --git a/libsent/src/phmm/outprob_dnn.c b/libsent/src/phmm/outprob_dnn.c new file mode 100644 index 00000000..30c8570b --- /dev/null +++ b/libsent/src/phmm/outprob_dnn.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 1991-2016 Kawahara Lab., Kyoto University + * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology + * Copyright (c) 2005-2016 Julius project team, Nagoya Institute of 
Technology + * All rights reserved + */ + +#include +#include +#include +#include +#include +#include + +#define LOG_UNDEF (LOG_ZERO - 1) ///< Value to be used as the initial cache value + +/** + * Initialize the cache data, should be called once on startup. + * + * @param wrk [i/o] HMM computation work area + * + * @return TRUE on success, FALSE on failure. + */ +boolean +outprob_cache_init(HMMWork *wrk) +{ + wrk->statenum = wrk->OP_hmminfo->totalstatenum; + wrk->outprob_cache = NULL; + wrk->outprob_allocframenum = 0; + wrk->OP_time = -1; + wrk->croot = NULL; + return TRUE; +} + +/** + * Prepare cache for the next input, by clearing the existing cache. + * + * @param wrk [i/o] HMM computation work area + * + * @return TRUE on success, FALSE on failure. + */ +boolean +outprob_cache_prepare(HMMWork *wrk) +{ + int s,t; + + /* clear already allocated area */ + for (t = 0; t < wrk->outprob_allocframenum; t++) { + for (s = 0; s < wrk->statenum; s++) { + wrk->outprob_cache[t][s] = LOG_UNDEF; + } + } + + return TRUE; +} + +/** + * Expand the cache to time axis if needed. 
+ * + * @param wrk [i/o] HMM computation work area + * @param reqframe [in] required frame length + */ +static void +outprob_cache_extend(HMMWork *wrk, int reqframe) +{ + int newnum; + int size; + int t, s; + LOGPROB *tmpp; + + /* if enough length are already allocated, return immediately */ + if (reqframe < wrk->outprob_allocframenum) return; + + /* allocate per certain period */ + newnum = reqframe + 1; + if (newnum < wrk->outprob_allocframenum + OUTPROB_CACHE_PERIOD) newnum = wrk->outprob_allocframenum + OUTPROB_CACHE_PERIOD; + size = (newnum - wrk->outprob_allocframenum) * wrk->statenum; + + /* allocate */ + if (wrk->outprob_cache == NULL) { + wrk->outprob_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * newnum); + } else { + wrk->outprob_cache = (LOGPROB **)myrealloc(wrk->outprob_cache, sizeof(LOGPROB *) * newnum); + } + tmpp = (LOGPROB *)mybmalloc2(sizeof(LOGPROB) * size, &(wrk->croot)); + /* clear the new part */ + for(t = wrk->outprob_allocframenum; t < newnum; t++) { + wrk->outprob_cache[t] = &(tmpp[(t - wrk->outprob_allocframenum) * wrk->statenum]); + for (s = 0; s < wrk->statenum; s++) { + wrk->outprob_cache[t][s] = LOG_UNDEF; + } + } + + /*jlog("outprob cache: %d->%d\n", outprob_allocframenum, newnum);*/ + wrk->outprob_allocframenum = newnum; +} + +/** + * Free work area for cache. + * + * @param wrk [i/o] HMM computation work area + * + */ +void +outprob_cache_free(HMMWork *wrk) +{ + if (wrk->croot != NULL) mybfree2(&(wrk->croot)); + if (wrk->outprob_cache != NULL) free(wrk->outprob_cache); +} + + +/** + * @brief Compute output probability of a state. + * + * Set the needed values to the global variables + * that begins with "OP_", and call calc_outprob_state(). The + * calc_outprob_state() is actually a function pointer, and the entity is + * either calc_tied_mix() for tied-mixture model and calc_mix() for others. + * (If you use GMS, the entity will be gms_state() instead.) + * + * The state-level cache is also consulted here. 
+ * + * @param wrk [i/o] HMM computation work area + * @param t [in] time frame + * @param stateinfo [in] state information to compute the output probability + * @param param [in] input parameter vectors + * + * @return output log probability. + */ +LOGPROB +outprob_state(HMMWork *wrk, int t, HTK_HMM_State *stateinfo, HTK_Param *param) +{ + LOGPROB outp; + int sid; + int i, d; + HTK_HMM_State *s; + + sid = stateinfo->id; + + /* set global values for outprob functions to access them */ + wrk->OP_state = stateinfo; + wrk->OP_state_id = sid; + wrk->OP_param = param; + if (wrk->OP_time != t) { + wrk->OP_last_time = wrk->OP_time; + wrk->OP_time = t; + for(d=0,i=0;iOP_nstream;i++) { + wrk->OP_vec_stream[i] = &(param->parvec[t][d]); + d += wrk->OP_veclen_stream[i]; + } + + outprob_cache_extend(wrk, t); /* extend cache if needed */ + wrk->last_cache = wrk->outprob_cache[t]; /* reduce 2-d array access */ + } + + if (param->is_outprob) { + /* return the param as output probability */ + if (sid >= param->veclen) { + jlog("Error: state id in the dummy HMM exceeds vector length (%d > %d)\n", sid, param->veclen); + return(LOG_ZERO); + } + return(param->parvec[t][sid]); + } + + if (wrk->batch_computation) { + /* batch computation: if the frame is not computed yet, pre-compute all */ + s = wrk->OP_hmminfo->ststart; + if (wrk->last_cache[s->id] == LOG_UNDEF) { + for (; s; s = s->next) { + wrk->OP_state = s; + wrk->OP_state_id = s->id; + wrk->last_cache[s->id] = (*(wrk->calc_outprob_state))(wrk); + } + } + wrk->OP_state = stateinfo; + wrk->OP_state_id = sid; + } + + /* consult cache */ + if ((outp = wrk->last_cache[sid]) == LOG_UNDEF) { + outp = wrk->last_cache[sid] = (*(wrk->calc_outprob_state))(wrk); + } + return(outp); +} + +/** + * Initialize work area for outprob_cd_nbest(). + * + * @param wrk [i/o] HMM computation work area + * @param num [in] number of top states to be calculated. 
+ */ +void +outprob_cd_nbest_init(HMMWork *wrk, int num) +{ + wrk->cd_nbest_maxprobs = (LOGPROB *)mymalloc(sizeof(LOGPROB) * num); + wrk->cd_nbest_maxn = num; +} + +/** + * Free work area for outprob_cd_nbest(). + * + * @param wrk [i/o] HMM computation work area + * + */ +void +outprob_cd_nbest_free(HMMWork *wrk) +{ + free(wrk->cd_nbest_maxprobs); +} + +/** + * Return average of N-beat outprob for pseudo state set. + * + * @param wrk [i/o] HMM computation work area + * @param t [in] input frame + * @param lset [in] pseudo state set + * @param param [in] input parameter data + * + * @return outprob log probability, average of top N states in @a lset. + */ +static LOGPROB +outprob_cd_nbest(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) +{ + LOGPROB prob; + int i, k, n; + + n = 0; + for(i=0;inum;i++) { + prob = outprob_state(wrk, t, lset->s[i], param); + /*jlog("\t\t%d:%f\n", i, prob);*/ + if (prob <= LOG_ZERO) continue; + if (n == 0 || prob <= wrk->cd_nbest_maxprobs[n-1]) { + if (n == wrk->cd_nbest_maxn) continue; + wrk->cd_nbest_maxprobs[n] = prob; + n++; + } else { + for(k=0; k wrk->cd_nbest_maxprobs[k]) { + memmove(&(wrk->cd_nbest_maxprobs[k+1]), &(wrk->cd_nbest_maxprobs[k]), + sizeof(LOGPROB) * (n - k - ( (n == wrk->cd_nbest_maxn) ? 1 : 0))); + wrk->cd_nbest_maxprobs[k] = prob; + break; + } + } + if (n < wrk->cd_nbest_maxn) n++; + } + } + prob = 0.0; + for(i=0;icd_nbest_maxprobs[i]);*/ + prob += wrk->cd_nbest_maxprobs[i]; + } + return(prob/(float)n); +} + +/** + * Return maximum outprob of the pseudo state set. + * + * @param wrk [i/o] HMM computation work area + * @param t [in] input frame + * @param lset [in] pseudo state set + * @param param [in] input parameter data + * + * @return maximum output log probability among states in @a lset. 
+ */ +static LOGPROB +outprob_cd_max(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) +{ + LOGPROB maxprob, prob; + int i; + + maxprob = LOG_ZERO; + for(i=0;inum;i++) { + prob = outprob_state(wrk, t, lset->s[i], param); + if (maxprob < prob) maxprob = prob; + } + return(maxprob); +} + +/** + * Return average outprob of the pseudo state set. + * + * @param wrk [i/o] HMM computation work area + * @param t [in] input frame + * @param lset [in] pseudo state set + * @param param [in] input parameter data + * + * @return average output log probability of states in @a lset. + */ +static LOGPROB +outprob_cd_avg(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) +{ + LOGPROB sum, p; + int i,j; + sum = 0.0; + j = 0; + for(i=0;inum;i++) { + p = outprob_state(wrk, t, lset->s[i], param); + if (p > LOG_ZERO) { + sum += p; + j++; + } + } + return(sum/(float)j); +} + +/** + * Compute the log output probability of a pseudo state set. + * + * @param wrk [i/o] HMM computation work area + * @param t [in] input frame + * @param lset [in] pseudo state set + * @param param [in] input parameter data + * + * @return the computed log output probability. + */ +LOGPROB +outprob_cd(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) +{ + LOGPROB ret; + + /* select computation method */ + switch(wrk->OP_hmminfo->cdset_method) { + case IWCD_AVG: + ret = outprob_cd_avg(wrk, t, lset, param); + break; + case IWCD_MAX: + ret = outprob_cd_max(wrk, t, lset, param); + break; + case IWCD_NBEST: + ret = outprob_cd_nbest(wrk, t, lset, param); + break; + } + return(ret); +} + + +/** + * Top function to compute the output probability of a HMM state. + * + * @param wrk [i/o] HMM computation work area + * @param t [in] input frame + * @param hmmstate [in] HMM state + * @param param [in] input parameter data + * + * @return the computed log output probability. 
+ */ +LOGPROB +outprob(HMMWork *wrk, int t, HMM_STATE *hmmstate, HTK_Param *param) +{ + if (hmmstate->is_pseudo_state) { + return(outprob_cd(wrk, t, hmmstate->out.cdset, param)); + } else { + return(outprob_state(wrk, t, hmmstate->out.state, param)); + } +} + + + + +static boolean +mywrite(char *buf, size_t unitbyte, int unitnum, FILE *fp, boolean needswap) +{ + size_t tmp; + + if (needswap) swap_bytes(buf, unitbyte, unitnum); + if ((tmp = myfwrite(buf, unitbyte, unitnum, fp)) < (size_t)unitnum) { + jlog("Error: outprob_cache_output: failed to write %d bytes\n", unitbyte * unitnum); + return(FALSE); + } + // if (needswap) swap_bytes(buf, unitbyte, unitnum); + return(TRUE); +} + +boolean +outprob_cache_output(FILE *fp, HMMWork *wrk, int framenum) +{ + int s,t; + boolean needswap; + +#ifdef WORDS_BIGENDIAN + needswap = FALSE; +#else /* LITTLE ENDIAN */ + needswap = TRUE; +#endif + + needswap = TRUE; + + if (wrk->outprob_allocframenum < framenum) { + jlog("Error: outprob_cache_output: framenum > allocated (%d > %d)\n", framenum, wrk->outprob_allocframenum); + return FALSE; + } + + { + unsigned int ui; + unsigned short us; + short st; + float f; + + jlog("Stat: outprob_cache_output: %d states, %d samples\n", wrk->statenum, framenum); + + ui = framenum; + if (!mywrite((char *)&ui, sizeof(unsigned int), 1, fp, needswap)) return FALSE; + ui = wrk->OP_param->header.wshift; + if (!mywrite((char *)&ui, sizeof(unsigned int), 1, fp, needswap)) return FALSE; + us = wrk->statenum * sizeof(float); + if (!mywrite((char *)&us, sizeof(unsigned short), 1, fp, needswap)) return FALSE; + st = F_USER; + if (!mywrite((char *)&st, sizeof(short), 1, fp, needswap)) return FALSE; + + for (t = 0; t < framenum; t++) { + for (s = 0; s < wrk->statenum; s++) { + f = wrk->outprob_cache[t][s]; + if (!mywrite((char *)&f, sizeof(float), 1, fp, needswap)) return FALSE; + } + } + } + + return TRUE; +} From 2903f15e1a9f2cc75acffac05aa442ce9d8f6237 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Thu, 25 
Aug 2016 12:09:40 +0900 Subject: [PATCH 02/20] incorporate DNN functions inside JCONF_AM and PROCESS_AM, made dnnconf file --- julius/main.c | 15 ++--- libjulius/include/julius/extern.h | 1 + libjulius/include/julius/jconf.h | 20 ++++++ libjulius/include/julius/recog.h | 6 ++ libjulius/src/default.c | 14 ++++ libjulius/src/instance.c | 20 ++++++ libjulius/src/jfunc.c | 9 ++- libjulius/src/m_fusion.c | 30 ++++++++- libjulius/src/m_info.c | 9 +++ libjulius/src/m_jconf.c | 106 ++++++++++++++++++++++++++++++ libjulius/src/m_options.c | 11 ++++ libsent/include/sent/dnn.h | 1 + libsent/include/sent/hmm_calc.h | 11 ++-- libsent/src/phmm/calc_dnn.c | 63 ++++++++++++++++-- libsent/src/phmm/outprob_init.c | 3 +- 15 files changed, 295 insertions(+), 24 deletions(-) diff --git a/julius/main.c b/julius/main.c index 84fdfc91..675bea44 100644 --- a/julius/main.c +++ b/julius/main.c @@ -92,8 +92,10 @@ main(int argc, char *argv[]) return -1; } + +#if 0 { - HMMWork wrk; + DNNData *dnn; char *wfile[5] = { "/home/ri/dictation-kit/model/dnn/W_l1.npy", "/home/ri/dictation-kit/model/dnn/W_l2.npy", @@ -107,16 +109,13 @@ main(int argc, char *argv[]) "/home/ri/dictation-kit/model/dnn/bias_l4.npy", "/home/ri/dictation-kit/model/dnn/bias_l5.npy"}; - dnn_init(&(wrk.dnn), 120, 11, 1320, 2004, 2048, 5, wfile, bfile, "/home/ri/dictation-kit/model/dnn/W_output.npy", "/home/ri/dictation-kit/model/dnn/bias_output.npy", "/home/ri/dictation-kit/model/dnn/prior.dnn", 1.0f, 64); + dnn = dnn_new(); + dnn_setup(dnn, 120, 11, 1320, 2004, 2048, 5, wfile, bfile, "/home/ri/dictation-kit/model/dnn/W_output.npy", "/home/ri/dictation-kit/model/dnn/bias_output.npy", "/home/ri/dictation-kit/model/dnn/prior.dnn", 1.0f, 64); + dnn_free(dnn); exit(1); } - - - - - - +#endif /* add application options */ diff --git a/libjulius/include/julius/extern.h b/libjulius/include/julius/extern.h index 553a546b..4520f17b 100644 --- a/libjulius/include/julius/extern.h +++ b/libjulius/include/julius/extern.h @@ -174,6 +174,7 @@ 
void opt_release(Jconf *jconf); void get_dirname(char *path); boolean config_string_parse(char *str, Jconf *jconf); boolean config_file_parse(char *conffile, Jconf *jconf); +boolean dnn_config_file_parse(char *filename, JCONF_AM *am); /* m_chkparam.c */ boolean checkpath(char *filename); boolean j_jconf_finalize(Jconf *jconf); diff --git a/libjulius/include/julius/jconf.h b/libjulius/include/julius/jconf.h index 597f1d91..9620e677 100644 --- a/libjulius/include/julius/jconf.h +++ b/libjulius/include/julius/jconf.h @@ -214,6 +214,26 @@ typedef struct __jconf_am__ { */ int gprune_plugin_source; + /** + * DNN configuration for DNN-HMM + */ + struct { + boolean enabled; + int veclen; /* vector length */ + int contextlen; /* context length */ + int inputnodes; /* number of input nodes (should match veclen * contextlen) */ + int outputnodes; /* number of output nodes (should match HMM state for num and order */ + int hiddennodes; /* number of nodes in a hidden layer */ + int hiddenlayernum; /* number of hidden layers */ + char **wfile; /* W matrix files for hidden layers */ + char **bfile; /* b vector files for hidden layers */ + char *output_wfile; /* W matrix file for output layer */ + char *output_bfile; /* b vector file for output layer */ + char *priorfile; /* state prior file */ + float prior_factor; /* state prior factor */ + int batchsize; /* batch size */ + } dnn; + /* pointer to next instance */ struct __jconf_am__ *next; diff --git a/libjulius/include/julius/recog.h b/libjulius/include/julius/recog.h index e749a0d7..6790b493 100644 --- a/libjulius/include/julius/recog.h +++ b/libjulius/include/julius/recog.h @@ -47,6 +47,7 @@ * +- *pointer to JCONF_AM * +- *pointer to MFCCCalc * +- hmminfo, hmm_gs + * +- dnn * +- hmmwrk * +- multipath, ccd_flag, cmn_loaded * +- PROCESS_LM[] (linked list) @@ -754,6 +755,11 @@ typedef struct __process_am__ { */ HMMWork hmmwrk; + /** + * DNN definitions + */ + DNNData *dnn; + /** * pointer to next * diff --git 
a/libjulius/src/default.c b/libjulius/src/default.c index 191f46f1..ffb91ec6 100644 --- a/libjulius/src/default.c +++ b/libjulius/src/default.c @@ -153,6 +153,20 @@ jconf_set_default_values_am(JCONF_AM *j) j->frontend.sscalc = FALSE; j->frontend.sscalc_len = 300; j->frontend.ssload_filename = NULL; + j->dnn.enabled = FALSE; + j->dnn.veclen = 0; + j->dnn.contextlen = 0; + j->dnn.inputnodes = 0; + j->dnn.outputnodes = 0; + j->dnn.hiddennodes = 0; + j->dnn.hiddenlayernum = 0; + j->dnn.wfile = NULL; + j->dnn.bfile = NULL; + j->dnn.output_wfile = NULL; + j->dnn.output_bfile = NULL; + j->dnn.priorfile = NULL; + j->dnn.prior_factor = 1.0; + j->dnn.batchsize = 1; } /** diff --git a/libjulius/src/instance.c b/libjulius/src/instance.c index 95c6a52a..5b8ed0b0 100644 --- a/libjulius/src/instance.c +++ b/libjulius/src/instance.c @@ -164,6 +164,7 @@ j_process_am_free(PROCESS_AM *am) outprob_free(&(am->hmmwrk)); if (am->hmminfo) hmminfo_free(am->hmminfo); if (am->hmm_gs) hmminfo_free(am->hmm_gs); + if (am->dnn) dnn_free(am->dnn); /* not free am->jconf */ free(am); } @@ -360,6 +361,25 @@ j_jconf_am_new() void j_jconf_am_free(JCONF_AM *amconf) { + int i; + if (amconf->dnn.wfile) { + for (i = 0; i < amconf->dnn.hiddenlayernum; i++) { + free(amconf->dnn.wfile[i]); + } + free(amconf->dnn.wfile); + } + if (amconf->dnn.bfile) { + for (i = 0; i < amconf->dnn.hiddenlayernum; i++) { + free(amconf->dnn.bfile[i]); + } + free(amconf->dnn.bfile); + } + if (amconf->dnn.output_wfile) + free(amconf->dnn.output_wfile); + if (amconf->dnn.output_bfile) + free(amconf->dnn.output_bfile); + if (amconf->dnn.priorfile) + free(amconf->dnn.priorfile); free(amconf); } diff --git a/libjulius/src/jfunc.c b/libjulius/src/jfunc.c index 2b59b181..6022afb8 100644 --- a/libjulius/src/jfunc.c +++ b/libjulius/src/jfunc.c @@ -336,17 +336,20 @@ j_config_remove_initial(Jconf *jconf) if(jconf->am_root->next != NULL && jconf->am_root->id == 0) { am = jconf->am_root->next; - free(jconf->am_root); + 
/*free(jconf->am_root);*/ + j_jconf_am_free(jconf->am_root); jconf->am_root = am; } if(jconf->lm_root->next != NULL && jconf->lm_root->id == 0) { lm = jconf->lm_root->next; - free(jconf->lm_root); + /*free(jconf->lm_root);*/ + j_jconf_lm_free(jconf->lm_root); jconf->lm_root = lm; } if(jconf->search_root->next != NULL && jconf->search_root->id == 0) { s = jconf->search_root->next; - free(jconf->search_root); + /*free(jconf->search_root);*/ + j_jconf_search_free(jconf->search_root); jconf->search_root = s; } } diff --git a/libjulius/src/m_fusion.c b/libjulius/src/m_fusion.c index 0486c2e1..d2e8acbc 100644 --- a/libjulius/src/m_fusion.c +++ b/libjulius/src/m_fusion.c @@ -551,6 +551,32 @@ j_load_am(Recog *recog, JCONF_AM *amconf) return FALSE; } } + /* DNN */ + if (amconf->dnn.enabled == TRUE) { + if ((am->dnn = dnn_new()) == NULL) { + jlog("ERROR: m_fusion: cannnot allocate DNN memory area\n"); + return FALSE; + } + if (dnn_setup(am->dnn, + amconf->dnn.veclen, + amconf->dnn.contextlen, + amconf->dnn.inputnodes, + amconf->dnn.outputnodes, + amconf->dnn.hiddennodes, + amconf->dnn.hiddenlayernum, + amconf->dnn.wfile, + amconf->dnn.bfile, + amconf->dnn.output_wfile, + amconf->dnn.output_bfile, + amconf->dnn.priorfile, + amconf->dnn.prior_factor, + amconf->dnn.batchsize) == FALSE) { + jlog("ERROR: m_fusion: failed to initialize DNN\n"); + dnn_free(am->dnn); + am->dnn = NULL; + return FALSE; + } + } /* fixate model-specific params */ /* set params whose default will change by models and not specified in arg */ @@ -1330,11 +1356,11 @@ j_final_fusion(Recog *recog) } #endif if (am->config->hmm_gs_filename != NULL) {/* with GMS */ - if (outprob_init(&(am->hmmwrk), am->hmminfo, am->hmm_gs, am->config->gs_statenum, am->config->gprune_method, am->config->mixnum_thres) == FALSE) { + if (outprob_init(&(am->hmmwrk), am->hmminfo, am->hmm_gs, am->config->gs_statenum, am->config->gprune_method, am->config->mixnum_thres, am->dnn) == FALSE) { return FALSE; } } else { - if 
(outprob_init(&(am->hmmwrk), am->hmminfo, NULL, 0, am->config->gprune_method, am->config->mixnum_thres) == FALSE) { + if (outprob_init(&(am->hmmwrk), am->hmminfo, NULL, 0, am->config->gprune_method, am->config->mixnum_thres, am->dnn) == FALSE) { return FALSE; } } diff --git a/libjulius/src/m_info.c b/libjulius/src/m_info.c index 85fa5131..7e191081 100644 --- a/libjulius/src/m_info.c +++ b/libjulius/src/m_info.c @@ -382,6 +382,15 @@ print_engine_info(Recog *recog) jlog(" sp transition penalty = %+2.1f\n", am->config->iwsp_penalty); } + if (am->dnn) { + jlog("\n DNN parameters:\n"); + jlog(" DNN input dim. = %d (%d x %d)\n", am->dnn->inputnodenum, am->dnn->veclen, am->dnn->contextlen); + jlog(" DNN output dim. = %d\n", am->dnn->outputnodenum); + jlog(" # of hidden layers = %d\n", am->dnn->hnum); + jlog(" hidden layer dim. = %d\n", am->dnn->hiddennodenum); + jlog(" state prior factor = %f\n", am->dnn->prior_factor); + jlog(" batch size = %d\n", am->dnn->batch_size); + } jlog("\n"); } diff --git a/libjulius/src/m_jconf.c b/libjulius/src/m_jconf.c index c0f1a1e1..4e1e4b04 100644 --- a/libjulius/src/m_jconf.c +++ b/libjulius/src/m_jconf.c @@ -568,4 +568,110 @@ config_file_parse(char *conffile, Jconf *jconf) return(ret); } +/* parse DNN config file */ +boolean +dnn_config_file_parse(char *filename, JCONF_AM *am) +{ + FILE *fp; + char buf[BUFLEN]; + char *p; + char *v; + int i, n; + boolean error_flag; + + if (am->dnn.wfile != NULL) { + jlog("ERROR: dnn_config_file_parse: duplicated loading: %s\n", filename); + return FALSE; + } + + if ((fp = fopen(filename, "r")) == NULL) { + jlog("ERROR: dnn_config_file_parse: failed to open %s\n", filename); + return FALSE; + } + while (fgets_jconf(buf, BUFLEN, fp) != NULL) { + if (buf[0] == '\0') continue; + p = strchr(buf, ' '); + if (p == NULL) { + jlog("ERROR: dnn_config_file_parse: wrong file format: %s\n", filename); + fclose(fp); + return FALSE; + } + v = p; + while (*v == ' ') v++; + *p = '\0'; + if (strmatch(buf, 
"feature_len")) am->dnn.veclen = atoi(v); + else if (strmatch(buf, "context_len")) am->dnn.contextlen = atoi(v); + else if (strmatch(buf, "input_nodes")) am->dnn.inputnodes = atoi(v); + else if (strmatch(buf, "output_nodes")) am->dnn.outputnodes = atoi(v); + else if (strmatch(buf, "hidden_nodes")) am->dnn.hiddennodes = atoi(v); + else if (strmatch(buf, "hidden_layers")) { + am->dnn.hiddenlayernum = atoi(v); + am->dnn.wfile = (char **)mymalloc(sizeof(char *) * am->dnn.hiddenlayernum); + am->dnn.bfile = (char **)mymalloc(sizeof(char *) * am->dnn.hiddenlayernum); + for (i = 0; i < am->dnn.hiddenlayernum; i++) { + am->dnn.wfile[i] = NULL; + am->dnn.bfile[i] = NULL; + } + } else if (buf[0] == 'W') { + n = atoi(&(buf[1])); + if (n > am->dnn.hiddenlayernum) { + jlog("ERROR: dnn_config_file_parse: W%d > # of hidden_layers (%d)\n", n, am->dnn.hiddenlayernum); + fclose(fp); + return FALSE; + } else if (n <= 0) { + jlog("ERROR: dnn_config_file_parse: layer id should begin with 1\n"); + fclose(fp); + return FALSE; + } + am->dnn.wfile[n-1] = strdup(v); + } else if (buf[0] == 'B') { + n = atoi(&(buf[1])); + if (n > am->dnn.hiddenlayernum) { + jlog("ERROR: dnn_config_file_parse: B%d > # of hidden_layers (%d)\n", n, am->dnn.hiddenlayernum); + fclose(fp); + return FALSE; + } else if (n <= 0) { + jlog("ERROR: dnn_config_file_parse: layer id should begin with 1\n"); + fclose(fp); + return FALSE; + } + am->dnn.bfile[n-1] = strdup(v); + } else if (strmatch(buf, "output_W")) am->dnn.output_wfile = strdup(v); + else if (strmatch(buf, "output_B")) am->dnn.output_bfile = strdup(v); + else if (strmatch(buf, "state_prior")) am->dnn.priorfile = strdup(v); + else if (strmatch(buf, "state_prior_factor")) am->dnn.prior_factor = atof(v); + else if (strmatch(buf, "batch_size")) am->dnn.batchsize = atoi(v); + else { + jlog("ERROR: dnn_config_file_parse: unknown spec: %s %s\n", buf, v); + fclose(fp); + return FALSE; + } + } + if (fclose(fp) == -1) { + jlog("ERROR: dnn_config_file_parse: failed to 
close file\n"); + return FALSE; + } + + /* check validity */ + error_flag = FALSE; + for (i = 0; i < am->dnn.hiddenlayernum; i++) { + if (am->dnn.wfile[i] == NULL) { + jlog("ERROR: dnn_config_file_parse: no W file specified for hidden layer #%d\n", i + 1); + error_flag = TRUE; + } + if (am->dnn.bfile[i] == NULL) { + jlog("ERROR: dnn_config_file_parse: no B file specified for hidden layer #%d\n", i + 1); + error_flag = TRUE; + } + } + if (error_flag == TRUE) { + return FALSE; + } + + am->dnn.enabled = TRUE; + return TRUE; +} + + + /* end of file */ diff --git a/libjulius/src/m_options.c b/libjulius/src/m_options.c index 9e2de6f5..5eaa4676 100644 --- a/libjulius/src/m_options.c +++ b/libjulius/src/m_options.c @@ -1329,6 +1329,17 @@ opt_parse(int argc, char *argv[], char *cwd, Jconf *jconf) GET_TMPARG; jconf->outprob_outfile = filepath(tmparg, cwd); continue; + } else if (strmatch(argv[i],"-dnnconf")) { + if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; + GET_TMPARG; + tmparg = filepath(tmparg, cwd); + if (dnn_config_file_parse(tmparg, jconf->amnow) == FALSE) { + jlog("ERROR: m_options: failed to read %s\n", tmparg); + free(tmparg); + return FALSE; + } + free(tmparg); + continue; } if (argv[i][0] == '-' && strlen(argv[i]) == 2) { /* 1-letter options */ diff --git a/libsent/include/sent/dnn.h b/libsent/include/sent/dnn.h index 1b504270..d3d57bda 100644 --- a/libsent/include/sent/dnn.h +++ b/libsent/include/sent/dnn.h @@ -27,6 +27,7 @@ typedef struct { float *state_prior; /* state priors [id] */ int state_prior_num; /* num of above (= output layer length) */ + float prior_factor; /* prior factor */ int batch_size; /* batch size */ diff --git a/libsent/include/sent/hmm_calc.h b/libsent/include/sent/hmm_calc.h index 8651d24e..77806505 100644 --- a/libsent/include/sent/hmm_calc.h +++ b/libsent/include/sent/hmm_calc.h @@ -85,6 +85,7 @@ typedef struct __hmmwork__{ /* local storage of pointers to the HMM */ HTK_HMM_INFO *OP_hmminfo; ///< Current %HMM definition 
data HTK_HMM_INFO *OP_gshmm; ///< Current GMS %HMM data + DNNData *OP_dnn; ///< DNN definition data /* local storage of input parameters */ HTK_Param *OP_param; ///< Current parameter @@ -157,8 +158,6 @@ typedef struct __hmmwork__{ boolean batch_computation; - DNNData dnn; ///< DNN definitions - } HMMWork; #ifdef __cplusplus @@ -174,7 +173,7 @@ LOGPROB addlog_array(LOGPROB *x, int n); boolean outprob_init(HMMWork *wrk, HTK_HMM_INFO *hmminfo, HTK_HMM_INFO *gshmm, int gms_num, - int gprune_method, int gprune_mixnum + int gprune_method, int gprune_mixnum, DNNData *dnn ); boolean outprob_prepare(HMMWork *wrk, int framenum); void outprob_free(HMMWork *wrk); @@ -230,7 +229,11 @@ boolean gprune_beam_init(HMMWork *wrk); void gprune_beam_free(HMMWork *wrk); void gprune_beam(HMMWork *wrk, HTK_HMM_Dens **g, int gnum, int *last_id, int lnum); -boolean dnn_init(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize); +/* calc_dnn.c */ +DNNData *dnn_new(); +void dnn_clear(DNNData *dnn); +void dnn_free(DNNData *dnn); +boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize); boolean dnn_calc_outprob(HMMWork *wrk); diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index f8d2f525..ea3d3691 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -14,7 +14,7 @@ /************************************************************************/ /* .npy file load */ -boolean load_npy(float *array, char *filename, int x, int y) +static boolean load_npy(float *array, char *filename, int x, int y) { FILE *fp; unsigned char code; @@ -181,7 +181,7 @@ static float logistic_func(float x) } /* 
initialize dnn layer */ -void dnn_layer_init(DNNLayer *l) +static void dnn_layer_init(DNNLayer *l) { l->w = NULL; l->b = NULL; @@ -190,7 +190,7 @@ void dnn_layer_init(DNNLayer *l) } /* load dnn layer parameter from files */ -boolean dnn_layer_load(DNNLayer *l, int in, int out, char *wfile, char *bfile) +static boolean dnn_layer_load(DNNLayer *l, int in, int out, char *wfile, char *bfile) { l->in = in; l->out = out; @@ -204,21 +204,70 @@ boolean dnn_layer_load(DNNLayer *l, int in, int out, char *wfile, char *bfile) } /* clear dnn layer */ -void dnn_layer_clear(DNNLayer *l) +static void dnn_layer_clear(DNNLayer *l) { if (l->w != NULL) free(l->w); if (l->b != NULL) free(l->b); dnn_layer_init(l); } +/*********************************************************************/ +DNNData *dnn_new() +{ + DNNData *d; + + d = (DNNData *)mymalloc(sizeof(DNNData)); + memset(d, 0, sizeof(DNNData)); + + return d; +} + +void dnn_clear(DNNData *dnn) +{ + int i; + + if (dnn->h) { + for (i = 0; i < dnn->hnum; i++) { + dnn_layer_clear(&(dnn->h[i])); + } + free(dnn->h); + } + if (dnn->state_prior) free(dnn->state_prior); + for (i = 0; i < 2; i++) { + if (dnn->work[i]) free(dnn->work[i]); + } + memset(dnn, 0, sizeof(DNNData)); +} + +void dnn_free(DNNData *dnn) +{ + dnn_clear(dnn); + free(dnn); +} + + /* initialize dnn */ -boolean dnn_init(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize) +boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize) { int i; + if (dnn == NULL) return FALSE; + + /* clear old data if exist */ + dnn_clear(dnn); + /* build logistic table */ logistic_table_build(); + /* set values */ + 
dnn->batch_size = batchsize; + dnn->veclen = veclen; + dnn->contextlen = contextlen; + dnn->inputnodenum = inputnodes; + dnn->hiddennodenum = hiddennodes; + dnn->outputnodenum = outputnodes; + dnn->prior_factor = prior_factor; + /* check for input length */ int inputlen = veclen * contextlen; if (inputnodes != inputlen) { @@ -280,6 +329,7 @@ boolean dnn_init(DNNData *dnn, int veclen, int contextlen, int inputnodes, int o return TRUE; } +#if 0 void dnn_ff(DNNData *dnn, float *in, float *out_ret) { int n; @@ -305,9 +355,10 @@ void dnn_ff(DNNData *dnn, float *in, float *out_ret) #else /* INV_LOG_TEN * (x - addlogarray(x) - log(state_prior)) */ #endif - } +#endif + /* compute outprob by DNN for the current state and parameter */ boolean dnn_calc_outprob(HMMWork *wrk) { diff --git a/libsent/src/phmm/outprob_init.c b/libsent/src/phmm/outprob_init.c index af5fb56e..58d3d27a 100644 --- a/libsent/src/phmm/outprob_init.c +++ b/libsent/src/phmm/outprob_init.c @@ -67,7 +67,7 @@ boolean outprob_init(HMMWork *wrk, HTK_HMM_INFO *hmminfo, HTK_HMM_INFO *gshmm, int gms_num, - int gprune_method, int gprune_mixnum + int gprune_method, int gprune_mixnum, DNNData *dnn ) { int i; @@ -156,6 +156,7 @@ outprob_init(HMMWork *wrk, HTK_HMM_INFO *hmminfo, /* store common variable to global */ wrk->OP_hmminfo = hmminfo; wrk->OP_gshmm = gshmm; /* NULL if GMS not used */ + wrk->OP_dnn = dnn; /* NULL if DNN not used */ wrk->OP_gprune_num = gprune_mixnum; /* store multi-stream data */ From 4c7917a914c8124cde35b614d9b97257da2726ac Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Thu, 25 Aug 2016 12:11:23 +0900 Subject: [PATCH 03/20] added -dnnconf to usage string --- libjulius/src/m_usage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libjulius/src/m_usage.c b/libjulius/src/m_usage.c index 839a6457..da32c7dd 100644 --- a/libjulius/src/m_usage.c +++ b/libjulius/src/m_usage.c @@ -212,6 +212,7 @@ j_output_argument_help(FILE *fp) fprintf(fp, "\n Acoustic Model:\n"); fprintf(fp, " -h hmmdefsfile HMM 
definition file name\n"); fprintf(fp, " [-hlist HMMlistfile] HMMlist filename (must for triphone model)\n"); + fprintf(fp, " [-dnnconf file] DNN configuration file\n"); fprintf(fp, " [-iwcd1 methodname] switch IWCD triphone handling on 1st pass\n"); fprintf(fp, " best N use N best score (default of n-gram, N=%d)\n", jconf->am_root->iwcdmaxn); fprintf(fp, " max use maximum score\n"); From c60bdfd3e61c7127b59572c5a85a75a42d1e1c8b Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Thu, 25 Aug 2016 12:59:56 +0900 Subject: [PATCH 04/20] add DNN outprob switcher, ready to impl. --- libsent/src/phmm/calc_dnn.c | 15 +- libsent/src/phmm/outprob.c | 12 + libsent/src/phmm/outprob_dnn.c | 420 --------------------------------- 3 files changed, 26 insertions(+), 421 deletions(-) delete mode 100644 libsent/src/phmm/outprob_dnn.c diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index ea3d3691..dbf56871 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -359,8 +359,21 @@ void dnn_ff(DNNData *dnn, float *in, float *out_ret) #endif -/* compute outprob by DNN for the current state and parameter */ +/* compute outprob by DNN for the current frame and store them to current frame state outprob cache */ boolean dnn_calc_outprob(HMMWork *wrk) { + /* frame = wrk->OP_time */ + /* param = wrk->OP_param */ + /* input vector = wrk->OP_param[wrk->OP_time][] */ + /* store state outprob to wrk->last_cache[] */ + + printf("%d\n", wrk->OP_time); + + { + int s; + for (s = 0; s < wrk->statenum; s++) { + wrk->last_cache[s] = 0.0f; + } + } } diff --git a/libsent/src/phmm/outprob.c b/libsent/src/phmm/outprob.c index f373274f..6baf3d5c 100644 --- a/libsent/src/phmm/outprob.c +++ b/libsent/src/phmm/outprob.c @@ -215,6 +215,18 @@ outprob_state(HMMWork *wrk, int t, HTK_HMM_State *stateinfo, HTK_Param *param) return(param->parvec[t][sid]); } + if (wrk->OP_dnn != NULL) { + /* for DNN, if the frame is not computed yet, batch-compute for the frame and save them to 
current cache */ + s = wrk->OP_hmminfo->ststart; + if (wrk->last_cache[s->id] == LOG_UNDEF) { + dnn_calc_outprob(wrk); + } + wrk->OP_state = stateinfo; + wrk->OP_state_id = sid; + /* consult cache and return the state prob */ + return(wrk->last_cache[sid]); + } + if (wrk->batch_computation) { /* batch computation: if the frame is not computed yet, pre-compute all */ s = wrk->OP_hmminfo->ststart; diff --git a/libsent/src/phmm/outprob_dnn.c b/libsent/src/phmm/outprob_dnn.c deleted file mode 100644 index 30c8570b..00000000 --- a/libsent/src/phmm/outprob_dnn.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 1991-2016 Kawahara Lab., Kyoto University - * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology - * Copyright (c) 2005-2016 Julius project team, Nagoya Institute of Technology - * All rights reserved - */ - -#include -#include -#include -#include -#include -#include - -#define LOG_UNDEF (LOG_ZERO - 1) ///< Value to be used as the initial cache value - -/** - * Initialize the cache data, should be called once on startup. - * - * @param wrk [i/o] HMM computation work area - * - * @return TRUE on success, FALSE on failure. - */ -boolean -outprob_cache_init(HMMWork *wrk) -{ - wrk->statenum = wrk->OP_hmminfo->totalstatenum; - wrk->outprob_cache = NULL; - wrk->outprob_allocframenum = 0; - wrk->OP_time = -1; - wrk->croot = NULL; - return TRUE; -} - -/** - * Prepare cache for the next input, by clearing the existing cache. - * - * @param wrk [i/o] HMM computation work area - * - * @return TRUE on success, FALSE on failure. - */ -boolean -outprob_cache_prepare(HMMWork *wrk) -{ - int s,t; - - /* clear already allocated area */ - for (t = 0; t < wrk->outprob_allocframenum; t++) { - for (s = 0; s < wrk->statenum; s++) { - wrk->outprob_cache[t][s] = LOG_UNDEF; - } - } - - return TRUE; -} - -/** - * Expand the cache to time axis if needed. 
- * - * @param wrk [i/o] HMM computation work area - * @param reqframe [in] required frame length - */ -static void -outprob_cache_extend(HMMWork *wrk, int reqframe) -{ - int newnum; - int size; - int t, s; - LOGPROB *tmpp; - - /* if enough length are already allocated, return immediately */ - if (reqframe < wrk->outprob_allocframenum) return; - - /* allocate per certain period */ - newnum = reqframe + 1; - if (newnum < wrk->outprob_allocframenum + OUTPROB_CACHE_PERIOD) newnum = wrk->outprob_allocframenum + OUTPROB_CACHE_PERIOD; - size = (newnum - wrk->outprob_allocframenum) * wrk->statenum; - - /* allocate */ - if (wrk->outprob_cache == NULL) { - wrk->outprob_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * newnum); - } else { - wrk->outprob_cache = (LOGPROB **)myrealloc(wrk->outprob_cache, sizeof(LOGPROB *) * newnum); - } - tmpp = (LOGPROB *)mybmalloc2(sizeof(LOGPROB) * size, &(wrk->croot)); - /* clear the new part */ - for(t = wrk->outprob_allocframenum; t < newnum; t++) { - wrk->outprob_cache[t] = &(tmpp[(t - wrk->outprob_allocframenum) * wrk->statenum]); - for (s = 0; s < wrk->statenum; s++) { - wrk->outprob_cache[t][s] = LOG_UNDEF; - } - } - - /*jlog("outprob cache: %d->%d\n", outprob_allocframenum, newnum);*/ - wrk->outprob_allocframenum = newnum; -} - -/** - * Free work area for cache. - * - * @param wrk [i/o] HMM computation work area - * - */ -void -outprob_cache_free(HMMWork *wrk) -{ - if (wrk->croot != NULL) mybfree2(&(wrk->croot)); - if (wrk->outprob_cache != NULL) free(wrk->outprob_cache); -} - - -/** - * @brief Compute output probability of a state. - * - * Set the needed values to the global variables - * that begins with "OP_", and call calc_outprob_state(). The - * calc_outprob_state() is actually a function pointer, and the entity is - * either calc_tied_mix() for tied-mixture model and calc_mix() for others. - * (If you use GMS, the entity will be gms_state() instead.) - * - * The state-level cache is also consulted here. 
- * - * @param wrk [i/o] HMM computation work area - * @param t [in] time frame - * @param stateinfo [in] state information to compute the output probability - * @param param [in] input parameter vectors - * - * @return output log probability. - */ -LOGPROB -outprob_state(HMMWork *wrk, int t, HTK_HMM_State *stateinfo, HTK_Param *param) -{ - LOGPROB outp; - int sid; - int i, d; - HTK_HMM_State *s; - - sid = stateinfo->id; - - /* set global values for outprob functions to access them */ - wrk->OP_state = stateinfo; - wrk->OP_state_id = sid; - wrk->OP_param = param; - if (wrk->OP_time != t) { - wrk->OP_last_time = wrk->OP_time; - wrk->OP_time = t; - for(d=0,i=0;iOP_nstream;i++) { - wrk->OP_vec_stream[i] = &(param->parvec[t][d]); - d += wrk->OP_veclen_stream[i]; - } - - outprob_cache_extend(wrk, t); /* extend cache if needed */ - wrk->last_cache = wrk->outprob_cache[t]; /* reduce 2-d array access */ - } - - if (param->is_outprob) { - /* return the param as output probability */ - if (sid >= param->veclen) { - jlog("Error: state id in the dummy HMM exceeds vector length (%d > %d)\n", sid, param->veclen); - return(LOG_ZERO); - } - return(param->parvec[t][sid]); - } - - if (wrk->batch_computation) { - /* batch computation: if the frame is not computed yet, pre-compute all */ - s = wrk->OP_hmminfo->ststart; - if (wrk->last_cache[s->id] == LOG_UNDEF) { - for (; s; s = s->next) { - wrk->OP_state = s; - wrk->OP_state_id = s->id; - wrk->last_cache[s->id] = (*(wrk->calc_outprob_state))(wrk); - } - } - wrk->OP_state = stateinfo; - wrk->OP_state_id = sid; - } - - /* consult cache */ - if ((outp = wrk->last_cache[sid]) == LOG_UNDEF) { - outp = wrk->last_cache[sid] = (*(wrk->calc_outprob_state))(wrk); - } - return(outp); -} - -/** - * Initialize work area for outprob_cd_nbest(). - * - * @param wrk [i/o] HMM computation work area - * @param num [in] number of top states to be calculated. 
- */ -void -outprob_cd_nbest_init(HMMWork *wrk, int num) -{ - wrk->cd_nbest_maxprobs = (LOGPROB *)mymalloc(sizeof(LOGPROB) * num); - wrk->cd_nbest_maxn = num; -} - -/** - * Free work area for outprob_cd_nbest(). - * - * @param wrk [i/o] HMM computation work area - * - */ -void -outprob_cd_nbest_free(HMMWork *wrk) -{ - free(wrk->cd_nbest_maxprobs); -} - -/** - * Return average of N-beat outprob for pseudo state set. - * - * @param wrk [i/o] HMM computation work area - * @param t [in] input frame - * @param lset [in] pseudo state set - * @param param [in] input parameter data - * - * @return outprob log probability, average of top N states in @a lset. - */ -static LOGPROB -outprob_cd_nbest(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) -{ - LOGPROB prob; - int i, k, n; - - n = 0; - for(i=0;inum;i++) { - prob = outprob_state(wrk, t, lset->s[i], param); - /*jlog("\t\t%d:%f\n", i, prob);*/ - if (prob <= LOG_ZERO) continue; - if (n == 0 || prob <= wrk->cd_nbest_maxprobs[n-1]) { - if (n == wrk->cd_nbest_maxn) continue; - wrk->cd_nbest_maxprobs[n] = prob; - n++; - } else { - for(k=0; k wrk->cd_nbest_maxprobs[k]) { - memmove(&(wrk->cd_nbest_maxprobs[k+1]), &(wrk->cd_nbest_maxprobs[k]), - sizeof(LOGPROB) * (n - k - ( (n == wrk->cd_nbest_maxn) ? 1 : 0))); - wrk->cd_nbest_maxprobs[k] = prob; - break; - } - } - if (n < wrk->cd_nbest_maxn) n++; - } - } - prob = 0.0; - for(i=0;icd_nbest_maxprobs[i]);*/ - prob += wrk->cd_nbest_maxprobs[i]; - } - return(prob/(float)n); -} - -/** - * Return maximum outprob of the pseudo state set. - * - * @param wrk [i/o] HMM computation work area - * @param t [in] input frame - * @param lset [in] pseudo state set - * @param param [in] input parameter data - * - * @return maximum output log probability among states in @a lset. 
- */ -static LOGPROB -outprob_cd_max(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) -{ - LOGPROB maxprob, prob; - int i; - - maxprob = LOG_ZERO; - for(i=0;inum;i++) { - prob = outprob_state(wrk, t, lset->s[i], param); - if (maxprob < prob) maxprob = prob; - } - return(maxprob); -} - -/** - * Return average outprob of the pseudo state set. - * - * @param wrk [i/o] HMM computation work area - * @param t [in] input frame - * @param lset [in] pseudo state set - * @param param [in] input parameter data - * - * @return average output log probability of states in @a lset. - */ -static LOGPROB -outprob_cd_avg(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) -{ - LOGPROB sum, p; - int i,j; - sum = 0.0; - j = 0; - for(i=0;inum;i++) { - p = outprob_state(wrk, t, lset->s[i], param); - if (p > LOG_ZERO) { - sum += p; - j++; - } - } - return(sum/(float)j); -} - -/** - * Compute the log output probability of a pseudo state set. - * - * @param wrk [i/o] HMM computation work area - * @param t [in] input frame - * @param lset [in] pseudo state set - * @param param [in] input parameter data - * - * @return the computed log output probability. - */ -LOGPROB -outprob_cd(HMMWork *wrk, int t, CD_State_Set *lset, HTK_Param *param) -{ - LOGPROB ret; - - /* select computation method */ - switch(wrk->OP_hmminfo->cdset_method) { - case IWCD_AVG: - ret = outprob_cd_avg(wrk, t, lset, param); - break; - case IWCD_MAX: - ret = outprob_cd_max(wrk, t, lset, param); - break; - case IWCD_NBEST: - ret = outprob_cd_nbest(wrk, t, lset, param); - break; - } - return(ret); -} - - -/** - * Top function to compute the output probability of a HMM state. - * - * @param wrk [i/o] HMM computation work area - * @param t [in] input frame - * @param hmmstate [in] HMM state - * @param param [in] input parameter data - * - * @return the computed log output probability. 
- */ -LOGPROB -outprob(HMMWork *wrk, int t, HMM_STATE *hmmstate, HTK_Param *param) -{ - if (hmmstate->is_pseudo_state) { - return(outprob_cd(wrk, t, hmmstate->out.cdset, param)); - } else { - return(outprob_state(wrk, t, hmmstate->out.state, param)); - } -} - - - - -static boolean -mywrite(char *buf, size_t unitbyte, int unitnum, FILE *fp, boolean needswap) -{ - size_t tmp; - - if (needswap) swap_bytes(buf, unitbyte, unitnum); - if ((tmp = myfwrite(buf, unitbyte, unitnum, fp)) < (size_t)unitnum) { - jlog("Error: outprob_cache_output: failed to write %d bytes\n", unitbyte * unitnum); - return(FALSE); - } - // if (needswap) swap_bytes(buf, unitbyte, unitnum); - return(TRUE); -} - -boolean -outprob_cache_output(FILE *fp, HMMWork *wrk, int framenum) -{ - int s,t; - boolean needswap; - -#ifdef WORDS_BIGENDIAN - needswap = FALSE; -#else /* LITTLE ENDIAN */ - needswap = TRUE; -#endif - - needswap = TRUE; - - if (wrk->outprob_allocframenum < framenum) { - jlog("Error: outprob_cache_output: framenum > allocated (%d > %d)\n", framenum, wrk->outprob_allocframenum); - return FALSE; - } - - { - unsigned int ui; - unsigned short us; - short st; - float f; - - jlog("Stat: outprob_cache_output: %d states, %d samples\n", wrk->statenum, framenum); - - ui = framenum; - if (!mywrite((char *)&ui, sizeof(unsigned int), 1, fp, needswap)) return FALSE; - ui = wrk->OP_param->header.wshift; - if (!mywrite((char *)&ui, sizeof(unsigned int), 1, fp, needswap)) return FALSE; - us = wrk->statenum * sizeof(float); - if (!mywrite((char *)&us, sizeof(unsigned short), 1, fp, needswap)) return FALSE; - st = F_USER; - if (!mywrite((char *)&st, sizeof(short), 1, fp, needswap)) return FALSE; - - for (t = 0; t < framenum; t++) { - for (s = 0; s < wrk->statenum; s++) { - f = wrk->outprob_cache[t][s]; - if (!mywrite((char *)&f, sizeof(float), 1, fp, needswap)) return FALSE; - } - } - } - - return TRUE; -} From ae703af10704bf34d1b0eccbb4c5dff1c98f4180 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Thu, 25 
Aug 2016 16:15:50 +0900 Subject: [PATCH 05/20] added feature extraction, splicing, and prob calculation. has bug in feature extraction, need comparison with adintool --- libjulius/include/julius/jconf.h | 2 ++ libjulius/include/julius/recog.h | 14 ++++++++ libjulius/src/default.c | 2 ++ libjulius/src/instance.c | 10 ++++++ libjulius/src/m_fusion.c | 17 ++++++--- libjulius/src/m_info.c | 5 +++ libjulius/src/m_jconf.c | 6 +++- libjulius/src/m_options.c | 5 +++ libjulius/src/realtime-1stpass.c | 61 +++++++++++++++++++++++++++++--- libsent/src/phmm/calc_dnn.c | 50 +++++++++++++++++++++++--- 10 files changed, 157 insertions(+), 15 deletions(-) diff --git a/libjulius/include/julius/jconf.h b/libjulius/include/julius/jconf.h index 9620e677..0d851796 100644 --- a/libjulius/include/julius/jconf.h +++ b/libjulius/include/julius/jconf.h @@ -219,6 +219,8 @@ typedef struct __jconf_am__ { */ struct { boolean enabled; + short paramtype; /* feature type */ + char *optionstring; /* feature extraction option string */ int veclen; /* vector length */ int contextlen; /* context length */ int inputnodes; /* number of input nodes (should match veclen * contextlen) */ diff --git a/libjulius/include/julius/recog.h b/libjulius/include/julius/recog.h index 6790b493..c6895d28 100644 --- a/libjulius/include/julius/recog.h +++ b/libjulius/include/julius/recog.h @@ -633,11 +633,25 @@ typedef struct __mfcc_calc__ { * */ DeltaBuf *db; + /** * accel MFCC cycle buffer * */ DeltaBuf *ab; + + /** + * splice cycle buffer + * + */ + VECT *splicedmfcc; + int splicedlen; + + /** + * splice number + */ + int splice; + /** * working buffer holding current computing mfcc vector * diff --git a/libjulius/src/default.c b/libjulius/src/default.c index ffb91ec6..229735bf 100644 --- a/libjulius/src/default.c +++ b/libjulius/src/default.c @@ -154,6 +154,8 @@ jconf_set_default_values_am(JCONF_AM *j) j->frontend.sscalc_len = 300; j->frontend.ssload_filename = NULL; j->dnn.enabled = FALSE; + j->dnn.paramtype = 
F_ERR_INVALID; + j->dnn.optionstring = NULL; j->dnn.veclen = 0; j->dnn.contextlen = 0; j->dnn.inputnodes = 0; diff --git a/libjulius/src/instance.c b/libjulius/src/instance.c index 5b8ed0b0..2479b064 100644 --- a/libjulius/src/instance.c +++ b/libjulius/src/instance.c @@ -56,6 +56,11 @@ j_mfcccalc_new(JCONF_AM *amconf) mfcc->para = &(amconf->analysis.para); mfcc->hmm_loaded = (amconf->analysis.para_hmm.loaded == 1) ? TRUE : FALSE; mfcc->htk_loaded = (amconf->analysis.para_htk.loaded == 1) ? TRUE : FALSE; + if (amconf->dnn.enabled) { + mfcc->splice = amconf->dnn.contextlen; + } else { + mfcc->splice = 1; + } mfcc->wrk = WMP_work_new(mfcc->para); if (mfcc->wrk == NULL) { jlog("ERROR: j_mfcccalc_new: failed to initialize feature computation\n"); @@ -94,6 +99,7 @@ j_mfcccalc_free(MFCCCalc *mfcc) if (mfcc->rest_param) free_param(mfcc->rest_param); if (mfcc->param) free_param(mfcc->param); if (mfcc->wrk) WMP_free(mfcc->wrk); + if (mfcc->splicedmfcc) free(mfcc->splicedmfcc); if (mfcc->tmpmfcc) free(mfcc->tmpmfcc); if (mfcc->db) WMP_deltabuf_free(mfcc->db); if (mfcc->ab) WMP_deltabuf_free(mfcc->ab); @@ -362,6 +368,10 @@ void j_jconf_am_free(JCONF_AM *amconf) { int i; + + if (amconf->dnn.optionstring) + free(amconf->dnn.optionstring); + if (amconf->dnn.wfile) { for (i = 0; i < amconf->dnn.hiddenlayernum; i++) { free(amconf->dnn.wfile[i]); diff --git a/libjulius/src/m_fusion.c b/libjulius/src/m_fusion.c index d2e8acbc..17c758ae 100644 --- a/libjulius/src/m_fusion.c +++ b/libjulius/src/m_fusion.c @@ -125,8 +125,13 @@ initialize_HMM(JCONF_AM *amconf, Jconf *jconf) hmminfo_free(hmminfo); return NULL; } - /* set acoustic analysis parameters from HMM header */ - calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size); + if (amconf->dnn.enabled) { + /* for DNN, use dnnconf */ + calc_para_from_header(&(amconf->analysis.para), amconf->dnn.paramtype, amconf->dnn.veclen); + } else { + /* set acoustic analysis parameters from HMM header */ + 
calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size); + } } /* check if tied_mixture */ if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) { @@ -163,7 +168,9 @@ initialize_HMM(JCONF_AM *amconf, Jconf *jconf) hmminfo->cdmax_num = amconf->iwcdmaxn; if (amconf->analysis.para_htk.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_htk)); - if (amconf->analysis.para_hmm.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm)); + if (amconf->dnn.enabled == FALSE) { /* disable HMMDEFS-side parameter check on DNN */ + if (amconf->analysis.para_hmm.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm)); + } apply_para(&(amconf->analysis.para), &(amconf->analysis.para_default)); return(hmminfo); @@ -884,7 +891,9 @@ mfcc_config_is_same(JCONF_AM *amconf, MFCCCalc *mfcc) s1 = amconf->frontend.ssload_filename; s2 = mfcc->frontend.ssload_filename; if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { - return TRUE; + if (amconf->dnn.enabled == FALSE || amconf->dnn.contextlen == mfcc->splice) { + return TRUE; + } } } } diff --git a/libjulius/src/m_info.c b/libjulius/src/m_info.c index 7e191081..b34bfd4a 100644 --- a/libjulius/src/m_info.c +++ b/libjulius/src/m_info.c @@ -223,6 +223,11 @@ print_mfcc_info(FILE *fp, MFCCCalc *mfcc, Jconf *jconf) jlog(" Julius defaults"); } jlog("\n"); + + if (mfcc->splice > 1) { + jlog("\t frame splicing = %d\n", mfcc->splice); + jlog("\n"); + } } diff --git a/libjulius/src/m_jconf.c b/libjulius/src/m_jconf.c index 4e1e4b04..e17c11ee 100644 --- a/libjulius/src/m_jconf.c +++ b/libjulius/src/m_jconf.c @@ -599,7 +599,11 @@ dnn_config_file_parse(char *filename, JCONF_AM *am) v = p; while (*v == ' ') v++; *p = '\0'; - if (strmatch(buf, "feature_len")) am->dnn.veclen = atoi(v); + if (strmatch(buf, "feature_type")) { + am->dnn.paramtype = param_str2code(v); + } else if (strmatch(buf, "feature_options")) { + am->dnn.optionstring = strdup(v); + 
} else if (strmatch(buf, "feature_len")) am->dnn.veclen = atoi(v); else if (strmatch(buf, "context_len")) am->dnn.contextlen = atoi(v); else if (strmatch(buf, "input_nodes")) am->dnn.inputnodes = atoi(v); else if (strmatch(buf, "output_nodes")) am->dnn.outputnodes = atoi(v); diff --git a/libjulius/src/m_options.c b/libjulius/src/m_options.c index 5eaa4676..6cf73a2d 100644 --- a/libjulius/src/m_options.c +++ b/libjulius/src/m_options.c @@ -1338,6 +1338,11 @@ opt_parse(int argc, char *argv[], char *cwd, Jconf *jconf) free(tmparg); return FALSE; } + if (jconf->amnow->dnn.optionstring) { + if (config_string_parse(jconf->amnow->dnn.optionstring, jconf) == FALSE) { + return FALSE; + } + } free(tmparg); continue; } diff --git a/libjulius/src/realtime-1stpass.c b/libjulius/src/realtime-1stpass.c index c97a3823..ec9b3564 100644 --- a/libjulius/src/realtime-1stpass.c +++ b/libjulius/src/realtime-1stpass.c @@ -174,7 +174,7 @@ init_param(MFCCCalc *mfcc) mfcc->param->header.wshift = para->smp_period * para->frameshift; mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ - mfcc->param->veclen = para->veclen; + mfcc->param->veclen = para->veclen * mfcc->splice; /* ǧ¼±½èÍýÃæ/½ªÎ»¸å¤Ë¥»¥Ã¥È¤µ¤ì¤ëÊÑ¿ô: param->parvec (¥Ñ¥é¥á¡¼¥¿¥Ù¥¯¥È¥ë·ÏÎó) @@ -249,7 +249,6 @@ RealTimeInit(Recog *recog) mfcc->wrk->ss_floor = mfcc->frontend.ss_floor; } } - for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { para = mfcc->para; @@ -264,6 +263,12 @@ RealTimeInit(Recog *recog) /* ¥Ç¥ë¥¿·×»»¤Î¤¿¤á¤Î¥ï¡¼¥¯¥¨¥ê¥¢¤ò³ÎÊÝ */ /* allocate work area for the delta computation */ mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen); + /* splice buffer */ + if (mfcc->splice > 1) { + printf("para->veclen=%d\n", para->veclen); + mfcc->splicedmfcc = (VECT *)mymalloc(sizeof(VECT) * para->veclen * mfcc->splice); + mfcc->splicedlen = 0; + } /* MAP-CMN ÍѤνé´ü¥±¥×¥¹¥È¥é¥àÊ¿¶Ñ¤òÆɤ߹þ¤ó¤Ç½é´ü²½¤¹¤ë */ /* Initialize the initial cepstral mean data from file for MAP-CMN */ if (para->cmn 
|| para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight); @@ -334,6 +339,7 @@ reset_mfcc(Recog *recog) /* set the delta cycle buffer */ if (para->delta) WMP_deltabuf_prepare(mfcc->db); if (para->acc) WMP_deltabuf_prepare(mfcc->ab); + if (mfcc->splice > 1) mfcc->splicedlen = 0; } } @@ -401,6 +407,7 @@ RealTimePipeLinePrepare(Recog *recog) /* check type coherence between param and hmminfo here */ if (recog->jconf->input.paramtype_check_flag) { for(am=recog->amlist;am;am=am->next) { + if (am->config->dnn.enabled) continue; if (!check_param_coherence(am->hmminfo, am->mfcc->param)) { jlog("ERROR: input parameter type does not match AM\n"); return FALSE; @@ -436,6 +443,24 @@ RealTimePipeLinePrepare(Recog *recog) return TRUE; } +/* splice */ +static boolean +splice_mfcc(MFCCCalc *mfcc) +{ + if (mfcc->splicedlen >= mfcc->splice) { + memmove(mfcc->splicedmfcc, &(mfcc->splicedmfcc[mfcc->para->veclen]), (mfcc->splice - 1) * mfcc->para->veclen); + mfcc->splicedlen --; + } + memcpy(&(mfcc->splicedmfcc[mfcc->para->veclen * mfcc->splicedlen]), mfcc->tmpmfcc, sizeof(VECT) * mfcc->para->veclen); + mfcc->splicedlen++; + if (mfcc->splicedlen < mfcc->splice) { + /* if ret == FALSE, there is no available frame. So just wait for + next input */ + return FALSE; + } + return TRUE; +} + /** * * @brief ²»À¼ÇÈ·Á¤«¤é¥Ñ¥é¥á¡¼¥¿¥Ù¥¯¥È¥ë¤ò·×»»¤¹¤ë. 
@@ -569,6 +594,16 @@ RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen) /* perform CMN */ if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc); + for (i = 0; i < para->vecbuflen; i++) { + printf(" %f", tmpmfcc[i]); + } + printf("\n"); + + /* splice */ + if (mfcc->splice > 1) { + return(splice_mfcc(mfcc)); + } + return TRUE; } @@ -786,6 +821,7 @@ RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = int i, now, ret; MFCCCalc *mfcc; RealBeam *r; + VECT *mfccvec; r = &(recog->real); @@ -839,9 +875,14 @@ RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = /* calculate a parameter vector from current waveform windows and store to r->tmpmfcc */ if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) { + if (mfcc->splice > 1) { + mfccvec = mfcc->splicedmfcc; + } else { + mfccvec = mfcc->tmpmfcc; + } #ifdef ENABLE_PLUGIN /* call post-process plugin if exist */ - plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f); + plugin_exec_vector_postprocess(mfccvec, mfcc->param->veclen, mfcc->f); #endif /* MFCC´°À®¡¤ÅÐÏ¿ */ mfcc->valid = TRUE; @@ -850,7 +891,7 @@ RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); return -1; } - memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); + memcpy(mfcc->param->parvec[mfcc->f], mfccvec, sizeof(VECT) * mfcc->param->veclen); #ifdef RDEBUG printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f); #endif @@ -1114,6 +1155,7 @@ RealTimeParam(Recog *recog) #ifdef RDEBUG int i; #endif + VECT *mfccvec; r = &(recog->real); @@ -1251,12 +1293,21 @@ RealTimeParam(Recog *recog) } /* a new frame has been obtained from delta buffer to tmpmfcc */ if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc); + /* splice */ + if (mfcc->splice > 1) { + if (splice_mfcc(mfcc) == FALSE) { + continue; + } + mfccvec = 
mfcc->splicedmfcc; + } else { + mfccvec = mfcc->tmpmfcc; + } if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); return FALSE; } /* store to mfcc->f */ - memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); + memcpy(mfcc->param->parvec[mfcc->f], mfccvec, sizeof(VECT) * mfcc->param->veclen); #ifdef ENABLE_PLUGIN /* call postprocess plugin if any */ plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f); diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index dbf56871..86176b7d 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -317,6 +317,8 @@ boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int return FALSE; } dnn->state_prior[id] = val * prior_factor; + // log10-nize prior + dnn->state_prior[id] = log10(dnn->state_prior[id]); } fclose(fp); jlog("Stat: dnn_init: state prior loaded: %s\n", priorfile); @@ -362,18 +364,56 @@ void dnn_ff(DNNData *dnn, float *in, float *out_ret) /* compute outprob by DNN for the current frame and store them to current frame state outprob cache */ boolean dnn_calc_outprob(HMMWork *wrk) { + int hidx, i, j, d, n; + float *src, *dst; + DNNLayer *h; + float x; + DNNData *dnn = wrk->OP_dnn; + /* frame = wrk->OP_time */ /* param = wrk->OP_param */ /* input vector = wrk->OP_param[wrk->OP_time][] */ /* store state outprob to wrk->last_cache[] */ - printf("%d\n", wrk->OP_time); + printf("%d %d\n", wrk->OP_time, wrk->OP_param->veclen); + for (i = 0; i < wrk->OP_param->veclen; i++) { + printf("%4d: %f\n", i, wrk->OP_param->parvec[wrk->OP_time][i]); + } - { - int s; - for (s = 0; s < wrk->statenum; s++) { - wrk->last_cache[s] = 0.0f; + /* feed forward through hidden layers by standard logistic function */ + src = &(wrk->OP_param->parvec[wrk->OP_time][0]); + n = 0; + for (hidx = 0; hidx < dnn->hnum; hidx++) { + h = 
&(dnn->h[hidx]); + d = 0; + for (i = 0; i < h->out; i++) { + x = 0.0f; + for (j = 0; j < h->in; j++) { + x += h->w[d] * src[j]; + d++; + } + x += h->b[i]; + dnn->work[n][i] = logistic_func(x); } + src = dnn->work[n]; + if (++n > 1) n = 0; + } + /* output layer */ + d = 0; + for (i = 0; i < dnn->o.out; i++) { + x = 0.0f; + for (j = 0; j < dnn->o.in; j++) { + x += dnn->o.w[d] * src[j]; + d++; + } + x += dnn->o.b[i]; + wrk->last_cache[i] = logistic_func(x); + } + /* do softmax */ + /* INV_LOG_TEN * (x - addlogarray(x)) - log10(state_prior)) */ + float logprob = addlog_array(wrk->last_cache, wrk->statenum); + for (i = 0; i < wrk->statenum; i++) { + wrk->last_cache[i] = INV_LOG_TEN * (wrk->last_cache[i] - logprob) - dnn->state_prior[i]; } } From 43609840168332dcc4fa636a419947e277b67037 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Fri, 26 Aug 2016 01:55:35 +0900 Subject: [PATCH 06/20] fix codes, now works but slow and a little wierd cmn --- adintool/mainloop.c | 2 +- libjulius/src/m_fusion.c | 4 ++ libjulius/src/realtime-1stpass.c | 8 +-- libsent/src/phmm/calc_dnn.c | 107 +++++++++++++++++++------------ libsent/src/wav2mfcc/para.c | 8 ++- 5 files changed, 77 insertions(+), 52 deletions(-) diff --git a/adintool/mainloop.c b/adintool/mainloop.c index 97fa105f..34ec6941 100644 --- a/adintool/mainloop.c +++ b/adintool/mainloop.c @@ -349,7 +349,7 @@ vecnet_sub(SP16 *Speech, int nowlen, Recog *recog) #if 0 { int i; - for (i = 0; i < vecnet_veclen; i++) { + for (i = 0; i < a->conf.vecnet_veclen; i++) { printf(" %f", mfcc->tmpmfcc[i]); } printf("\n"); diff --git a/libjulius/src/m_fusion.c b/libjulius/src/m_fusion.c index 17c758ae..7af9d5ee 100644 --- a/libjulius/src/m_fusion.c +++ b/libjulius/src/m_fusion.c @@ -564,6 +564,10 @@ j_load_am(Recog *recog, JCONF_AM *amconf) jlog("ERROR: m_fusion: cannnot allocate DNN memory area\n"); return FALSE; } + if (amconf->dnn.outputnodes != am->hmminfo->totalstatenum) { + jlog("ERROR: m_fusion: mismatch in DNN output and HMM states (%d != 
%d)\n", amconf->dnn.outputnodes, am->hmminfo->totalstatenum); + return FALSE; + } if (dnn_setup(am->dnn, amconf->dnn.veclen, amconf->dnn.contextlen, diff --git a/libjulius/src/realtime-1stpass.c b/libjulius/src/realtime-1stpass.c index ec9b3564..cbb852a0 100644 --- a/libjulius/src/realtime-1stpass.c +++ b/libjulius/src/realtime-1stpass.c @@ -265,7 +265,6 @@ RealTimeInit(Recog *recog) mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen); /* splice buffer */ if (mfcc->splice > 1) { - printf("para->veclen=%d\n", para->veclen); mfcc->splicedmfcc = (VECT *)mymalloc(sizeof(VECT) * para->veclen * mfcc->splice); mfcc->splicedlen = 0; } @@ -448,7 +447,7 @@ static boolean splice_mfcc(MFCCCalc *mfcc) { if (mfcc->splicedlen >= mfcc->splice) { - memmove(mfcc->splicedmfcc, &(mfcc->splicedmfcc[mfcc->para->veclen]), (mfcc->splice - 1) * mfcc->para->veclen); + memmove(mfcc->splicedmfcc, &(mfcc->splicedmfcc[mfcc->para->veclen]), sizeof(VECT) * (mfcc->splice - 1) * mfcc->para->veclen); mfcc->splicedlen --; } memcpy(&(mfcc->splicedmfcc[mfcc->para->veclen * mfcc->splicedlen]), mfcc->tmpmfcc, sizeof(VECT) * mfcc->para->veclen); @@ -594,11 +593,6 @@ RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen) /* perform CMN */ if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc); - for (i = 0; i < para->vecbuflen; i++) { - printf(" %f", tmpmfcc[i]); - } - printf("\n"); - /* splice */ if (mfcc->splice > 1) { return(splice_mfcc(mfcc)); diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index 86176b7d..afc8149d 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -154,7 +154,9 @@ static boolean load_npy(float *array, char *filename, int x, int y) /* standard logistic function value table: take range x[-6,6] */ /* table size: LOGISTIC_TABLE_FACTOR * 12 * 4 (bytes) */ #define LOGISTIC_TABLE_FACTOR 20000 -#define LOGISTIC_TABLE_MAX (12 * LOGISTIC_TABLE_FACTOR) +#define LOGISTIC_TABLE_MAX (16 * LOGISTIC_TABLE_FACTOR) +#define 
LOGISTIC_MIN 0.000334 +#define LOGISTIC_MAX 0.999666 static float logistic_table[LOGISTIC_TABLE_MAX+1]; /* logistic value table */ @@ -166,7 +168,7 @@ static void logistic_table_build() double x; for (i = 0; i <= LOGISTIC_TABLE_MAX; i++) { - x = (double)i / (double)LOGISTIC_TABLE_FACTOR - 6.0; + x = (double)i / (double)LOGISTIC_TABLE_FACTOR - 8.0; d = 1.0 / (1.0 + exp(-x)); logistic_table[i] = (float)d; } @@ -175,9 +177,9 @@ static void logistic_table_build() /* return logistic function value, consulting table */ static float logistic_func(float x) { - if (x <= -6.0f) return 0.0f; - if (x >= 6.0f) return 1.0f; - return logistic_table[(int)((x + 6.0f) * LOGISTIC_TABLE_FACTOR)]; + if (x <= -8.0f) return LOGISTIC_MIN; + if (x >= 8.0f) return LOGISTIC_MAX; + return logistic_table[(int)((x + 8.0f) * LOGISTIC_TABLE_FACTOR + 0.5)]; } /* initialize dnn layer */ @@ -233,7 +235,7 @@ void dnn_clear(DNNData *dnn) free(dnn->h); } if (dnn->state_prior) free(dnn->state_prior); - for (i = 0; i < 2; i++) { + for (i = 0; i < dnn->hnum; i++) { if (dnn->work[i]) free(dnn->work[i]); } memset(dnn, 0, sizeof(DNNData)); @@ -306,7 +308,7 @@ boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int for (i = 0; i < dnn->state_prior_num; i++) { dnn->state_prior[i] = 0.0f; } - if ((fp = fopen(priorfile, "rb")) == NULL) { + if ((fp = fopen(priorfile, "r")) == NULL) { jlog("Error: cannot open %s\n", priorfile); return FALSE; } @@ -325,44 +327,32 @@ boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int } /* allocate work area */ - dnn->work[0] = (float *)mymalloc(sizeof(float) * dnn->hiddennodenum); - dnn->work[1] = (float *)mymalloc(sizeof(float) * dnn->hiddennodenum); + for (i = 0; i < dnn->hnum; i++) { + dnn->work[i] = (float *)mymalloc(sizeof(float) * dnn->hiddennodenum); + } return TRUE; } -#if 0 -void dnn_ff(DNNData *dnn, float *in, float *out_ret) +static void +sub1(float *dst, float *src, float *w, float *b, int out, int in) { - int n; - 
float *src, *dst; + float x; + float *s; + int i, j; - /* feed forward by standard logistic function */ - src = in; - n = 0; - for (int i = 0; i < dnn->hnum; i++) { - dnn->work[n] = logistic_func(dnn->h[i].w * src + dnn->h[i].b); - src = dnn->work[n]; - if (++n > 1) n = 0; + for (i = 0; i < out; i++) { + x = 0.0f; + s = src; + for (j = 0; j < in; j++) { + x += *(w++) * *(s++); + } + *(dst++) = x + *(b++); } - out_ret = dnn->o.w * src + dnn->o.b; - /* do soft max */ -#if 0 - /* log10( (exp(x)/sum(exp(x))) / state_prior) */ - out_ret = exp(out_ret); - allsum = sum(out_ret); - out_ret /= allsum; - out_ret /= dnn->state_prior; - out_ret = log10(out_ret); -#else - /* INV_LOG_TEN * (x - addlogarray(x) - log(state_prior)) */ -#endif } -#endif - /* compute outprob by DNN for the current frame and store them to current frame state outprob cache */ -boolean dnn_calc_outprob(HMMWork *wrk) +static boolean dnn_calc_outprob0(HMMWork *wrk) { int hidx, i, j, d, n; float *src, *dst; @@ -375,16 +365,12 @@ boolean dnn_calc_outprob(HMMWork *wrk) /* input vector = wrk->OP_param[wrk->OP_time][] */ /* store state outprob to wrk->last_cache[] */ - printf("%d %d\n", wrk->OP_time, wrk->OP_param->veclen); - for (i = 0; i < wrk->OP_param->veclen; i++) { - printf("%4d: %f\n", i, wrk->OP_param->parvec[wrk->OP_time][i]); - } - /* feed forward through hidden layers by standard logistic function */ src = &(wrk->OP_param->parvec[wrk->OP_time][0]); n = 0; for (hidx = 0; hidx < dnn->hnum; hidx++) { h = &(dnn->h[hidx]); + dst = dnn->work[n]; d = 0; for (i = 0; i < h->out; i++) { x = 0.0f; @@ -393,7 +379,7 @@ boolean dnn_calc_outprob(HMMWork *wrk) d++; } x += h->b[i]; - dnn->work[n][i] = logistic_func(x); + dst[i] = logistic_func(x); } src = dnn->work[n]; if (++n > 1) n = 0; @@ -407,7 +393,7 @@ boolean dnn_calc_outprob(HMMWork *wrk) d++; } x += dnn->o.b[i]; - wrk->last_cache[i] = logistic_func(x); + wrk->last_cache[i] = x; } /* do softmax */ /* INV_LOG_TEN * (x - addlogarray(x)) - log10(state_prior)) 
*/ @@ -417,3 +403,40 @@ boolean dnn_calc_outprob(HMMWork *wrk) } } +boolean dnn_calc_outprob(HMMWork *wrk) +{ + int hidx, i, j, d, n; + float *src, *dst; + DNNLayer *h; + float x; + DNNData *dnn = wrk->OP_dnn; + + /* frame = wrk->OP_time */ + /* param = wrk->OP_param */ + /* input vector = wrk->OP_param[wrk->OP_time][] */ + /* store state outprob to wrk->last_cache[] */ + + /* feed forward through hidden layers by standard logistic function */ + n = 0; + src = &(wrk->OP_param->parvec[wrk->OP_time][0]); + dst = dnn->work[n]; + for (hidx = 0; hidx < dnn->hnum; hidx++) { + h = &(dnn->h[hidx]); + sub1(dst, src, h->w, h->b, h->out, h->in); + for (i = 0; i < h->out; i++) { + dst[i] = logistic_func(dst[i]); + } + src = dst; + //if (++n > 1) n = 0; + dst = dnn->work[++n]; + } + /* output layer */ + sub1(wrk->last_cache, src, dnn->o.w, dnn->o.b, dnn->o.out, dnn->o.in); + + /* do softmax */ + /* INV_LOG_TEN * (x - addlogarray(x)) - log10(state_prior)) */ + float logprob = addlog_array(wrk->last_cache, wrk->statenum); + for (i = 0; i < wrk->statenum; i++) { + wrk->last_cache[i] = INV_LOG_TEN * (wrk->last_cache[i] - logprob) - dnn->state_prior[i]; + } +} diff --git a/libsent/src/wav2mfcc/para.c b/libsent/src/wav2mfcc/para.c index ce99cecb..440cf0a3 100644 --- a/libsent/src/wav2mfcc/para.c +++ b/libsent/src/wav2mfcc/para.c @@ -352,8 +352,12 @@ calc_para_from_header(Value *para, short param_type, short vec_size) /* on filter-bank output, also overwrite the number of filterbank */ if (para->basetype == F_FBANK || para->basetype == F_MELSPEC) { if (para->fbank_num != dim) { - jlog("Warning: number of filterbank is set to %d, but AM requires %d\n", para->fbank_num, dim); - jlog("Warning: use value of AM: %d\n", dim); + if (para->fbank_num != -1) { + jlog("Warning: number of filterbank is set to %d, but computed number from vector length is %d\n", para->fbank_num, dim); + jlog("Warning: overwrite: %d -> %d\n", para->fbank_num, dim); + } else { + jlog("Warning: number of filterbank 
was set to %d, estimated from vector length (%d) and parameter type\n", dim, vec_size); + } para->fbank_num = dim; } } From 0dba913de58d777b767b6ac8321c1ce568eff493 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Fri, 26 Aug 2016 13:34:23 +0900 Subject: [PATCH 07/20] use SIMD instruction (AVX) --- jclient-perl/jclient.pl | 0 libsent/include/sent/util.h | 2 ++ libsent/src/phmm/calc_dnn.c | 44 +++++++++++++++++++++++++++++++++++++ libsent/src/util/mymalloc.c | 30 +++++++++++++++++++++++++ 4 files changed, 76 insertions(+) mode change 100644 => 100755 jclient-perl/jclient.pl diff --git a/jclient-perl/jclient.pl b/jclient-perl/jclient.pl old mode 100644 new mode 100755 diff --git a/libsent/include/sent/util.h b/libsent/include/sent/util.h index 73b7ec94..47bc5172 100644 --- a/libsent/include/sent/util.h +++ b/libsent/include/sent/util.h @@ -88,6 +88,8 @@ void *mymalloc(size_t size); void *mymalloc_big(size_t elsize, size_t nelem); void *myrealloc(void *, size_t); void *mycalloc(size_t, size_t); +void *mymalloc_aligned(size_t size, size_t align); +void myfree_aligned(void *ptr); /* endian.c */ void swap_sample_bytes(SP16 *buf, int len); diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index afc8149d..9cb5d56d 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -11,6 +11,12 @@ #include #include +#define AVX + +#ifdef AVX +#include +#endif /* AVX */ + /************************************************************************/ /* .npy file load */ @@ -196,20 +202,36 @@ static boolean dnn_layer_load(DNNLayer *l, int in, int out, char *wfile, char *b { l->in = in; l->out = out; +#ifdef AVX + if (l->in % 8 != 0) { + jlog("Error: dnn_layer_load: input vector length is not 8-element aligned (%d)\n", l->in); + return FALSE; + } + /* memory block should be aligned for 32 bytes for AVX instructions */ + l->w = (float *)mymalloc_aligned(sizeof(float) * l->out * l->in, 32); + l->b = (float *)mymalloc_aligned(sizeof(float) * l->out, 
32); +#else l->w = (float *)mymalloc(sizeof(float) * l->out * l->in); l->b = (float *)mymalloc(sizeof(float) * l->out); +#endif /* AVX */ if (! load_npy(l->w, wfile, l->in, l->out)) return FALSE; jlog("Stat: dnn_layer_load: loaded %s\n", wfile); if (! load_npy(l->b, bfile, l->out, 1)) return FALSE; jlog("Stat: dnn_layer_load: loaded %s\n", bfile); + return TRUE; } /* clear dnn layer */ static void dnn_layer_clear(DNNLayer *l) { +#ifdef AVX + if (l->w != NULL) myfree_aligned(l->w); + if (l->b != NULL) myfree_aligned(l->b); +#else if (l->w != NULL) free(l->w); if (l->b != NULL) free(l->b); +#endif /* AVX */ dnn_layer_init(l); } @@ -341,6 +363,27 @@ sub1(float *dst, float *src, float *w, float *b, int out, int in) float *s; int i, j; +#ifdef AVX + float *fstore; + fstore = (float *)mymalloc_aligned(32, 32); + + int n = in / 8; + for (i = 0; i < out; i++) { + x = 0.0f; + s = src; + for (j = 0; j < n; j++) { + __m256 v1 = _mm256_load_ps(w); + __m256 v2 = _mm256_load_ps(s); + __m256 result = _mm256_dp_ps(v1, v2, 0xff); + _mm256_store_ps(fstore, result); + x += fstore[0] + fstore[4]; + w += 8; + s += 8; + } + *(dst++) = x + *(b++); + } + myfree_aligned(fstore); +#else for (i = 0; i < out; i++) { x = 0.0f; s = src; @@ -349,6 +392,7 @@ sub1(float *dst, float *src, float *w, float *b, int out, int in) } *(dst++) = x + *(b++); } +#endif /* AVX */ } /* compute outprob by DNN for the current frame and store them to current frame state outprob cache */ diff --git a/libsent/src/util/mymalloc.c b/libsent/src/util/mymalloc.c index 67241ff1..54943c5d 100644 --- a/libsent/src/util/mymalloc.c +++ b/libsent/src/util/mymalloc.c @@ -133,3 +133,33 @@ mycalloc(size_t nelem, size_t elsize) return p; } +/* MMDFiles_alignedmalloc: aligned malloc*/ +void *mymalloc_aligned(size_t size, size_t align) +{ + void *ptr; +#if defined(_MSC_VER) + ptr = _aligned_malloc(size, align); +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + ptr = aligned_alloc(align, size); +#elif 
defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112L + if (posix_memalign(&ptr, align, size) != 0) + ptr = NULL; +#else + ptr = malloc(size); +#endif // defined is defined, use the Windows stuff. + return ptr; +} + +/* MMDFiles_alignedfree: free aligned malloc */ +void myfree_aligned(void *ptr) +{ +#if defined(_MSC_VER) + _aligned_free(ptr); +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + free(ptr); +#elif defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112L + free(ptr); +#else + free(ptr); +#endif // defined is defined, use the Windows stuff. +} From 586f4f68735c85d1ea0937f8b07ee940a9405aac Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Fri, 26 Aug 2016 14:09:00 +0900 Subject: [PATCH 08/20] added calc_dnn.c --- msvc/libsent/libsent.vcxproj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/msvc/libsent/libsent.vcxproj b/msvc/libsent/libsent.vcxproj index 90d75538..e07f5c47 100644 --- a/msvc/libsent/libsent.vcxproj +++ b/msvc/libsent/libsent.vcxproj @@ -1,4 +1,4 @@ - + @@ -161,6 +161,7 @@ + @@ -209,4 +210,4 @@ - \ No newline at end of file + From e02f9985e0555a6312d12e457007c719a38701e4 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Fri, 26 Aug 2016 18:45:41 +0900 Subject: [PATCH 09/20] check if compiler supports AVX SIMD instruction in configure --- libsent/configure | 52 +++++++++++++++++++++++++++++++++++++++++++- libsent/configure.in | 19 ++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/libsent/configure b/libsent/configure index 4117489d..118ed632 100755 --- a/libsent/configure +++ b/libsent/configure @@ -3274,6 +3274,57 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu +xxxxAVX=yes +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SIMD AVX instruction" >&5 +$as_echo_n "checking for SIMD AVX instruction... " >&6; } + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +int +main () +{ +__m256 v; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +xxxxAVX=no + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +if test "$xxxxAVX" = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for SIMD AVX instruction with -mavx" >&5 +$as_echo_n "checking for SIMD AVX instruction with -mavx... " >&6; } + CFLAGS="$CFLAGS -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main () +{ +__m256 v; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + as_fn_error $? "no support for SIMD AVX instruction" "$LINENO" 5 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: @@ -3548,7 +3599,6 @@ case "$host_os" in esac - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if ${ac_cv_path_GREP+:} false; then : diff --git a/libsent/configure.in b/libsent/configure.in index 0aad85d6..85f644fc 100644 --- a/libsent/configure.in +++ b/libsent/configure.in @@ -124,6 +124,25 @@ dnl Checks for compiler. 
AC_PROG_CC AC_PROG_CPP +dnl Checks for AVX capability +xxxxAVX=yes +AC_MSG_CHECKING([for SIMD AVX instruction]) +AC_TRY_COMPILE([#include +],[__m256 v;], +AC_MSG_RESULT([yes]), +AC_MSG_RESULT([no]) +xxxxAVX=no +) +if test "$xxxxAVX" = no; then + dnl retry with "-mavx" option + AC_MSG_CHECKING([for SIMD AVX instruction with -mavx]) + CFLAGS="$CFLAGS -mavx" + AC_TRY_COMPILE([#include + ],[__m256 v;], + AC_MSG_RESULT([yes]), + AC_MSG_ERROR([no support for SIMD AVX instruction])) +fi + dnl Checks for programs. AC_PROG_INSTALL AC_PATH_PROG(RM, rm) From c892894d01f4cdff7088a4b6d0a25996179776f5 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Fri, 26 Aug 2016 18:47:20 +0900 Subject: [PATCH 10/20] add option -cmnstatic to use loaded cepstral mean/variance for all frame without updating --- libjulius/include/julius/jconf.h | 4 ++++ libjulius/include/julius/recog.h | 4 ++++ libjulius/src/default.c | 1 + libjulius/src/instance.c | 1 + libjulius/src/m_chkparam.c | 4 ++++ libjulius/src/m_fusion.c | 3 ++- libjulius/src/m_info.c | 18 +++++++++++++++--- libjulius/src/m_options.c | 5 +++++ libjulius/src/m_usage.c | 3 ++- libjulius/src/realtime-1stpass.c | 2 +- libsent/include/sent/mfcc.h | 3 ++- libsent/src/wav2mfcc/wav2mfcc-pipe.c | 20 ++++++++++++++------ 12 files changed, 55 insertions(+), 13 deletions(-) diff --git a/libjulius/include/julius/jconf.h b/libjulius/include/julius/jconf.h index 0d851796..b8082171 100644 --- a/libjulius/include/julius/jconf.h +++ b/libjulius/include/julius/jconf.h @@ -159,6 +159,10 @@ typedef struct __jconf_am__ { * CMN: load initial cepstral mean from file at startup (-cmnload) */ char *cmnload_filename; + /** + * CMN: perform map-cmn + */ + boolean map_cmn; /** * CMN: update cepstral mean while recognition * (-cmnnoupdate to unset) diff --git a/libjulius/include/julius/recog.h b/libjulius/include/julius/recog.h index c6895d28..43f996f2 100644 --- a/libjulius/include/julius/recog.h +++ b/libjulius/include/julius/recog.h @@ -545,6 +545,10 @@ typedef 
struct __mfcc_calc__ { * CMN: load initial cepstral mean from file at startup (-cmnload) */ char *load_filename; + /** + * CMN: perform map cmn + */ + boolean map_cmn; /** * CMN: update cepstral mean while recognition * (-cmnnoupdate to unset) diff --git a/libjulius/src/default.c b/libjulius/src/default.c index 229735bf..e315b70d 100644 --- a/libjulius/src/default.c +++ b/libjulius/src/default.c @@ -145,6 +145,7 @@ jconf_set_default_values_am(JCONF_AM *j) make_default_para(&(j->analysis.para_default)); make_default_para_htk(&(j->analysis.para_htk)); j->analysis.cmnload_filename = NULL; + j->analysis.map_cmn = TRUE; j->analysis.cmn_update = TRUE; j->analysis.cmnsave_filename = NULL; j->analysis.cmn_map_weight = 100.0; diff --git a/libjulius/src/instance.c b/libjulius/src/instance.c index 2479b064..a1847c10 100644 --- a/libjulius/src/instance.c +++ b/libjulius/src/instance.c @@ -67,6 +67,7 @@ j_mfcccalc_new(JCONF_AM *amconf) return NULL; } mfcc->cmn.load_filename = amconf->analysis.cmnload_filename; + mfcc->cmn.map_cmn = amconf->analysis.map_cmn; mfcc->cmn.update = amconf->analysis.cmn_update; mfcc->cmn.save_filename = amconf->analysis.cmnsave_filename; mfcc->cmn.map_weight = amconf->analysis.cmn_map_weight; diff --git a/libjulius/src/m_chkparam.c b/libjulius/src/m_chkparam.c index f13e6845..e661ad84 100644 --- a/libjulius/src/m_chkparam.c +++ b/libjulius/src/m_chkparam.c @@ -239,6 +239,10 @@ j_jconf_finalize(Jconf *jconf) jlog("ERROR: m_chkparam: when \"-cmnnoupdate\", initial cepstral normalisation data should be given by \"-cmnload\"\n"); ok_p = FALSE; } + if (am->analysis.map_cmn == FALSE && am->analysis.cmnload_filename == NULL) { + jlog("ERROR: m_chkparam: with \"-cmnstatic\", the static cepstral mean (and variance) should be given by \"-cmnload\"\n"); + ok_p = FALSE; + } } } diff --git a/libjulius/src/m_fusion.c b/libjulius/src/m_fusion.c index 7af9d5ee..97f46d8f 100644 --- a/libjulius/src/m_fusion.c +++ b/libjulius/src/m_fusion.c @@ -887,6 +887,7 @@ 
mfcc_config_is_same(JCONF_AM *amconf, MFCCCalc *mfcc) s2 = mfcc->cmn.save_filename; if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { if (amconf->analysis.cmn_update == mfcc->cmn.update + && amconf->analysis.map_cmn == mfcc->cmn.map_cmn && amconf->analysis.cmn_map_weight == mfcc->cmn.map_weight) { if (amconf->frontend.ss_alpha == mfcc->frontend.ss_alpha && amconf->frontend.ss_floor == mfcc->frontend.ss_floor @@ -1437,7 +1438,7 @@ j_final_fusion(Recog *recog) for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->cmn.load_filename) { if (mfcc->para->cmn || mfcc->para->cvn) { - mfcc->cmn.wrk = CMN_realtime_new(mfcc->para, mfcc->cmn.map_weight); + mfcc->cmn.wrk = CMN_realtime_new(mfcc->para, mfcc->cmn.map_weight, mfcc->cmn.map_cmn); if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) { jlog("ERROR: m_fusion: failed to read initial cepstral mean from \"%s\"\n", mfcc->cmn.load_filename); return FALSE; diff --git a/libjulius/src/m_info.c b/libjulius/src/m_info.c index b34bfd4a..e6d582a1 100644 --- a/libjulius/src/m_info.c +++ b/libjulius/src/m_info.c @@ -167,14 +167,26 @@ print_mfcc_info(FILE *fp, MFCCCalc *mfcc, Jconf *jconf) if (mfcc->para->cmn) { jlog("yes, "); if (jconf->decodeopt.realtime_flag) { - jlog("real-time MAP-CMN, updating mean with last %.1f sec. 
input\n"); - jlog(" initial mean from file = "); + if (mfcc->cmn.update == TRUE) { + jlog("real-time MAP-CMN, updating initial mean with last %d input frames\n", CPMAX); + jlog(" initial mean from file = "); + } else { + if (mfcc->cmn.map_cmn == TRUE) { + jlog("real-time MAP-CMN, static initial mean\n"); + jlog(" initial mean from file = "); + } else { + jlog("CMN with static mean\n"); + jlog(" static mean from file = "); + } + } if (mfcc->cmn.loaded) { jlog("%s\n", mfcc->cmn.load_filename); } else { jlog("N/A\n"); } - jlog(" beginning data weight = %6.2f\n", mfcc->cmn.map_weight); + if (mfcc->cmn.map_cmn == TRUE) { + jlog(" beginning data weight = %6.2f\n", mfcc->cmn.map_weight); + } } else { if (mfcc->cmn.loaded) { jlog("with a static mean\n"); diff --git a/libjulius/src/m_options.c b/libjulius/src/m_options.c index 6cf73a2d..4b3086f9 100644 --- a/libjulius/src/m_options.c +++ b/libjulius/src/m_options.c @@ -1148,6 +1148,11 @@ opt_parse(int argc, char *argv[], char *cwd, Jconf *jconf) if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.cmn_update = FALSE; continue; + } else if (strmatch(argv[i],"-cmnstatic")) { /* no map, just use static parameter */ + if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; + jconf->amnow->analysis.map_cmn = FALSE; + jconf->amnow->analysis.cmn_update = FALSE; + continue; } else if (strmatch(argv[i],"-cmnmapweight")) { /* CMN weight for MAP */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; diff --git a/libjulius/src/m_usage.c b/libjulius/src/m_usage.c index da32c7dd..8b19942d 100644 --- a/libjulius/src/m_usage.c +++ b/libjulius/src/m_usage.c @@ -204,7 +204,8 @@ j_output_argument_help(FILE *fp) fprintf(fp, " [-usepower/-nousepower] use power in fbank analysis (OFF)\n"); fprintf(fp, " [-cmnload file] load initial CMN param from file on startup\n"); fprintf(fp, " [-cmnsave file] save CMN param to file after each input\n"); - fprintf(fp, " [-cmnnoupdate] not 
update CMN param while recog. (use with -cmnload)\n"); + fprintf(fp, " [-cmnstatic] no MAP, use static CMN (use with -cmnload)\n"); + fprintf(fp, " [-cmnnoupdate] not update initial param while recog. (use with -cmnload)\n"); fprintf(fp, " [-cmnmapweight] weight value of initial cm for MAP-CMN (%6.2f)\n", jconf->am_root->analysis.cmn_map_weight); fprintf(fp, " [-cvn] cepstral variance normalisation (%s)\n", jconf->amnow->analysis.para.cvn ? "on" : "off"); fprintf(fp, " [-vtln alpha lowcut hicut] enable VTLN (1.0 to disable) (%f)\n", jconf->am_root->analysis.para_default.vtln_alpha); diff --git a/libjulius/src/realtime-1stpass.c b/libjulius/src/realtime-1stpass.c index cbb852a0..f856d264 100644 --- a/libjulius/src/realtime-1stpass.c +++ b/libjulius/src/realtime-1stpass.c @@ -270,7 +270,7 @@ RealTimeInit(Recog *recog) } /* MAP-CMN ÍѤνé´ü¥±¥×¥¹¥È¥é¥àÊ¿¶Ñ¤òÆɤ߹þ¤ó¤Ç½é´ü²½¤¹¤ë */ /* Initialize the initial cepstral mean data from file for MAP-CMN */ - if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight); + if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight, mfcc->cmn.map_cmn); /* -cmnload »ØÄê»þ, CMNÍѤΥ±¥×¥¹¥È¥é¥àÊ¿¶Ñ¤Î½é´üÃͤò¥Õ¥¡¥¤¥ë¤«¤éÆɤ߹þ¤à */ /* if "-cmnload", load initial cepstral mean data from file for CMN */ if (mfcc->cmn.load_filename) { diff --git a/libsent/include/sent/mfcc.h b/libsent/include/sent/mfcc.h index aa99dde4..f3fb752b 100644 --- a/libsent/include/sent/mfcc.h +++ b/libsent/include/sent/mfcc.h @@ -198,6 +198,7 @@ typedef struct { CMEAN now; ///< Work area to hold current cepstral mean and variance CMEAN all; ///< Work area to hold all cepstral mean and variance boolean loaded_from_file; ///< TRUE if loaded from file + boolean do_map; ///< TRUE when perform MAP-CMN } CMNWork; /** @@ -263,7 +264,7 @@ void WMP_deltabuf_prepare(DeltaBuf *db); boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc); boolean WMP_deltabuf_flush(DeltaBuf *db); -CMNWork *CMN_realtime_new(Value *para, 
float weight); +CMNWork *CMN_realtime_new(Value *para, float weight, boolean map); void CMN_realtime_free(CMNWork *c); void CMN_realtime_prepare(CMNWork *c); void CMN_realtime(CMNWork *c, float *mfcc); diff --git a/libsent/src/wav2mfcc/wav2mfcc-pipe.c b/libsent/src/wav2mfcc/wav2mfcc-pipe.c index e91394b3..903c0390 100644 --- a/libsent/src/wav2mfcc/wav2mfcc-pipe.c +++ b/libsent/src/wav2mfcc/wav2mfcc-pipe.c @@ -246,7 +246,7 @@ WMP_deltabuf_flush(DeltaBuf *db) * */ CMNWork * -CMN_realtime_new(Value *para, float weight) +CMN_realtime_new(Value *para, float weight, boolean map) { int i; @@ -277,6 +277,8 @@ CMN_realtime_new(Value *para, float weight) c->loaded_from_file = FALSE; + c->do_map = map; + if (c->var) { for(i = 0; i < c->veclen; i++) c->all.mfcc_var[i] = 0.0; } @@ -347,12 +349,18 @@ CMN_realtime(CMNWork *c, float *mfcc) for(d=0;d<c->veclen;d++) { /* accumulate current MFCC to sum */ c->now.mfcc_sum[d] += mfcc[d]; - /* calculate map-mean */ - x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d]; - y = (double)c->now.framenum + c->cweight; - x /= y; + /* calculate mean */ + if (c->do_map) { + /* map */ + x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d]; + y = (double)c->now.framenum + c->cweight; + x /= y; + } else { + /* static */ + x = c->cmean_init[d]; + } if (c->var) { - /* calculate map-var */ + /* calculate var */ c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x); } if (c->mean && d < c->mfcc_dim) { From c11b8fc8fba2201b532081e320c8963eedb9d771 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Fri, 26 Aug 2016 19:12:56 +0900 Subject: [PATCH 11/20] fix alignment to avoid segfault --- libsent/include/sent/dnn.h | 1 + libsent/src/phmm/calc_dnn.c | 76 +++++++++++-------------------------- 2 files changed, 23 insertions(+), 54 deletions(-) diff --git a/libsent/include/sent/dnn.h b/libsent/include/sent/dnn.h index d3d57bda..ae618b83 100644 --- a/libsent/include/sent/dnn.h +++ b/libsent/include/sent/dnn.h @@ -38,6 +38,7 @@ typedef struct { int hiddennodenum;
/* hidden layer node number */ int outputnodenum; /* output layer node number */ + float *invec; float *work[2]; } DNNData; diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index 9cb5d56d..5b21d0b0 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -258,8 +258,18 @@ void dnn_clear(DNNData *dnn) } if (dnn->state_prior) free(dnn->state_prior); for (i = 0; i < dnn->hnum; i++) { - if (dnn->work[i]) free(dnn->work[i]); + if (dnn->work[i]) { +#ifdef AVX + myfree_aligned(dnn->work[i]); +#else + free(dnn->work[i]); +#endif /* AVX */ + } } +#ifdef AVX + if (dnn->invec) myfree_aligned(dnn->invec); +#endif /* AVX */ + memset(dnn, 0, sizeof(DNNData)); } @@ -350,9 +360,15 @@ boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int /* allocate work area */ for (i = 0; i < dnn->hnum; i++) { +#ifdef AVX + dnn->work[i] = (float *)mymalloc_aligned(sizeof(float) * dnn->hiddennodenum, 32); +#else dnn->work[i] = (float *)mymalloc(sizeof(float) * dnn->hiddennodenum); +#endif /* AVX */ } - +#ifdef AVX + dnn->invec = (float *)mymalloc_aligned(sizeof(float) * inputnodes, 32); +#endif /* AVX */ return TRUE; } @@ -395,58 +411,6 @@ sub1(float *dst, float *src, float *w, float *b, int out, int in) #endif /* AVX */ } -/* compute outprob by DNN for the current frame and store them to current frame state outprob cache */ -static boolean dnn_calc_outprob0(HMMWork *wrk) -{ - int hidx, i, j, d, n; - float *src, *dst; - DNNLayer *h; - float x; - DNNData *dnn = wrk->OP_dnn; - - /* frame = wrk->OP_time */ - /* param = wrk->OP_param */ - /* input vector = wrk->OP_param[wrk->OP_time][] */ - /* store state outprob to wrk->last_cache[] */ - - /* feed forward through hidden layers by standard logistic function */ - src = &(wrk->OP_param->parvec[wrk->OP_time][0]); - n = 0; - for (hidx = 0; hidx < dnn->hnum; hidx++) { - h = &(dnn->h[hidx]); - dst = dnn->work[n]; - d = 0; - for (i = 0; i < h->out; i++) { - x = 0.0f; - for (j = 0; j < 
h->in; j++) { - x += h->w[d] * src[j]; - d++; - } - x += h->b[i]; - dst[i] = logistic_func(x); - } - src = dnn->work[n]; - if (++n > 1) n = 0; - } - /* output layer */ - d = 0; - for (i = 0; i < dnn->o.out; i++) { - x = 0.0f; - for (j = 0; j < dnn->o.in; j++) { - x += dnn->o.w[d] * src[j]; - d++; - } - x += dnn->o.b[i]; - wrk->last_cache[i] = x; - } - /* do softmax */ - /* INV_LOG_TEN * (x - addlogarray(x)) - log10(state_prior)) */ - float logprob = addlog_array(wrk->last_cache, wrk->statenum); - for (i = 0; i < wrk->statenum; i++) { - wrk->last_cache[i] = INV_LOG_TEN * (wrk->last_cache[i] - logprob) - dnn->state_prior[i]; - } -} - boolean dnn_calc_outprob(HMMWork *wrk) { int hidx, i, j, d, n; @@ -463,6 +427,10 @@ boolean dnn_calc_outprob(HMMWork *wrk) /* feed forward through hidden layers by standard logistic function */ n = 0; src = &(wrk->OP_param->parvec[wrk->OP_time][0]); +#ifdef AVX + memcpy(dnn->invec, src, sizeof(float) * dnn->inputnodenum); + src = dnn->invec; +#endif /* AVX */ dst = dnn->work[n]; for (hidx = 0; hidx < dnn->hnum; hidx++) { h = &(dnn->h[hidx]); From 2d6e0775fccfaeefff10f0133fda5aae6c86160d Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sat, 27 Aug 2016 14:24:22 +0900 Subject: [PATCH 12/20] fix memory leak --- julius/main.c | 26 -------------------------- libjulius/src/instance.c | 4 ++-- libjulius/src/realtime-1stpass.c | 2 +- libsent/include/sent/dnn.h | 4 ++-- libsent/src/phmm/calc_dnn.c | 2 ++ 5 files changed, 7 insertions(+), 31 deletions(-) diff --git a/julius/main.c b/julius/main.c index 675bea44..598d28d0 100644 --- a/julius/main.c +++ b/julius/main.c @@ -92,32 +92,6 @@ main(int argc, char *argv[]) return -1; } - -#if 0 - { - DNNData *dnn; - char *wfile[5] = { - "/home/ri/dictation-kit/model/dnn/W_l1.npy", - "/home/ri/dictation-kit/model/dnn/W_l2.npy", - "/home/ri/dictation-kit/model/dnn/W_l3.npy", - "/home/ri/dictation-kit/model/dnn/W_l4.npy", - "/home/ri/dictation-kit/model/dnn/W_l5.npy"}; - char *bfile[5] = { - 
"/home/ri/dictation-kit/model/dnn/bias_l1.npy", - "/home/ri/dictation-kit/model/dnn/bias_l2.npy", - "/home/ri/dictation-kit/model/dnn/bias_l3.npy", - "/home/ri/dictation-kit/model/dnn/bias_l4.npy", - "/home/ri/dictation-kit/model/dnn/bias_l5.npy"}; - - dnn = dnn_new(); - dnn_setup(dnn, 120, 11, 1320, 2004, 2048, 5, wfile, bfile, "/home/ri/dictation-kit/model/dnn/W_output.npy", "/home/ri/dictation-kit/model/dnn/bias_output.npy", "/home/ri/dictation-kit/model/dnn/prior.dnn", 1.0f, 64); - dnn_free(dnn); - exit(1); - } - -#endif - - /* add application options */ record_add_option(); module_add_option(); diff --git a/libjulius/src/instance.c b/libjulius/src/instance.c index a1847c10..8d9f6ca1 100644 --- a/libjulius/src/instance.c +++ b/libjulius/src/instance.c @@ -375,13 +375,13 @@ j_jconf_am_free(JCONF_AM *amconf) if (amconf->dnn.wfile) { for (i = 0; i < amconf->dnn.hiddenlayernum; i++) { - free(amconf->dnn.wfile[i]); + if (amconf->dnn.wfile[i]) free(amconf->dnn.wfile[i]); } free(amconf->dnn.wfile); } if (amconf->dnn.bfile) { for (i = 0; i < amconf->dnn.hiddenlayernum; i++) { - free(amconf->dnn.bfile[i]); + if (amconf->dnn.bfile[i]) free(amconf->dnn.bfile[i]); } free(amconf->dnn.bfile); } diff --git a/libjulius/src/realtime-1stpass.c b/libjulius/src/realtime-1stpass.c index f856d264..dd1deb53 100644 --- a/libjulius/src/realtime-1stpass.c +++ b/libjulius/src/realtime-1stpass.c @@ -173,7 +173,7 @@ init_param(MFCCCalc *mfcc) if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; mfcc->param->header.wshift = para->smp_period * para->frameshift; - mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ + mfcc->param->header.sampsize = para->veclen * mfcc->splice * sizeof(VECT); /* not compressed */ mfcc->param->veclen = para->veclen * mfcc->splice; /* ǧ¼±½èÍýÃæ/½ªÎ»¸å¤Ë¥»¥Ã¥È¤µ¤ì¤ëÊÑ¿ô: diff --git a/libsent/include/sent/dnn.h b/libsent/include/sent/dnn.h index ae618b83..9a41afdb 100644 --- a/libsent/include/sent/dnn.h +++ 
b/libsent/include/sent/dnn.h @@ -38,8 +38,8 @@ typedef struct { int hiddennodenum; /* hidden layer node number */ int outputnodenum; /* output layer node number */ - float *invec; - float *work[2]; + float *invec; /* input vector holder (32byte aligned) */ + float **work; /* working buffer for ff computation */ } DNNData; diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index 5b21d0b0..c7bfb833 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -266,6 +266,7 @@ void dnn_clear(DNNData *dnn) #endif /* AVX */ } } + free(dnn->work); #ifdef AVX if (dnn->invec) myfree_aligned(dnn->invec); #endif /* AVX */ @@ -359,6 +360,7 @@ boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int } /* allocate work area */ + dnn->work = (float **)mymalloc(sizeof(float *) * dnn->hnum); for (i = 0; i < dnn->hnum; i++) { #ifdef AVX dnn->work[i] = (float *)mymalloc_aligned(sizeof(float) * dnn->hiddennodenum, 32); From 14002a9e6fc66c0cee42ea939f6612d1e6a1ac02 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sat, 27 Aug 2016 15:07:23 +0900 Subject: [PATCH 13/20] DNN calculation optimized more --- libsent/src/phmm/calc_dnn.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index c7bfb833..ee661c3e 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -377,7 +377,6 @@ boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int static void sub1(float *dst, float *src, float *w, float *b, int out, int in) { - float x; float *s; int i, j; @@ -387,18 +386,18 @@ sub1(float *dst, float *src, float *w, float *b, int out, int in) int n = in / 8; for (i = 0; i < out; i++) { - x = 0.0f; + __m256 x = _mm256_setzero_ps(); s = src; for (j = 0; j < n; j++) { __m256 v1 = _mm256_load_ps(w); __m256 v2 = _mm256_load_ps(s); - __m256 result = _mm256_dp_ps(v1, v2, 0xff); - _mm256_store_ps(fstore, result); - x += 
fstore[0] + fstore[4]; + v2 = _mm256_mul_ps(v1, v2); + x = _mm256_add_ps(x, v2); w += 8; s += 8; } - *(dst++) = x + *(b++); + _mm256_store_ps(fstore, x); + *(dst++) = fstore[0] + fstore[1] + fstore[2] + fstore[3] + fstore[4] + fstore[5] + fstore[6] + fstore[7] + *(b++); } myfree_aligned(fstore); #else From 63c5bc5458c325e8beb89b6cb2e14d516ac727d7 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sat, 27 Aug 2016 17:53:19 +0900 Subject: [PATCH 14/20] loose npy header check, added AVX option to MSVC, header fix --- libsent/include/sent/hmm_calc.h | 2 +- libsent/src/phmm/calc_dnn.c | 43 +++++++-------------------------- msvc/libsent/libsent.vcxproj | 6 +++-- 3 files changed, 14 insertions(+), 37 deletions(-) diff --git a/libsent/include/sent/hmm_calc.h b/libsent/include/sent/hmm_calc.h index 77806505..f2099d80 100644 --- a/libsent/include/sent/hmm_calc.h +++ b/libsent/include/sent/hmm_calc.h @@ -235,7 +235,7 @@ void dnn_clear(DNNData *dnn); void dnn_free(DNNData *dnn); boolean dnn_setup(DNNData *dnn, int veclen, int contextlen, int inputnodes, int outputnodes, int hiddennodes, int hiddenlayernum, char **wfile, char **bfile, char *output_wfile, char *output_bfile, char *priorfile, float prior_factor, int batchsize); -boolean dnn_calc_outprob(HMMWork *wrk); +void dnn_calc_outprob(HMMWork *wrk); #ifdef __cplusplus } diff --git a/libsent/src/phmm/calc_dnn.c b/libsent/src/phmm/calc_dnn.c index ee661c3e..0b0fb773 100644 --- a/libsent/src/phmm/calc_dnn.c +++ b/libsent/src/phmm/calc_dnn.c @@ -31,7 +31,6 @@ static boolean load_npy(float *array, char *filename, int x, int y) char *header; size_t len; boolean fortran_order; - int i, j; if ((fp = fopen_readfile(filename)) == NULL) { jlog("Error: load_npy: unable to open: %s\n", filename); @@ -108,48 +107,25 @@ static boolean load_npy(float *array, char *filename, int x, int y) fortran_order = FALSE; } - /* all arrays are transposed when used in the original python script, so */ - /* we can assume all data in fortran-order, and 
read them as is */ - if (fortran_order == FALSE) { - jlog("Error: load_npy: data array should be in fortran order: %s\n", filename); - free(header); - fclose_readfile(fp); - return FALSE; - } - char buf[100]; sprintf(buf, "'shape': (%d, %d)", x, y); if (strstr(header, buf) == NULL) { - jlog("Error: load_npy: not a (%d, %d) array? %s\n", x, y, filename); - free(header); - fclose_readfile(fp); - return FALSE; + sprintf(buf, "'shape': (%d, %d)", y, x); + if (strstr(header, buf) == NULL) { + jlog("Error: load_npy: not a (%d, %d) array? %s\n", x, y, filename); + free(header); + fclose_readfile(fp); + return FALSE; + } } free(header); -#if 1 /* just read them in the order */ if ((len = myfread(array, 4, x * y, fp)) < x * y) { jlog("Error: load_npy: failed to read %d bytes: %s\n", x * y, filename); fclose_readfile(fp); return FALSE; } -#else - float *f; - f = (float *)mymalloc(sizeof(float) * y); - for (i = 0; i < x; i++) { - if ((len = myfread(f, 4, y, fp)) < y) { - jlog("Error: load_npy: failed to read %d bytes: %s\n", y * 4, filename); - fclose_readfile(fp); - free(f); - return FALSE; - } - for (j = 0; j < y; j++) { - array[i * y + j] = f[j]; - } - } - free(f); -#endif fclose_readfile(fp); return TRUE; @@ -412,12 +388,11 @@ sub1(float *dst, float *src, float *w, float *b, int out, int in) #endif /* AVX */ } -boolean dnn_calc_outprob(HMMWork *wrk) +void dnn_calc_outprob(HMMWork *wrk) { - int hidx, i, j, d, n; + int hidx, i, n; float *src, *dst; DNNLayer *h; - float x; DNNData *dnn = wrk->OP_dnn; /* frame = wrk->OP_time */ diff --git a/msvc/libsent/libsent.vcxproj b/msvc/libsent/libsent.vcxproj index e07f5c47..872f52fd 100644 --- a/msvc/libsent/libsent.vcxproj +++ b/msvc/libsent/libsent.vcxproj @@ -1,4 +1,4 @@ - + @@ -61,6 +61,7 @@ EditAndContinue CompileAsC 4819;%(DisableSpecificWarnings) + AdvancedVectorExtensions %(AdditionalDependencies) @@ -80,6 +81,7 @@ ProgramDatabase CompileAsC 4819;%(DisableSpecificWarnings) + AdvancedVectorExtensions %(AdditionalDependencies) 
@@ -210,4 +212,4 @@ - + \ No newline at end of file From ef11e90a2bf59b0d3ffc5cea7ff5f155a8ca5815 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sat, 27 Aug 2016 17:56:57 +0900 Subject: [PATCH 15/20] Fix not check HMM param header in DNN --- libjulius/src/m_fusion.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libjulius/src/m_fusion.c b/libjulius/src/m_fusion.c index 97f46d8f..761b9b4e 100644 --- a/libjulius/src/m_fusion.c +++ b/libjulius/src/m_fusion.c @@ -113,22 +113,22 @@ initialize_HMM(JCONF_AM *amconf, Jconf *jconf) /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */ /* check parameter type of this acoustic HMM */ if (jconf->input.type == INPUT_WAVEFORM) { - /* Decode parameter extraction type according to the training - parameter type in the header of the given acoustic HMM */ - switch(hmminfo->opt.param_type & F_BASEMASK) { - case F_MFCC: - case F_FBANK: - case F_MELSPEC: - break; - default: - jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC ior filterbank is supported\n"); - hmminfo_free(hmminfo); - return NULL; - } if (amconf->dnn.enabled) { /* for DNN, use dnnconf */ calc_para_from_header(&(amconf->analysis.para), amconf->dnn.paramtype, amconf->dnn.veclen); } else { + /* Decode parameter extraction type according to the training + parameter type in the header of the given acoustic HMM */ + switch(hmminfo->opt.param_type & F_BASEMASK) { + case F_MFCC: + case F_FBANK: + case F_MELSPEC: + break; + default: + jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC ior filterbank is supported\n"); + hmminfo_free(hmminfo); + return NULL; + } /* set acoustic analysis parameters from HMM header */ calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size); } From a1921c5c5d7670e91727dccc6c1da325341bd6cb Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sun, 28 Aug 2016 00:19:07 +0900 Subject: [PATCH 16/20] support for file input (non realtime) 
--- libjulius/src/wav2mfcc.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/libjulius/src/wav2mfcc.c b/libjulius/src/wav2mfcc.c index 08b2e5ec..7bc7dac9 100644 --- a/libjulius/src/wav2mfcc.c +++ b/libjulius/src/wav2mfcc.c @@ -88,6 +88,8 @@ wav2mfcc(SP16 speech[], int speechlen, Recog *recog) int len; Value *para; MFCCCalc *mfcc; + int veclen; + int t, i; /* calculate frame length from speech length, frame size and frame shift */ framenum = (int)((speechlen - recog->jconf->input.framesize) / recog->jconf->input.frameshift) + 1; @@ -125,10 +127,16 @@ wav2mfcc(SP16 speech[], int speechlen, Recog *recog) for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { para = mfcc->para; + veclen = para->veclen * mfcc->splice; + + if (framenum - (mfcc->splice - 1) < 1) { + jlog("WARNING: input too short (%d samples), ignored\n", speechlen); + return FALSE; + } /* malloc new param */ param_init_content(mfcc->param); - if (param_alloc(mfcc->param, framenum, para->veclen) == FALSE) { + if (param_alloc(mfcc->param, framenum, veclen) == FALSE) { jlog("ERROR: failed to allocate memory for converted parameter vectors\n"); return FALSE; } @@ -151,19 +159,28 @@ wav2mfcc(SP16 speech[], int speechlen, Recog *recog) return FALSE; } + /* splicing */ + if (mfcc->splice > 1) { + for (t = 0; t < framenum - (mfcc->splice - 1); t++) { + for (i = 1; i < mfcc->splice; i++) { + memcpy(&(mfcc->param->parvec[t][para->veclen * i]), &(mfcc->param->parvec[t + i][0]), sizeof(VECT) * para->veclen); + } + } + } + /* set miscellaneous parameters */ - mfcc->param->header.samplenum = framenum; + mfcc->param->header.samplenum = framenum - (mfcc->splice - 1); mfcc->param->header.wshift = para->smp_period * para->frameshift; - mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ - mfcc->param->header.samptype = F_MFCC; + mfcc->param->header.sampsize = veclen * sizeof(VECT); /* not compressed */ + mfcc->param->header.samptype = para->basetype; 
if (para->delta) mfcc->param->header.samptype |= F_DELTA; if (para->acc) mfcc->param->header.samptype |= F_ACCL; if (para->energy) mfcc->param->header.samptype |= F_ENERGY; if (para->c0) mfcc->param->header.samptype |= F_ZEROTH; if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP; if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; - mfcc->param->veclen = para->veclen; - mfcc->param->samplenum = framenum; + mfcc->param->veclen = veclen; + mfcc->param->samplenum = framenum - (mfcc->splice - 1); if (mfcc->frontend.sscalc) { free(mfcc->frontend.ssbuf); From f4dd68cbea24d1ed654038ba2bc2f1cbf480aa63 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sun, 28 Aug 2016 00:53:17 +0900 Subject: [PATCH 17/20] fix dnnconf parsing, added sample dnnconf --- Sample.dnnconf | 69 +++++++++++++++++++++++++++++++++++++++++ libjulius/src/m_jconf.c | 49 ++++++++++++++++------------- 2 files changed, 96 insertions(+), 22 deletions(-) create mode 100644 Sample.dnnconf diff --git a/Sample.dnnconf b/Sample.dnnconf new file mode 100644 index 00000000..21f48612 --- /dev/null +++ b/Sample.dnnconf @@ -0,0 +1,69 @@ +#### +#### Sample DNN Configuration for DNN-HMM Decoding (-dnnconf) +#### + +#### +#### Feature Extraction +#### + +# feature type, in HTK parameter specification format +feature_type FBANK_D_A_Z + +# julius options to specify the acoustic parameter configuration +# it will be passed to Julius to set feature extractor +# +# This example indicates the typical feature extraction for DNN: +# 1. feature extraction parameter will be loaded from HTK config file +# 2. use CMN/CVN +# 3. load ceptral mean and variance from the specified file +# 4. 
use the values as static, not update while processing +# +feature_options -htkconf model/dnn/config.lmfb.40ch.jnas -cvn -cmnload model/dnn/norm.jnas -cmnstatic + +# feature vector length (including delta or accel, before splicing) +feature_len 120 + +# splicing length +context_len 11 + +#### +#### NN Definition +#### + +# number of input nodes (should be equal to (feature_len * context_len)) +input_nodes 1320 + +# number of output nodes (num and order should correspond to HMM definition) +output_nodes 2004 + +# number of nodes in hidden layers +hidden_nodes 2048 + +# number of hidden layers (layers excluding input and output) +hidden_layers 5 + +# weights W and biases b for hidden layers, in numpy np.save() format +# dtype of these file should be 'dnn.paramtype = param_str2code(v); - } else if (strmatch(buf, "feature_options")) { + } else if (strmatch(pp, "feature_options")) { am->dnn.optionstring = strdup(v); - } else if (strmatch(buf, "feature_len")) am->dnn.veclen = atoi(v); - else if (strmatch(buf, "context_len")) am->dnn.contextlen = atoi(v); - else if (strmatch(buf, "input_nodes")) am->dnn.inputnodes = atoi(v); - else if (strmatch(buf, "output_nodes")) am->dnn.outputnodes = atoi(v); - else if (strmatch(buf, "hidden_nodes")) am->dnn.hiddennodes = atoi(v); - else if (strmatch(buf, "hidden_layers")) { + } else if (strmatch(pp, "feature_len")) am->dnn.veclen = atoi(v); + else if (strmatch(pp, "context_len")) am->dnn.contextlen = atoi(v); + else if (strmatch(pp, "input_nodes")) am->dnn.inputnodes = atoi(v); + else if (strmatch(pp, "output_nodes")) am->dnn.outputnodes = atoi(v); + else if (strmatch(pp, "hidden_nodes")) am->dnn.hiddennodes = atoi(v); + else if (strmatch(pp, "hidden_layers")) { am->dnn.hiddenlayernum = atoi(v); am->dnn.wfile = (char **)mymalloc(sizeof(char *) * am->dnn.hiddenlayernum); am->dnn.bfile = (char **)mymalloc(sizeof(char *) * am->dnn.hiddenlayernum); @@ -616,8 +621,8 @@ dnn_config_file_parse(char *filename, JCONF_AM *am) am->dnn.wfile[i] = 
NULL; am->dnn.bfile[i] = NULL; } - } else if (buf[0] == 'W') { - n = atoi(&(buf[1])); + } else if (pp[0] == 'W') { + n = atoi(&(pp[1])); if (n > am->dnn.hiddenlayernum) { jlog("ERROR: dnn_config_file_parse: W%d > # of hidden_layers (%d)\n", n, am->dnn.hiddenlayernum); fclose(fp); @@ -628,8 +633,8 @@ dnn_config_file_parse(char *filename, JCONF_AM *am) return FALSE; } am->dnn.wfile[n-1] = strdup(v); - } else if (buf[0] == 'B') { - n = atoi(&(buf[1])); + } else if (pp[0] == 'B') { + n = atoi(&(pp[1])); if (n > am->dnn.hiddenlayernum) { jlog("ERROR: dnn_config_file_parse: B%d > # of hidden_layers (%d)\n", n, am->dnn.hiddenlayernum); fclose(fp); @@ -640,13 +645,13 @@ dnn_config_file_parse(char *filename, JCONF_AM *am) return FALSE; } am->dnn.bfile[n-1] = strdup(v); - } else if (strmatch(buf, "output_W")) am->dnn.output_wfile = strdup(v); - else if (strmatch(buf, "output_B")) am->dnn.output_bfile = strdup(v); - else if (strmatch(buf, "state_prior")) am->dnn.priorfile = strdup(v); - else if (strmatch(buf, "state_prior_factor")) am->dnn.prior_factor = atof(v); - else if (strmatch(buf, "batch_size")) am->dnn.batchsize = atoi(v); + } else if (strmatch(pp, "output_W")) am->dnn.output_wfile = strdup(v); + else if (strmatch(pp, "output_B")) am->dnn.output_bfile = strdup(v); + else if (strmatch(pp, "state_prior")) am->dnn.priorfile = strdup(v); + else if (strmatch(pp, "state_prior_factor")) am->dnn.prior_factor = atof(v); + else if (strmatch(pp, "batch_size")) am->dnn.batchsize = atoi(v); else { - jlog("ERROR: dnn_config_file_parse: unknown spec: %s %s\n", buf, v); + jlog("ERROR: dnn_config_file_parse: unknown spec: %s %s\n", pp, v); fclose(fp); return FALSE; } From 8cf2da90ff18e2e827900cb73371debff279d8de Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sun, 28 Aug 2016 01:08:18 +0900 Subject: [PATCH 18/20] added new options --- Sample.jconf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sample.jconf b/Sample.jconf index aae141b2..4089ab8b 100644 --- a/Sample.jconf +++ 
b/Sample.jconf @@ -306,6 +306,7 @@ #-cmnnoupdate # keep initial mean, disable "-cmnupdate" #-cmnmapweight 100.0 # weight for MAP-CMN #-cvn # enable variance normalization +#-cmnstatic # totally static cmn/cvn ## Vocal tract length normalization (VTLN) #-vtln 1.0 300 4800 # enable VTLN (alpha, lowerfreq, upperfreq) @@ -317,6 +318,9 @@ #-ssalpha 2.0 # alpha coef. for spectral subtraction #-ssfloor 0.5 # spectral floor coef. +## DNN-HMM definition (default disabled (= GMM-HMM)) +#-dnnconf file # DNN configuration file + ## Others #-htkconf configfile # load analysis settings from HTK Config file From 540861d73fc6fc51f8578e28408a285d3e9cea9a Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Sun, 28 Aug 2016 01:08:37 +0900 Subject: [PATCH 19/20] more descriptions --- Sample.dnnconf | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Sample.dnnconf b/Sample.dnnconf index 21f48612..0e53c50d 100644 --- a/Sample.dnnconf +++ b/Sample.dnnconf @@ -9,14 +9,19 @@ # feature type, in HTK parameter specification format feature_type FBANK_D_A_Z -# julius options to specify the acoustic parameter configuration -# it will be passed to Julius to set feature extractor +# julius options to configure the acoustic parameter extraction. # -# This example indicates the typical feature extraction for DNN: -# 1. feature extraction parameter will be loaded from HTK config file -# 2. use CMN/CVN -# 3. load ceptral mean and variance from the specified file -# 4. use the values as static, not update while processing +# The example below indicates that: +# 1. parameters should be loaded from an HTK config file, +# 2. use CMN/CVN, +# 3. load cepstral mean and variance from the specified file, +# 4. keep the cepstral mean/variance static, not update while processing +# +# the specified string will be expanded inline at the point where this +# dnnconf file is specified by "-dnnconf", and passed to Julius.
+# As with other options in Julius, a later option will override +# an earlier one. Please check the start-up messages to check if the +# feature extraction is correctly set up. # feature_options -htkconf model/dnn/config.lmfb.40ch.jnas -cvn -cmnload model/dnn/norm.jnas -cmnstatic From ab0d9b30f0805318249974fb5256adbc4b7ba274 Mon Sep 17 00:00:00 2001 From: Akinobu Lee Date: Tue, 30 Aug 2016 23:29:42 +0900 Subject: [PATCH 20/20] update text --- 00readme-DNN.txt | 75 ++++++++++++++++++++++++++++++++++++------------ 00readme-ja.txt | 35 +++++++++++----------- 00readme.txt | 18 ++++++------ LICENSE.txt | 16 +++++------ README.md | 8 +++--- Release-ja.txt | 3 +- Release.txt | 3 +- 7 files changed, 101 insertions(+), 57 deletions(-) diff --git a/00readme-DNN.txt b/00readme-DNN.txt index 58457aeb..0a4a29c6 100644 --- a/00readme-DNN.txt +++ b/00readme-DNN.txt @@ -1,27 +1,64 @@ - +f Julius for DNN-based speech recognition + (revised 2016/08/30) (updated 2013/09/29) -A. How it works -================ +A. Julius and DNN-HMM +====================== + +From 4.4, Julius can perform DNN-HMM based recognition in two ways: + + 1. standalone: directly compute DNN for HMM inside Julius (>= 4.4) + + 2. network: receive state probabilities calculated by other process + via socket (<= 4.3.1) + +Both are described below. + + A.1. Standalone mode + ===================== + +From version 4.4, Julius is capable of performing DNN-HMM based +recognition by itself. It can read a DNN definition along with an HMM, +and can compute the network against input (spliced) feature vectors +and output the node scores of output layer for each frame, which will +be used as output probabilities of corresponding HMM states in the +HMM. All computation will be done in a single process. + +Note that the current implementation is very simple and limited. Only +basic functions are implemented for NN.
Any number of hidden layers +can be defined, but the number of the nodes in the hidden layers +should be the same. No batch computation is performed: all +frame-wise. SIMD instruction (Intel AVX) is used to speed up the +computation. Only tested on Windows and Ubuntu on Intel PC. +See "libsent/src/phmm/calc_dnn.c" for the actual implementation. + +To run, you need + + 1) an HMM AM (GMM defs are ignored, only its structure is used) + 2) a DNN definition that corresponds to 1) + 3) ".dnnconf" configuration file (text) + +The .dnnconf file specifies the parameters, options, DNN definition +files, and other parameters all relating to DNN computation. A sample +file is located in the top directory of Julius archive as +"Sample.dnnconf". + +The matrix/vector definitions should be given in ".npy" format +(i.e. Python's "numpy.save" format). Only 32bit-float little endian +datatype is acceptable. + +To prepare a model for DNN-HMM, note that the orders are important. +The order of the output nodes in the DNN should be the order of HMM +state definition id. If not, Julius won't work properly. -This version of Julius can perform, Julius can perform DNN-HMM based -recognition by receiving pre-computed state probabilities. In that mode, -Julius does not read any feature parameter vectors and compute the state -output probability of an HMM state in it, but just read the "output -probabilities vectors" of the HMM states already computed in other tools, -via socket or file. -The "output probabilities input" is called "outprob vector" in Julius, -which contains a sequence of vectors, each of them consists of -pre-computed state probabilities a vector of state-num-of-HMM dimension. + A.2. Modular mode + ===================== -The most important thing to know before using this scheme is that, -each dimension in the input outprob vector and each state in the HMM in -Julius should corresponds. In other words, the index of HMM states -and outprob vector should match. The details are described in the
The details are described in the -following section. +Julius still has capability of receiving state output probability +vector from other process. This is an older way before 4.4. To run, you need @@ -85,8 +122,8 @@ perform DNN-based recognition, please re-convert from ASCII hmmdefs with the newest version of mkbinhmm. -D. Making outprob vector -========================== +D. Making outprob vector for Modular mode +========================================== D.1. Format of outprob vector file =================================== diff --git a/00readme-ja.txt b/00readme-ja.txt index 1eea3ca0..2524f723 100644 --- a/00readme-ja.txt +++ b/00readme-ja.txt @@ -4,7 +4,7 @@ Julius - (Rev 4.4 2016/08/20) + (Rev 4.4 2016/08/30) (Rev 4.3.1 2014/01/15) (Rev 4.3 2013/12/25) (Rev 4.2.3 2013/06/30) @@ -41,8 +41,8 @@ Julius GitHub ‚ւ̈Ús‚ɂ‚¢‚Ä ======================== -Julius‚̓o[ƒWƒ‡ƒ“4.3.1‚æ‚è GitHub ‚ÖˆÚs‚µ‚Ü‚µ‚½D -¡ŒãCÅV‚̃\[ƒXƒR[ƒhEŠeŽíŽÀsƒLƒbƒgEŠJ”­î•ñ‚ÌŒöŠJE‹¤—L‚¨‚æ‚Ñ +Julius‚Í2016”N‚æ‚è GitHub ‚ÖˆÚs‚µ‚Ü‚µ‚½D +ÅV‚̃\[ƒXƒR[ƒhEŠeŽíŽÀsƒLƒbƒgEŠJ”­î•ñ‚ÌŒöŠJE‹¤—L‚¨‚æ‚Ñ ŠJ”­ŽÒŒü‚¯‚̃tƒH[ƒ‰ƒ€‰^‰c‚Í GitHub ‚É‚Äs‚Á‚Ä‚¢‚«‚Ü‚·D Julius on GitHub @@ -58,10 +58,15 @@ Julius Julius-4.4 =========== -ƒo[ƒWƒ‡ƒ“ 4.4 ‚Å‚Í‚¢‚­‚‚©‚̃Aƒbƒvƒf[ƒg‚ÆV‹@”\‚ª’ljÁ‚³‚ê‚Ü‚µ‚½D -Vƒc[ƒ‹‚Æ‚µ‚Ä "adintool-gui" ‚Æ "binlm2arpa" ‚ª’ljÁ‚³‚ê‚Ü‚µ‚½D -‚Ü‚½C"mkbingram" ‚Å‚Í•¶ŽšƒR[ƒh•ÏŠ·o—Í‚ªs‚¦‚Ü‚·D -ƒ‚ƒWƒ…[ƒ‹ƒ‚[ƒh‚ł̓Nƒ‰ƒCƒAƒ“ƒgØ’fŽž‚É—Ž‚¿‚¸‚ÉŽŸ‚̃Nƒ‰ƒCƒAƒ“ƒgÚ‘±‚ð +ƒo[ƒWƒ‡ƒ“ 4.4 ‚Å‚Í DNN-HMM Žd—lŽž‚Ì DNN ŒvŽZ‚ð‘g‚Ýž‚ÝA’P‘Ì‚Å +DNN-HMM‚ð—p‚¢‚½ƒIƒ“ƒ‰ƒCƒ“‰¹º”FŽ¯‚ªs‚¦‚é‚悤‚É‚È‚è‚Ü‚µ‚½B +Ú×‚Í 00readme-DNN.txt ‚ð‚²——‚­‚¾‚³‚¢B + + +Vƒc[ƒ‹‚Æ‚µ‚Ä adintool ‚Ì GUI ƒo[ƒWƒ‡ƒ“‚Å‚ ‚é "adintool-gui" ‚Æ +ƒoƒCƒiƒŠN-gram‚ð ARPA Œ`Ž®‚É‹t•ÏŠ·‚·‚é "binlm2arpa" ‚ª’ljÁ‚³‚ê‚Ü‚µ‚½D +‚Ü‚½C"mkbingram" ‚ŃoƒCƒiƒŠN-gram‚ð’¼Ú•¶ŽšƒR[ƒh•ÏŠ·‚Å‚«‚Ü‚·B +ƒ‚ƒWƒ…[ƒ‹ƒ‚[ƒh‚ŃNƒ‰ƒCƒAƒ“ƒgØ’fŽž‚É—Ž‚¿‚¸‚ÉŽŸ‚̃Nƒ‰ƒCƒAƒ“ƒgÚ‘±‚ð ‘҂‚悤‚É‚È‚è‚Ü‚µ‚½D‚Ü‚½C‚¢‚­‚‚©‚̃oƒO‚ªC³‚³‚êCÅ‹ß‚ÌOS‚Å‚Ì ƒRƒ“ƒpƒCƒ‹ƒGƒ‰[‚ðC³‚µ‚Ü‚µ‚½D @@ -81,6 +86,7 @@ Julius-4.4 configure configureƒXƒNƒŠƒvƒg 
configure.in Sample.jconf jconf Ý’èƒtƒ@ƒCƒ‹ƒTƒ“ƒvƒ‹ + Sample.dnnconf DNN Ý’èƒtƒ@ƒCƒ‹‚̃Tƒ“ƒvƒ‹ julius/ Julius ƒ\[ƒX o libjulius/ JuliusLib ƒRƒAƒGƒ“ƒWƒ“ƒ‰ƒCƒuƒ‰ƒŠ ƒ\[ƒX libsent/ JuliusLib ”Ä—pƒ‰ƒCƒuƒ‰ƒŠ ƒ\[ƒX @@ -114,14 +120,7 @@ jconf Žg—p•û–@CŠe‹@”\‚ÌЉîC§ŒÀŽ–€“™‚ÌŽ‘—¿‚ª‚ ‚è‚Ü‚·‚Ì‚ÅC‚»‚¿‚ç‚ðŒä——‰º ‚³‚¢D - ƒz[ƒ€ƒy[ƒWFhttp://julius.sourceforge.jp/ - -‚Ü‚½Cã‹Lƒz[ƒ€ƒy[ƒW‚É‚¨‚¢‚ÄCJulius‚ð—p‚¢‚½Œ¤‹†‚âƒAƒvƒŠƒP[ƒVƒ‡ƒ“ŠJ -”­‚ÉŠÖ‚·‚éî•ñŒðŠ·‚ðs‚¤‚½‚ß‚ÌuŠJ”­ŽÒƒtƒH[ƒ‰ƒ€v‚ðÝ’u‚µ‚Ä‚¨‚è‚Ü‚·D -ÅV‚Ì Julius ‚Ì CVS XVî•ñ‚È‚Ç‚à“Še‚³‚ê‚Ü‚·D -‚Ç‚¤‚¼ƒAƒNƒZƒX‚­‚¾‚³‚¢D - - Julius Forum: http://julius.sourceforge.jp/forum/ + ƒz[ƒ€ƒy[ƒWFhttp://julius.osdn/ ƒ‰ƒCƒZƒ“ƒX @@ -143,7 +142,11 @@ Julius ˜A—æ =========== -Julius ‚ÉŠÖ‚·‚邲Ž¿–âE‚¨–â‚¢‡‚킹‚ÍCGitHub ‚ ‚é‚¢‚Í +Julius ŠJ”­‚ÉŠÖ‚·‚邲Ž¿–âE‚¨–â‚¢‡‚킹‚Í GitHub ‚ų‚Á‚Ä‚¨‚è‚Ü‚·B + + Julius on GitHub + https://github.com/julius-speech/julius + ‚ ‚é‚¢‚͉º‹L‚̃[ƒ‹ƒAƒhƒŒƒX‚Ü‚Å‚¨–â‚¢‡‚킹‰º‚³‚¢ ('at' ‚ð '@' ‚É“Ç‚Ý‘Ö‚¦‚Ä‚­‚¾‚³‚¢) diff --git a/00readme.txt b/00readme.txt index e05ffa7e..40316b1d 100644 --- a/00readme.txt +++ b/00readme.txt @@ -4,7 +4,7 @@ Julius - (Rev 4.4 2016/08/20) + (Rev 4.4 2016/08/30) (Rev 4.3.1 2014/01/15) (Rev 4.3 2013/12/25) (Rev 4.2.3 2013/06/30) @@ -58,13 +58,14 @@ What's new in Julius-4.4 Julius is now hosted on GitHub: https://github.com/julius-speech/julius -Version 4.4 includes several updates and new features. Two new -tools "adintool-gui" and "binlm2arpa" are added and "mkbingram" was -updated for audio input and binary LM conversion. Now does not exit -on client disconnection on module mode, instead it pauses itself and -wait for another client to come. It also has many bug fixes and -updates for recent OS and environments. Some documents that may help -users using Julius with DNN-HMM is also added. +Version 4.4 now supports stand-alone DNN-HMM support. 
(see 00readme-DNN.txt) +Other features include: +- New tools: + - adintool-gui: GUI version of adintool + - binlm2arpa: reverse convert binary N-gram to ARPA format +- "mkbingram" now supports direct charset conversion of binary LM +- Now does not exit on connection loss in module mode +- Bug fixes See "Release.txt" for full list of updates. Run "configure --help=recursive" to see all configure options. @@ -84,6 +85,7 @@ Contents of Julius-4.4 configure configure script configure.in Sample.jconf Sample configuration file + Sample.dnnconf Sample DNN configuration file julius/ Julius sources libjulius/ JuliusLib core engine library sources libsent/ JuliusLib low-level library sources diff --git a/LICENSE.txt b/LICENSE.txt index 97ee38e8..4f0eab20 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -3,10 +3,10 @@ 「大語彙連続音声認識エンジン Julius」 利用許諾書 - Copyright (c) 1991-2013 京都大学 河原研究室 + Copyright (c) 1991-2016 京都大学 河原研究室 Copyright (c) 1997-2000 情報処理振興事業協会(IPA) Copyright (c) 2000-2005 奈良先端科学技術大学院大学 鹿野研究室 - Copyright (c) 2005-2013 名古屋工業大学 Julius開発チーム + Copyright (c) 2005-2016 名古屋工業大学 Julius開発チーム ---------------------------------------------------------------------------- @@ -40,10 +40,10 @@ Julius なくそのまま表示し添付しなければなりません。 記 - Copyright (c) 1991-2013 京都大学 河原研究室 + Copyright (c) 1991-2016 京都大学 河原研究室 Copyright (c) 1997-2000 情報処理振興事業協会(IPA) Copyright (c) 2000-2005 奈良先端科学技術大学院大学 鹿野研究室 - Copyright (c) 2005-2013 名古屋工業大学 Julius開発チーム + Copyright (c) 2005-2016 名古屋工業大学 Julius開発チーム 3. 
本ソフトウェアを利用して得られた知見に関して発表を行なう際には、 「大語彙連続音声認識エンジン Julius」を利用したことを明記して下さい。 @@ -79,10 +79,10 @@ Julius Large Vocabulary Continuous Speech Recognition Engine Julius - Copyright (c) 1991-2013 Kawahara Lab., Kyoto University + Copyright (c) 1991-2016 Kawahara Lab., Kyoto University Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology - Copyright (c) 2005-2013 Julius project team, Nagoya Institute of Technology + Copyright (c) 2005-2016 Julius project team, Nagoya Institute of Technology "Large Vocabulary Continuous Speech Recognition Engine Julius", including Julian, is being developed at Kawahara Lab., Kyoto @@ -129,10 +129,10 @@ whatsoever. Form of copyright notice: - Copyright (c) 1991-2013 Kawahara Lab., Kyoto University + Copyright (c) 1991-2016 Kawahara Lab., Kyoto University Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology - Copyright (c) 2005-2013 Julius project team, Nagoya Institute of Technology + Copyright (c) 2005-2016 Julius project team, Nagoya Institute of Technology 3. When you publish or present any results by using the Software, you must explicitly mention your use of "Large Vocabulary Continuous diff --git a/README.md b/README.md index d3a7cb17..de66bec7 100644 --- a/README.md +++ b/README.md @@ -49,12 +49,12 @@ The main developer / maintainer is Akinobu Lee (ri@nitech.ac.jp). # Download Julius -The latest release version is [4.3.1](https://github.com/julius-speech/julius/releases), released on January 15, 2014. +The latest release version is [4.4](https://github.com/julius-speech/julius/releases), released on August 30, 2016. You can get the released package from the [Release page](https://github.com/julius-speech/julius/releases). -Version 4.3.1 is a bug fix release. Several bugs has been fixed. 
-See the "Release.txt" file for the full list of updates. -Run with "-help" to see full list of options. +Version 4.4 provides stand-alone DNN-HMM support, and several new +tools and bug fixes are included. See the "Release.txt" file for the +full list of updates. Run with "-help" to see full list of options. # Toolkit and Assets diff --git a/Release-ja.txt b/Release-ja.txt index 29af672c..dc10710d 100644 --- a/Release-ja.txt +++ b/Release-ja.txt @@ -1,5 +1,6 @@ -4.4 (2016.08.20) +4.4 (2016.08.30) ================= +- DNN-HMMの計算をサポート - "adintool-gui": 音声入力モニタGUI付き adintool (adintool/README-GUI.txt参照) - "binlm2arpa": バイナリ言語モデルをARPAに変換する - "mkbingram" に言語モデルの文字コードを変換して出力するオプション "-c" を追加 diff --git a/Release.txt b/Release.txt index 549a04f8..3c5a2acb 100644 --- a/Release.txt +++ b/Release.txt @@ -1,5 +1,6 @@ -4.4 (2016.08.20) +4.4 (2016.08.30) ================= +- DNN-HMM computation support - "adintool-gui": adintool with input monitoring (see adintool/README-GUI.txt) - "binlm2arpa": convert binary LM to ARPA format - "mkbingram" now can convert text encoding of an LM by "-c" option