add -bi-weight option
lmthang committed Mar 5, 2015
1 parent 898f71b commit fd43852
Showing 3 changed files with 61 additions and 38 deletions.
2 changes: 1 addition & 1 deletion demo-bi-sg.sh
@@ -4,7 +4,7 @@ if [ ! -d "output" ]; then
mkdir output
fi

-command="./text2vec -src-train data/data.10k.de -src-lang de -tgt-train data/data.10k.en -tgt-lang en -align data/data.10k.align -output vectors.bin -cbow 0 -size 200 -window 5 -negative 0 -hs 1 -sample 1e-2 -tgt-sample 1e-3 -threads 1 -binary 0 -eval 1 -iter 3"
+command="./text2vec -src-train data/data.10k.de -src-lang de -tgt-train data/data.10k.en -tgt-lang en -align data/data.10k.align -output vectors.bin -cbow 0 -size 200 -window 5 -negative 0 -hs 1 -sample 1e-2 -tgt-sample 1e-3 -threads 1 -binary 0 -eval 1 -iter 3 -bi-weight 2.0"
echo "time $command"
time $command

14 changes: 10 additions & 4 deletions run_bi.sh
@@ -1,7 +1,7 @@
#!/bin/sh

-if [[ $# -lt 8 || $# -gt 12 ]]; then
-echo "`basename $0` remake outputDir trainPrefix dim useAlign numIters numThreads neg [isCbow alpha sample tgt_sample]" # [srcMonoFile tgtMonoFile monoSize anneal monoThread]
+if [[ $# -lt 8 || $# -gt 13 ]]; then
+echo "`basename $0` remake outputDir trainPrefix dim useAlign numIters numThreads neg [isCbow alpha sample tgt_sample bi_weight]" # [srcMonoFile tgtMonoFile monoSize anneal monoThread]
echo "neg=0: use hierarchical softmax"
exit
fi
@@ -30,9 +30,14 @@ tgt_sample="1e-5"
if [ $# -ge 12 ]; then
tgt_sample=${12}
fi
bi_weight="1"
if [ $# -ge 13 ]; then
bi_weight=${13}
fi

sampleStr="-sample $src_sample -tgt-sample $tgt_sample"
-monoStr=""
-otherOpts=""
+monoStr=""
+otherOpts="-bi-weight $bi_weight"
#if [ $# -eq 13 ]; then # mono
# monoStr="-src-train-mono ${9} -tgt-train-mono ${10} -mono-size ${11} -anneal ${12} -mono-thread ${13}"
# monoLambda=1
@@ -52,6 +57,7 @@ echo "# isCbow=$isCbow"
echo "# alphaStr=$alphaStr"
echo "# sampleStr=$sampleStr"
echo "# monoStr=$monoStr"
echo "# otherOpts=$otherOpts"
echo "# name=$name"

if [ $remake -eq 1 ]
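Note: the script wires bi_weight in as an optional 13th positional argument (defaulting to 1) and forwards it to text2vec through otherOpts. A hypothetical invocation with all 13 arguments (the paths and hyperparameter values are illustrative, chosen to mirror demo-bi-sg.sh):

  # remake outputDir trainPrefix dim useAlign numIters numThreads neg isCbow alpha sample tgt_sample bi_weight
  ./run_bi.sh 1 output data/data.10k 200 1 3 1 0 0 0.025 1e-2 1e-3 2.0

Leaving off the last argument keeps bi_weight at 1, i.e. the old behavior.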
83 changes: 50 additions & 33 deletions text2vec.c
@@ -102,6 +102,9 @@ int use_align = 0;
long long align_num_lines;
long long *align_line_blocks;

+real bi_weight = 1.0; // how much we weight the cross-lingual prediction
+real bi_alpha; // = alpha * bi_weight

// print stat of a real array
void print_real_array(real* a_syn, long long num_elements, char* name){
float min = 1000000;
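Taken together: bi_weight is the user-facing multiplier, and bi_alpha is the learning rate actually applied to cross-lingual predictions, kept in sync with the decayed monolingual rate alpha in TrainModelThread (see the hunk further down):

  bi_alpha = bi_weight * alpha

With the default bi_weight = 1.0 both objectives train at the same rate, so existing behavior is unchanged; the demo's -bi-weight 2.0 makes each cross-lingual update twice as strong as a monolingual one at the same point in training.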
@@ -646,7 +649,7 @@ void ProcessCbow(int sentence_position, int sentence_length, long long *sen, int
// syn1neg, table, vocab_size corresponds to the output side.
// neu1e: hidden vector error
void ProcessSkipPair(long long last_word, long long word, unsigned long long *next_random,
-struct train_params *in_params, struct train_params *out_params, real *neu1e) { // , real* syn0, real* syn1, real* syn1neg
+struct train_params *in_params, struct train_params *out_params, real *neu1e, real skip_alpha) {
long long d;
long long l1, l2, c, target, label;
real f, g;
@@ -671,7 +674,7 @@ void ProcessSkipPair(long long last_word, long long word, unsigned long long *ne
else if (f >= MAX_EXP) continue;
else f = expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
// 'g' is the gradient multiplied by the learning rate
-g = (1 - out_params->vocab[word].code[d] - f) * alpha;
+g = (1 - out_params->vocab[word].code[d] - f) * skip_alpha;
// Propagate errors output -> hidden
for (c = 0; c < layer1_size; c++) neu1e[c] += g * out_params->syn1[c + l2];
// Learn weights hidden -> output
@@ -692,9 +695,9 @@ void ProcessSkipPair(long long last_word, long long word, unsigned long long *ne
l2 = target * layer1_size;
f = 0;
for (c = 0; c < layer1_size; c++) f += in_params->syn0[c + l1] * out_params->syn1neg[c + l2];
-if (f > MAX_EXP) g = (label - 1) * alpha;
-else if (f < -MAX_EXP) g = (label - 0) * alpha;
-else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * alpha;
+if (f > MAX_EXP) g = (label - 1) * skip_alpha;
+else if (f < -MAX_EXP) g = (label - 0) * skip_alpha;
+else g = (label - expTable[(int)((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))]) * skip_alpha;
for (c = 0; c < layer1_size; c++) neu1e[c] += g * out_params->syn1neg[c + l2];
for (c = 0; c < layer1_size; c++) out_params->syn1neg[c + l2] += g * in_params->syn0[c + l1];
}
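Threading skip_alpha through lets the same routine serve both objectives at different rates. Writing f for the inner product of the input vector (in_params->syn0) and the relevant output vector, the scaled gradients above are

  g_hs  = (1 - code_d - sigma(f)) * skip_alpha    (hierarchical softmax)
  g_neg = (label - sigma(f)) * skip_alpha         (negative sampling)

with sigma the logistic function looked up in expTable. Monolingual call sites pass skip_alpha = alpha; the alignment-based call sites below pass bi_alpha.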
@@ -730,7 +733,7 @@ void ProcessSentence(int sentence_length, long long *sen, struct train_params *s
last_word = sen[c];
if (last_word == -1) continue;

-ProcessSkipPair(last_word, word, next_random, src, src, neu1e);
+ProcessSkipPair(last_word, word, next_random, src, src, neu1e, alpha);
} // for a (skipgram)
} // end if cbow
} // sentence
@@ -777,7 +780,7 @@ void ProcessSentenceAlign(struct train_params *src, long long* src_sent, int src
if (neighbor_pos >= 0 && neighbor_pos < src_len) {
src_neighbor = src_sent[neighbor_pos];
if (src_neighbor != -1) {
-ProcessSkipPair(tgt_word, src_neighbor, next_random, tgt, src, neu1e);
+ProcessSkipPair(tgt_word, src_neighbor, next_random, tgt, src, neu1e, bi_alpha);
}
}

@@ -786,7 +789,7 @@ void ProcessSentenceAlign(struct train_params *src, long long* src_sent, int src
if (neighbor_pos >= 0 && neighbor_pos < tgt_len) {
tgt_neighbor = tgt_sent[neighbor_pos];
if (tgt_neighbor != -1) {
-ProcessSkipPair(src_word, tgt_neighbor, next_random, src, tgt, neu1e);
+ProcessSkipPair(src_word, tgt_neighbor, next_random, src, tgt, neu1e, bi_alpha);
}
}
}
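Both directions of an aligned pair are weighted uniformly. As an illustration (the positions are made up), an alignment linking source position 3 to target position 7 with sampled window offset k triggers ProcessSkipPair(tgt_word, src_sent[3 + k], ..., tgt, src, neu1e, bi_alpha) for the tgt -> src direction and ProcessSkipPair(src_word, tgt_sent[7 + k], ..., src, tgt, neu1e, bi_alpha) for src -> tgt, while purely monolingual pairs in ProcessSentence keep the unweighted alpha.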
@@ -837,14 +840,21 @@ void *TrainModelThread(void *id) {
src_last_word_count = src_word_count;
if ((debug_mode > 1)) {
now=clock();
printf("%cAlpha: %f Progress: %.2f%% Words/thread/sec: %.2fk ", 13, alpha,
(src->word_count_actual - (src->word_count_actual / src->train_words) * src->train_words)/ (real)(src->train_words + 1) * 100,
src->word_count_actual / ((real)(now - start + 1) / (real)CLOCKS_PER_SEC * 1000));
if (is_tgt){
printf("%cAlpha: %f, bi_alpha: %f, Progress: %.2f%% Words/thread/sec: %.2fk ", 13, alpha, bi_alpha,
(src->word_count_actual - (src->word_count_actual / src->train_words) * src->train_words)/ (real)(src->train_words + 1) * 100,
src->word_count_actual / ((real)(now - start + 1) / (real)CLOCKS_PER_SEC * 1000));
} else {
printf("%cAlpha: %f Progress: %.2f%% Words/thread/sec: %.2fk ", 13, alpha,
(src->word_count_actual - (src->word_count_actual / src->train_words) * src->train_words)/ (real)(src->train_words + 1) * 100,
src->word_count_actual / ((real)(now - start + 1) / (real)CLOCKS_PER_SEC * 1000));
}
fflush(stdout);
}

alpha = starting_alpha * (1 - (cur_iter * src->train_words + src->word_count_actual) / (real)(num_train_iters * src->train_words + 1));
if (alpha < starting_alpha * 0.0001) alpha = starting_alpha * 0.0001;
+if (is_tgt) bi_alpha = alpha*bi_weight;
}
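bi_alpha is refreshed on the same schedule as alpha, so the ratio between the two rates stays exactly bi_weight throughout training. A minimal standalone C sketch of this schedule (the constants are illustrative; the real code takes them from the training state and command line):

  #include <stdio.h>

  typedef float real;

  int main(void) {
    real starting_alpha = 0.025;      /* illustrative initial rate */
    real bi_weight = 2.0;             /* as in -bi-weight 2.0 */
    long long train_words = 1000000;  /* illustrative corpus size */
    long long num_train_iters = 3;
    for (long long cur_iter = 0; cur_iter < num_train_iters; cur_iter++) {
      for (long long words = 0; words <= train_words; words += 250000) {
        /* linear decay over all iterations, floored at 0.01% of the start */
        real alpha = starting_alpha * (1 - (cur_iter * train_words + words)
            / (real)(num_train_iters * train_words + 1));
        if (alpha < starting_alpha * 0.0001) alpha = starting_alpha * 0.0001;
        real bi_alpha = alpha * bi_weight;  /* cross-lingual rate tracks the decay */
        printf("iter %lld, words %lld: alpha=%f bi_alpha=%f\n", cur_iter, words, alpha, bi_alpha);
      }
    }
    return 0;
  }

Compiled with cc -std=c99, this prints both rates falling linearly while bi_alpha stays at exactly twice alpha.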


@@ -1250,9 +1260,7 @@ void TrainModel() {
assert(src->num_lines==align_num_lines);
}

-int save_opt = 1;
-char sum_vector_file[MAX_STRING];
-char sum_vector_prefix[MAX_STRING];
+int save_opt = 0;
for(cur_iter=start_iter; cur_iter<num_train_iters; cur_iter++){
start = clock();
src->word_count_actual = tgt->word_count_actual = 0;
@@ -1276,32 +1284,15 @@
fprintf(stderr, "\n# eval %d, ", cur_iter); execute("date"); fflush(stderr);
eval_mono(src->output_file, src->lang, cur_iter);

-// sum vector for negative sampling
-if (save_opt==1 && hs==0){
-sprintf(sum_vector_file, "%s.sumvec.%s", output_prefix, src->lang);
-fprintf(stderr, "# Eval on sum vector file %s\n", sum_vector_file);
-eval_mono(sum_vector_file, src->lang, cur_iter);
-}
-
if (is_tgt) {
SaveVector(output_prefix, tgt->lang, tgt, save_opt);
eval_mono(tgt->output_file, tgt->lang, cur_iter);
// cldc
cldc(output_prefix, cur_iter);


-// sum vector for negative sampling
-if (save_opt==1 && hs==0){
-sprintf(sum_vector_file, "%s.sumvec.%s", output_prefix, tgt->lang);
-fprintf(stderr, "# Eval on sum vector file %s\n", sum_vector_file);
-eval_mono(sum_vector_file, tgt->lang, cur_iter);
-
-// cldc
-sprintf(sum_vector_prefix, "%s.sumvec", output_prefix);
-cldc(sum_vector_prefix, cur_iter);
-}
}
}
+fflush(stderr);
+} // end if eval_opt
}
}

@@ -1442,6 +1433,9 @@ int main(int argc, char **argv) {
// tgt sample
if ((i = ArgPos((char *)"-tgt-sample", argc, argv)) > 0) tgt_sample = atof(argv[i + 1]);

+// bi_weight
+if ((i = ArgPos((char *)"-bi-weight", argc, argv)) > 0) bi_weight = atof(argv[i + 1]);

// number of training words (used when we have a vocab file and don't need to go through training corpus to count)
if ((i = ArgPos((char *)"-src-train-words", argc, argv)) > 0) src_train_words = atoi(argv[i + 1]);
if ((i = ArgPos((char *)"-tgt-train-words", argc, argv)) > 0) tgt_train_words = atoi(argv[i + 1]);
@@ -1477,6 +1471,29 @@
return 0;
}

+// char sum_vector_file[MAX_STRING];
+// char sum_vector_prefix[MAX_STRING];
+
+// sum vector for negative sampling
+// if (save_opt==1 && hs==0){
+//   sprintf(sum_vector_file, "%s.sumvec.%s", output_prefix, src->lang);
+//   fprintf(stderr, "# Eval on sum vector file %s\n", sum_vector_file);
+//   eval_mono(sum_vector_file, src->lang, cur_iter);
+// }
+
+// sum vector for negative sampling
+// if (save_opt==1 && hs==0){
+//   sprintf(sum_vector_file, "%s.sumvec.%s", output_prefix, tgt->lang);
+//   fprintf(stderr, "# Eval on sum vector file %s\n", sum_vector_file);
+//   eval_mono(sum_vector_file, tgt->lang, cur_iter);
+//
+//   // cldc
+//   sprintf(sum_vector_prefix, "%s.sumvec", output_prefix);
+//   cldc(sum_vector_prefix, cur_iter);
+// }
+
+// /************************/
+// /* tgt -> src neighbor */
+// /***********************/
