[WIP] Implement position-dependent weighting to fastText #2905

Draft · wants to merge 7 commits into base: develop
Fix initialization for position_dependent_vector_size < vector_size
Witiko committed Nov 30, 2020
commit 8f9110ac4949b910183230d60ed8cf201c4c62b0
gensim/models/fasttext.py (49 additions, 14 deletions)
@@ -484,8 +484,8 @@ def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, vector_size=100
             bucket = 0
 
         self.wv = FastTextKeyedVectors(
-            vector_size, position_dependent_weights, position_dependent_vector_size, min_n, max_n,
-            bucket)
+            vector_size, self.position_dependent_weights, position_dependent_vector_size,
+            min_n, max_n, bucket)
         self.wv.bucket = bucket
 
         super(FastText, self).__init__(
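For orientation, here is a sketch of how the new constructor parameters threaded through above would be used. The parameter names `position_dependent_weights` and `position_dependent_vector_size` come from this branch, not a released gensim, and the toy corpus is made up:

```python
from gensim.models import FastText

sentences = [["human", "interface", "computer"], ["survey", "user", "computer"]]

# Sketch of this branch's new parameters: every context position gets its own
# weight vector, but only over the first 50 of the 100 embedding dimensions.
model = FastText(
    sentences,
    vector_size=100,
    window=5,
    min_count=1,
    position_dependent_weights=1,
    position_dependent_vector_size=50,
)
```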
@@ -1366,10 +1366,6 @@ def init_ngrams_weights(self, seed, window):
 
         rand_obj = np.random.default_rng(seed=seed)  # use new instance of numpy's recommended generator/algorithm
 
-        vocab_shape = (len(self), self.vector_size)
-        ngrams_shape = (self.bucket, self.vector_size)
-        positions_shape = (2 * window, self.position_dependent_vector_size)
-        #
         # We could have initialized vectors_ngrams at construction time, but we
         # do it here for two reasons:
         #
@@ -1379,13 +1375,38 @@ def init_ngrams_weights(self, seed, window):
         # time because the vocab is not initialized at that stage.
         #
         if self.position_dependent_weights:
+            vocab_shape = (len(self), self.position_dependent_vector_size)
+            ngrams_shape = (self.bucket, self.position_dependent_vector_size)
+            positions_shape = (2 * window, self.position_dependent_vector_size)
             hi = sqrt(sqrt(3.0) / self.vector_size)
             lo = -hi
             self.vectors_positions = rand_obj.uniform(lo, hi, positions_shape).astype(REAL)
+            self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
+            self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)
+            if self.vector_size > self.position_dependent_vector_size:
+                vocab_shape = (len(self), self.vector_size - self.position_dependent_vector_size)
+                ngrams_shape = (self.bucket, self.vector_size - self.position_dependent_vector_size)
+                lo, hi = -1.0 / self.vector_size, 1.0 / self.vector_size
+                self.vectors_vocab = np.concatenate(
+                    (
+                        self.vectors_vocab,
+                        rand_obj.uniform(lo, hi, vocab_shape).astype(REAL),
+                    ),
+                    axis=-1,
+                )
+                self.vectors_ngrams = np.concatenate(
+                    (
+                        self.vectors_ngrams,
+                        rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL),
+                    ),
+                    axis=-1,
+                )
         else:
+            vocab_shape = (len(self), self.vector_size)
+            ngrams_shape = (self.bucket, self.vector_size)
             lo, hi = -1.0 / self.vector_size, 1.0 / self.vector_size
-        self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
-        self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)
+            self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
+            self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)
 
     def update_ngrams_weights(self, seed, old_vocab_len):
         """Update the vocabulary weights for training continuation.
@@ -1408,8 +1429,13 @@ def update_ngrams_weights(self, seed, old_vocab_len):
         rand_obj.seed(seed)
 
         new_vocab = len(self) - old_vocab_len
-        self.vectors_vocab = _pad_random(self.vectors_vocab, new_vocab, rand_obj,
-                                         squared=self.position_dependent_weights)
+        self.vectors_vocab = _pad_random(
+            self.vectors_vocab,
+            new_vocab,
+            rand_obj,
+            position_dependent_weights=self.position_dependent_weights,
+            position_dependent_vector_size=self.position_dependent_vector_size,
+        )
 
     def init_post_load(self, fb_vectors):
         """Perform initialization after loading a native Facebook model.
@@ -1476,16 +1502,25 @@ def recalc_char_ngram_buckets(self):
         )
 
 
-def _pad_random(m, new_rows, rand, squared=False):
+def _pad_random(m, new_rows, rand, position_dependent_weights=False, position_dependent_vector_size=0):
     """Pad a matrix with additional rows filled with random values."""
     _, columns = m.shape
-    shape = (new_rows, columns)
-    if squared:
+    if position_dependent_weights:
+        shape = (new_rows, position_dependent_vector_size)
         high = sqrt(sqrt(3.0) / columns)
         low = -high
+        suffix = rand.uniform(low, high, shape).astype(REAL)
+        if columns > position_dependent_vector_size:
+            shape = (new_rows, columns - position_dependent_vector_size)
+            low, high = -1.0 / columns, 1.0 / columns
+            suffix = np.concatenate(
+                (suffix, rand.uniform(low, high, shape).astype(REAL)),
+                axis=-1,
+            )
     else:
+        shape = (new_rows, columns)
         low, high = -1.0 / columns, 1.0 / columns
-    suffix = rand.uniform(low, high, shape).astype(REAL)
+        suffix = rand.uniform(low, high, shape).astype(REAL)
     return vstack([m, suffix])
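If this branch is installed, the new `_pad_random` contract can be exercised directly. `_pad_random` is a private helper, so the following is only an illustrative check, not supported API:

```python
import numpy as np
from gensim.models.fasttext import _pad_random  # private helper on this branch

rand = np.random.RandomState(0)
m = np.zeros((4, 100), dtype=np.float32)
padded = _pad_random(
    m, 2, rand,
    position_dependent_weights=True,
    position_dependent_vector_size=50,
)
assert padded.shape == (6, 100)  # two new rows appended below the original
# New rows mix the two bounds: the first 50 columns use the wider
# sqrt(sqrt(3) / 100) bound, the remaining 50 the usual 1 / 100 bound.
assert np.abs(padded[4:, :50]).max() <= np.sqrt(np.sqrt(3.0) / 100)
assert np.abs(padded[4:, 50:]).max() <= 1.0 / 100
```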