Skip to content

Commit

Permalink
Extend test-case of dmlc#59 (dmlc#61)
Browse files Browse the repository at this point in the history
  • Loading branch information
leezu authored Apr 22, 2018
1 parent 5ebf2d7 commit e082516
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
13 changes: 8 additions & 5 deletions gluonnlp/embedding/token_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,8 @@ def _load_embedding_serialized(self, pretrained_file_path,
if deserialized_embedding.unknown_token == self.unknown_token:
# If the unknown_token is the same, we will find it below and a
# new unknown token won't be inserted.
pass
idx_to_token = deserialized_embedding.idx_to_token
idx_to_vec = deserialized_embedding.idx_to_vec
elif self.unknown_token:
# If they are different, we need to manually replace it so that
# it is found below and no new unknown token would be inserted.
Expand All @@ -343,10 +344,12 @@ def _load_embedding_serialized(self, pretrained_file_path,
else:
# If the TokenEmbedding shall not have an unknown token, we
# just delete the one in the npz.
idx_to_token = np.delete(
deserialized_embedding.idx_to_token, obj=C.UNK_IDX, axis=0)
idx_to_vec = np.delete(
deserialized_embedding.idx_to_vec, obj=C.UNK_IDX, axis=0)
idx_to_token = (
deserialized_embedding.idx_to_token[:C.UNK_IDX] +
deserialized_embedding.idx_to_token[C.UNK_IDX + 1:])
idx_to_vec = nd.concat(
deserialized_embedding.idx_to_vec[:C.UNK_IDX],
deserialized_embedding.idx_to_vec[C.UNK_IDX + 1:])
else:
idx_to_token = deserialized_embedding.idx_to_token
idx_to_vec = deserialized_embedding.idx_to_vec
Expand Down
6 changes: 4 additions & 2 deletions tests/unittest/test_vocab_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,9 +790,11 @@ def test_token_embedding_from_serialized_file():
assert my_embed_serialize == my_embed_text


def test_token_embedding_from_file_S3_with_custom_unknown_token():
@pytest.mark.parametrize('unknown_token',
['<strangetoken>', None, nlp._constants.UNK_TOKEN])
def test_token_embedding_from_file_S3_with_custom_unknown_token(unknown_token):
embed = nlp.embedding.create('glove', source='glove.6B.50d',
unknown_token='<strangeunknonwntoken>')
unknown_token=unknown_token)


def test_token_embedding_serialization(tmpdir):
Expand Down

0 comments on commit e082516

Please sign in to comment.