From de870b2526f6511241f8c3d1a441566f628947d0 Mon Sep 17 00:00:00 2001 From: jlowryduda Date: Mon, 13 Nov 2017 14:22:45 -0500 Subject: [PATCH] use a previously unused variable, fix documentation --- conceptnet5/vectors/cli.py | 2 +- conceptnet5/vectors/miniaturize.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/conceptnet5/vectors/cli.py b/conceptnet5/vectors/cli.py index fbfa04aa..e2dca362 100644 --- a/conceptnet5/vectors/cli.py +++ b/conceptnet5/vectors/cli.py @@ -249,7 +249,7 @@ def run_export(input_filename, output_filename, language): @click.option('-k', default=300, help="Number of columns to reduce to") def run_miniaturize(input_filename, extra_vocab_filename, output_filename, k): """ - Save a smaller version of a frame, which includes frequent terms and doesn't include phrases. + Save a smaller version of a frame, which includes frequent terms and phrases. """ frame = load_hdf(input_filename) other_frame = load_hdf(extra_vocab_filename) diff --git a/conceptnet5/vectors/miniaturize.py b/conceptnet5/vectors/miniaturize.py index 1e12cc70..edea0957 100644 --- a/conceptnet5/vectors/miniaturize.py +++ b/conceptnet5/vectors/miniaturize.py @@ -60,7 +60,8 @@ def miniaturize(frame, other_vocab=None, k=300, debias=True): smaller = frame.loc[vocab] U, _S, _Vt = np.linalg.svd(smaller, full_matrices=False) redecomposed = pd.DataFrame(U[:, :k], index=vocab, dtype='f') - redecomposed = de_bias_frame(redecomposed) + if debias: + redecomposed = de_bias_frame(redecomposed) mini = (redecomposed * 64).astype(np.int8) mini.sort_index(inplace=True) return mini