Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update numpy -> 2.0 and remove deprecated scipy #3596

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
4 changes: 2 additions & 2 deletions .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
#
# We use Py3.9 (previously Py3.8) here for historical reasons.
#
python-version: "3.8"
python-version: "3.9"

- name: Update pip
run: python -m pip install -U pip
Expand All @@ -35,7 +35,7 @@ jobs:
sudo apt-get -yq update
sudo apt-get -yq remove texlive-binaries --purge
sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended latexmk
sudo apt-get -yq install build-essential python3.8-dev
sudo apt-get -yq install build-essential python3.9-dev
- name: Install gensim and its dependencies
run: pip install -e .[docs]

Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/build-wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,21 +61,17 @@ jobs:
fail-fast: false
matrix:
include:
- {python: '3.8', os: macos-12}
- {python: '3.9', os: macos-12}
- {python: '3.10', os: macos-12}
- {python: '3.11', os: macos-12}
- {python: '3.12', os: macos-12}

- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python: '3.12', os: ubuntu-20.04}

- {python: '3.8', os: windows-2019}
- {python: '3.9', os: windows-2019}

- {python: '3.10', os: windows-2019}
- {python: '3.11', os: windows-2019}
- {python: '3.12', os: windows-2019}
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
#
# We use Py3.9 (previously Py3.8) here for historical reasons.
#
python-version: "3.8"
python-version: "3.9"

- name: Update pip
run: python -m pip install -U pip
Expand All @@ -43,7 +43,7 @@ jobs:
sudo apt-get -yq update
sudo apt-get -yq remove texlive-binaries --purge
sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended latexmk
sudo apt-get -yq install build-essential python3.8-dev
sudo apt-get -yq install build-essential python3.9-dev
- name: Install gensim and its dependencies
run: pip install -e .[docs]

Expand All @@ -63,13 +63,11 @@ jobs:
fail-fast: false
matrix:
include:
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}
- {python: '3.12', os: ubuntu-20.04}

- {python: '3.8', os: windows-2019}
- {python: '3.9', os: windows-2019}
- {python: '3.10', os: windows-2019}
- {python: '3.11', os: windows-2019}
Expand Down
8 changes: 4 additions & 4 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,9 +495,9 @@ def get_mean_vector(self, keys, weights=None, pre_normalize=True, post_normalize
if len(keys) == 0:
raise ValueError("cannot compute mean with no input")
if isinstance(weights, list):
weights = np.array(weights)
weights = np.array(weights, dtype=self.vectors.dtype)
if weights is None:
weights = np.ones(len(keys))
weights = np.ones(len(keys), dtype=self.vectors.dtype)
if len(keys) != weights.shape[0]: # weights is a 1-D numpy array
raise ValueError(
"keys and weights array must have same number of elements"
Expand Down Expand Up @@ -1667,7 +1667,7 @@ def save_word2vec_format(
if binary:
fout.write(f"{prefix}{key} ".encode('utf8') + key_vector.astype(REAL).tobytes())
else:
fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector)}\n".encode('utf8'))
fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector.tolist())}\n".encode('utf8'))

@classmethod
def load_word2vec_format(
Expand Down Expand Up @@ -1977,7 +1977,7 @@ def _word2vec_read_text(fin, kv, counts, vocab_size, vector_size, datatype, unic

def _word2vec_line_to_vector(line, datatype, unicode_errors, encoding):
parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ")
word, weights = parts[0], [datatype(x) for x in parts[1:]]
word, weights = parts[0], [datatype(x).item() for x in parts[1:]]
return word, weights


Expand Down
5 changes: 3 additions & 2 deletions gensim/models/ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,11 @@ def update_dir_prior(prior, N, logphat, rho):
The updated prior.

"""
dtype = logphat.dtype
gradf = N * (psi(np.sum(prior)) - psi(prior) + logphat)

c = N * polygamma(1, np.sum(prior))
q = -N * polygamma(1, prior)
c = N * polygamma(1, np.sum(prior)).astype(dtype)
q = -N * polygamma(1, prior).astype(dtype)

b = np.sum(gradf / q) / (1 / c + np.sum(1 / q))

Expand Down
12 changes: 3 additions & 9 deletions gensim/models/lsimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
import numpy as np
import scipy.linalg
import scipy.sparse
from scipy.sparse import sparsetools

from gensim import interfaces, matutils, utils
from gensim.models import basemodel
Expand Down Expand Up @@ -960,10 +959,8 @@ def stochastic_svd(
m, n = corpus.shape
assert num_terms == m, f"mismatch in number of features: {m} in sparse matrix vs. {num_terms} parameter"
o = random_state.normal(0.0, 1.0, (n, samples)).astype(y.dtype) # draw a random gaussian matrix
sparsetools.csc_matvecs(
m, n, samples, corpus.indptr, corpus.indices,
corpus.data, o.ravel(), y.ravel(),
) # y = corpus * o
y = corpus.dot(o) # y = corpus * o

del o

# unlike np, scipy.sparse `astype()` copies everything, even if there is no change to dtype!
Expand Down Expand Up @@ -994,10 +991,7 @@ def stochastic_svd(
num_docs += n
logger.debug("multiplying chunk * gauss")
o = random_state.normal(0.0, 1.0, (n, samples), ).astype(dtype) # draw a random gaussian matrix
sparsetools.csc_matvecs(
m, n, samples, chunk.indptr, chunk.indices, # y = y + chunk * o
chunk.data, o.ravel(), y.ravel(),
)
y = y + chunk * o
del chunk, o
y = [y]
q, _ = matutils.qr_destroy(y) # orthonormalize the range
Expand Down
42 changes: 21 additions & 21 deletions gensim/test/test_similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,11 @@ def test_full(self, num_best=None):
# Sparse array.
for i, sim in sims:
# Note that similarities are greater than zero, as they are computed as 1 / (1 + distance).
self.assertTrue(numpy.alltrue(sim > 0.0))
self.assertTrue(numpy.all(sim > 0.0))
else:
self.assertTrue(sims[0] == 1.0) # Distance of a document to itself is 0.0, so its similarity is 1.0.
self.assertTrue(numpy.alltrue(sims[1:] > 0.0))
self.assertTrue(numpy.alltrue(sims[1:] < 1.0))
self.assertTrue(numpy.all(sims[1:] > 0.0))
self.assertTrue(numpy.all(sims[1:] < 1.0))

@unittest.skipIf(POT_EXT is False, "POT not installed")
def test_non_increasing(self):
Expand All @@ -354,24 +354,24 @@ def test_chunking(self):
sims = index[query]

for i in range(3):
self.assertTrue(numpy.alltrue(sims[i, i] == 1.0)) # Distance of a document to itself is 0.0, so its similarity is 1.0.
self.assertTrue(numpy.all(sims[i, i] == 1.0)) # Distance of a document to itself is 0.0, so its similarity is 1.0.

# test the same thing but with num_best
index.num_best = 3
sims = index[query]
for sims_temp in sims:
for i, sim in sims_temp:
self.assertTrue(numpy.alltrue(sim > 0.0))
self.assertTrue(numpy.alltrue(sim <= 1.0))
self.assertTrue(numpy.all(sim > 0.0))
self.assertTrue(numpy.all(sim <= 1.0))

@unittest.skipIf(POT_EXT is False, "POT not installed")
def test_iter(self):
# Override testIter.

index = self.cls(TEXTS, self.w2v_model)
for sims in index:
self.assertTrue(numpy.alltrue(sims >= 0.0))
self.assertTrue(numpy.alltrue(sims <= 1.0))
self.assertTrue(numpy.all(sims >= 0.0))
self.assertTrue(numpy.all(sims <= 1.0))

@unittest.skipIf(POT_EXT is False, "POT not installed")
def test_str(self):
Expand Down Expand Up @@ -399,12 +399,12 @@ def test_full(self, num_best=None):
if num_best is not None:
# Sparse array.
for i, sim in sims:
self.assertTrue(numpy.alltrue(sim <= 1.0))
self.assertTrue(numpy.alltrue(sim >= 0.0))
self.assertTrue(numpy.all(sim <= 1.0))
self.assertTrue(numpy.all(sim >= 0.0))
else:
self.assertAlmostEqual(1.0, sims[0]) # Similarity of a document with itself is 1.0.
self.assertTrue(numpy.alltrue(sims[1:] >= 0.0))
self.assertTrue(numpy.alltrue(sims[1:] < 1.0))
self.assertTrue(numpy.all(sims[1:] >= 0.0))
self.assertTrue(numpy.all(sims[1:] < 1.0))

# Corpora
for query in (
Expand All @@ -416,15 +416,15 @@ def test_full(self, num_best=None):
# Sparse array.
for result in sims:
for i, sim in result:
self.assertTrue(numpy.alltrue(sim <= 1.0))
self.assertTrue(numpy.alltrue(sim >= 0.0))
self.assertTrue(numpy.all(sim <= 1.0))
self.assertTrue(numpy.all(sim >= 0.0))
else:
for i, result in enumerate(sims):
self.assertAlmostEqual(1.0, result[i]) # Similarity of a document with itself is 1.0.
self.assertTrue(numpy.alltrue(result[:i] >= 0.0))
self.assertTrue(numpy.alltrue(result[:i] < 1.0))
self.assertTrue(numpy.alltrue(result[i + 1:] >= 0.0))
self.assertTrue(numpy.alltrue(result[i + 1:] < 1.0))
self.assertTrue(numpy.all(result[:i] >= 0.0))
self.assertTrue(numpy.all(result[:i] < 1.0))
self.assertTrue(numpy.all(result[i + 1:] >= 0.0))
self.assertTrue(numpy.all(result[i + 1:] < 1.0))

def test_non_increasing(self):
""" Check that similarities are non-increasing when `num_best` is not `None`."""
Expand All @@ -445,7 +445,7 @@ def test_chunking(self):
sims = index[query]

for i in range(3):
self.assertTrue(numpy.alltrue(sims[i, i] == 1.0)) # Similarity of a document with itself is 1.0.
self.assertTrue(numpy.all(sims[i, i] == 1.0)) # Similarity of a document with itself is 1.0.

# test the same thing but with num_best
index.num_best = 5
Expand All @@ -459,8 +459,8 @@ def test_chunking(self):
def test_iter(self):
index = self.cls(CORPUS, self.similarity_matrix)
for sims in index:
self.assertTrue(numpy.alltrue(sims >= 0.0))
self.assertTrue(numpy.alltrue(sims <= 1.0))
self.assertTrue(numpy.all(sims >= 0.0))
self.assertTrue(numpy.all(sims <= 1.0))


class TestSparseMatrixSimilarity(_TestSimilarityABC):
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ requires = [
"Cython>=0.29.32,<3.0.0",
# oldest supported Numpy for this platform is 1.17 but the oldest supported by Gensim
# is 1.18.5, remove the line when they increase oldest supported Numpy for this platform
"numpy==1.18.5; python_version=='3.8' and platform_machine not in 'arm64|aarch64'",
"oldest-supported-numpy; python_version>'3.8' or platform_machine in 'arm64|aarch64'",
# 20240604 GM: testing numpy-2.0.0 which requires python >= 3.9 (to 3.12)
"numpy==2.0.0; python_version>='3.9' and platform_machine not in 'arm64|aarch64'",
# "oldest-supported-numpy; python_version>'3.8' or platform_machine in 'arm64|aarch64'",
"scipy",
"setuptools",
"wheel",
]
10 changes: 3 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,10 +324,7 @@ def run(self):
'pandas',
]

#
# see https://github.com/piskvorky/gensim/pull/3535
#
NUMPY_STR = 'numpy >= 1.18.5, < 2.0'
NUMPY_STR = 'numpy == 2.0.0'

install_requires = [
NUMPY_STR,
Expand All @@ -340,7 +337,7 @@ def run(self):

setup(
name='gensim',
version='4.3.3',
version='4.4.0a0.dev0',
description='Python framework for fast Vector Space Modelling',
long_description=LONG_DESCRIPTION,

Expand Down Expand Up @@ -373,7 +370,6 @@ def run(self):
'Environment :: Console',
'Intended Audience :: Science/Research',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
Expand All @@ -385,7 +381,7 @@ def run(self):
],

test_suite="gensim.test",
python_requires='>=3.8',
python_requires='>=3.9',
install_requires=install_requires,
tests_require=linux_testenv,
extras_require={
Expand Down
Loading