Skip to content

Commit b1d65a8

Browse files
committed
Make matrix mod 2 conversion to numpy faster
1 parent 1be0a58 commit b1d65a8

File tree

5 files changed

+191
-28
lines changed

5 files changed

+191
-28
lines changed

src/bin/sage-fixdoctests

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import re
3636
import shlex
3737
import subprocess
3838
import sys
39+
import textwrap
3940

4041
from argparse import ArgumentParser
4142
from collections import defaultdict
@@ -303,7 +304,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
303304
got = re.sub(r'(doctest:warning).*^( *DeprecationWarning:)',
304305
r'\1...\n\2',
305306
got, 1, re.DOTALL | re.MULTILINE)
306-
got = got.splitlines() # got can't be the empty string
307+
got = textwrap.dedent(got).splitlines() # got can't be the empty string
307308

308309
if args.only_tags:
309310
if args.verbose:
@@ -329,7 +330,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
329330
if not expected:
330331
indent = (len(src_in_lines[first_line_num - 1]) - len(src_in_lines[first_line_num - 1].lstrip()))
331332
append_to_line(line_num - 1,
332-
'\n' + '\n'.join('%s%s' % (' ' * indent, line.lstrip()) for line in got),
333+
'\n' + '\n'.join(' ' * indent + line for line in got),
333334
message="Adding the new output")
334335
return
335336

@@ -338,8 +339,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
338339
# has the smallest indentation after lstrip(). Note that the amount of indentation
339340
# required could be negative if the ``got`` block is indented. In this case
340341
# ``indent`` is set to zero.
341-
indent = max(0, (len(src_in_lines[line_num]) - len(src_in_lines[line_num].lstrip())
342-
- min(len(got[j]) - len(got[j].lstrip()) for j in range(len(got)))))
342+
indent = len(src_in_lines[line_num]) - len(src_in_lines[line_num].lstrip())
343343

344344
# Double check that what was expected was indeed in the source file and if
345345
# it is not then then print a warning for the user which contains the
@@ -358,7 +358,7 @@ def process_block(block, src_in_lines, file_optional_tags, venv_explainer=''):
358358
update_line(line_num, None,
359359
message="Expected something, got nothing; deleting the old output")
360360
else:
361-
update_line(line_num, '\n'.join((' ' * indent + got[i]) for i in range(len(got))),
361+
update_line(line_num, '\n'.join(' ' * indent + line for line in got),
362362
message="Replacing the old expected output with the new output")
363363

364364
# Mark any remaining `expected` lines as ``None`` so as to preserve the line numbering

src/sage/matrix/matrix1.pyx

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ import sage.modules.free_module
2525
from sage.structure.coerce cimport coercion_model
2626

2727

28+
_MISSING = object()
29+
30+
2831
cdef class Matrix(Matrix0):
2932
###################################################
3033
# Coercion to Various Systems
@@ -670,7 +673,7 @@ cdef class Matrix(Matrix0):
670673
entries = [[sib(v, 2) for v in row] for row in self.rows()]
671674
return sib.name('matrix')(self.base_ring(), entries)
672675

673-
def numpy(self, dtype=None, copy=True):
676+
def numpy(self, dtype=None, copy=_MISSING):
674677
"""
675678
Return the Numpy matrix associated to this matrix.
676679
@@ -680,10 +683,6 @@ cdef class Matrix(Matrix0):
680683
then the type will be determined as the minimum type required
681684
to hold the objects in the sequence.
682685
683-
- ``copy`` -- if `self` is already an `ndarray`, then this flag
684-
determines whether the data is copied (the default), or whether
685-
a view is constructed.
686-
687686
EXAMPLES::
688687
689688
sage: # needs numpy
@@ -708,14 +707,14 @@ cdef class Matrix(Matrix0):
708707
Type ``numpy.typecodes`` for a list of the possible
709708
typecodes::
710709
711-
sage: import numpy # needs numpy
712-
sage: numpy.typecodes.items() # needs numpy # random
710+
sage: import numpy # needs numpy
711+
sage: numpy.typecodes.items() # needs numpy # random
713712
[('All', '?bhilqpBHILQPefdgFDGSUVOMm'), ('AllFloat', 'efdgFDG'),
714713
...
715714
716715
For instance, you can see possibilities for real floating point numbers::
717716
718-
sage: numpy.typecodes['Float'] # needs numpy
717+
sage: numpy.typecodes['Float'] # needs numpy
719718
'efdg'
720719
721720
Alternatively, numpy automatically calls this function (via
@@ -733,15 +732,70 @@ cdef class Matrix(Matrix0):
733732
dtype('int64') # 64-bit
734733
sage: b.shape
735734
(3, 4)
735+
736+
TESTS::
737+
738+
sage: # needs numpy
739+
sage: matrix(3, range(12)).numpy(copy=False)
740+
doctest:warning...
741+
DeprecationWarning: passing copy argument to numpy() is deprecated
742+
See https://github.com/sagemath/sage/issues/39152 for details.
743+
array([[ 0, 1, 2, 3],
744+
[ 4, 5, 6, 7],
745+
[ 8, 9, 10, 11]])
736746
"""
747+
if copy is not _MISSING:
748+
from sage.misc.superseded import deprecation
749+
deprecation(39152, "passing copy argument to numpy() is deprecated")
737750
import numpy
738-
A = numpy.matrix(self.list(), dtype=dtype, copy=copy)
739-
return numpy.resize(A,(self.nrows(), self.ncols()))
751+
return numpy.asarray(self.list(), dtype=dtype).reshape(self.nrows(), self.ncols())
752+
753+
def __array__(self, dtype=None, copy=None):
754+
"""
755+
Define the magic ``__array__`` function so that ``numpy.array(m)`` can convert
756+
a matrix ``m`` to a numpy array. See
757+
`Interoperability with NumPy <https://numpy.org/doc/1.26/user/basics.interoperability.html>`_.
740758
741-
# Define the magic "__array__" function so that numpy.array(m) can convert
742-
# a matrix m to a numpy array.
743-
# See http://docs.scipy.org/doc/numpy/user/c-info.how-to-extend.html#converting-an-arbitrary-sequence-object
744-
__array__=numpy
759+
Note that subclasses should override :meth:`numpy`, but usually not this method.
760+
761+
INPUT:
762+
763+
- ``dtype`` -- the desired data-type for the array. If not given,
764+
then the type will be determined automatically.
765+
766+
- ``copy`` -- required for numpy 2.0 compatibility.
767+
See <https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword>`_.
768+
Note that ``copy=False`` is not supported.
769+
770+
TESTS::
771+
772+
sage: # needs numpy
773+
sage: import numpy as np
774+
sage: a = matrix(3, range(12))
775+
sage: if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
776+
....: try:
777+
....: np.array(a, copy=False) # in numpy 2.0, this raises an error
778+
....: except ValueError:
779+
....: pass
780+
....: else:
781+
....: assert False
782+
....: else:
783+
....: b = np.array(a, copy=False) # in numpy 1.26, this means "avoid copy if possible"
784+
....: # https://numpy.org/doc/1.26/reference/generated/numpy.array.html#numpy.array
785+
....: # but no-copy is not supported so it will copy anyway
786+
....: a[0,0] = 1
787+
....: assert b[0,0] == 0
788+
....: b = np.asarray(a)
789+
....: a[0,0] = 2
790+
....: assert b[0,0] == 1
791+
"""
792+
import numpy as np
793+
if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
794+
if copy is False:
795+
raise ValueError("Sage matrix cannot be converted to numpy array without copying")
796+
else:
797+
assert copy is None # numpy versions before 2.0 should not pass copy argument
798+
return self.numpy(dtype)
745799

746800
###################################################
747801
# Construction functions

src/sage/matrix/matrix_mod2_dense.pyx

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ from cysignals.signals cimport sig_on, sig_str, sig_off
110110
cimport sage.matrix.matrix_dense as matrix_dense
111111
from sage.matrix.args cimport SparseEntry, MatrixArgs_init, MA_ENTRIES_NDARRAY
112112
from libc.stdio cimport *
113+
from libc.stdint cimport uintptr_t
113114
from sage.structure.element cimport (Matrix, Vector)
114115
from sage.modules.free_module_element cimport FreeModuleElement
115116
from sage.libs.gmp.random cimport *
@@ -581,6 +582,33 @@ cdef class Matrix_mod2_dense(matrix_dense.Matrix_dense): # dense or sparse
581582
return list(C)
582583
return C
583584

585+
def numpy(self, dtype=None):
586+
"""
587+
Return the Numpy matrix associated to this matrix.
588+
See :meth:`.matrix1.Matrix.numpy`.
589+
590+
EXAMPLES::
591+
592+
sage: # needs numpy
593+
sage: import numpy as np
594+
sage: np.array(matrix.identity(GF(2), 5))
595+
array([[1, 0, 0, 0, 0],
596+
[0, 1, 0, 0, 0],
597+
[0, 0, 1, 0, 0],
598+
[0, 0, 0, 1, 0],
599+
[0, 0, 0, 0, 1]], dtype=uint8)
600+
601+
TESTS:
602+
603+
Make sure it's reasonably fast (the temporary numpy array is immediately
604+
destroyed otherwise it consumes 400MB memory)::
605+
606+
sage: np.sum(np.array(matrix.identity(GF(2), 2*10^4))) # around 2s walltime # needs numpy
607+
20000
608+
"""
609+
from ..modules.numpy_util import mzd_matrix_to_numpy
610+
return mzd_matrix_to_numpy(<uintptr_t>self._entries, dtype)
611+
584612
########################################################################
585613
# LEVEL 2 functionality
586614
# * def _pickle

src/sage/matrix/matrix_numpy_dense.pyx

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -366,15 +366,12 @@ cdef class Matrix_numpy_dense(Matrix_dense):
366366

367367
def numpy(self, dtype=None):
368368
"""
369-
Return a copy of the matrix as a numpy array.
370-
371-
It uses the numpy C/api so is very fast.
369+
Return the Numpy matrix associated to this matrix.
372370
373371
INPUT:
374372
375373
- ``dtype`` -- the desired data-type for the array. If not given,
376-
then the type will be determined as the minimum type required
377-
to hold the objects in the sequence.
374+
then the type will be determined automatically.
378375
379376
EXAMPLES::
380377
@@ -424,12 +421,71 @@ cdef class Matrix_numpy_dense(Matrix_dense):
424421
[]
425422
sage: m.numpy()
426423
array([], shape=(5, 0), dtype=float64)
424+
425+
Test that a copy is always made::
426+
427+
sage: import numpy as np
428+
sage: m = matrix(RDF,2); m
429+
[0.0 0.0]
430+
[0.0 0.0]
431+
sage: m[0,0]=1
432+
sage: n=m.numpy() # should copy
433+
sage: m[0,0]=2
434+
sage: n
435+
array([[1., 0.],
436+
[0., 0.]])
437+
sage: n=numpy.array(m) # should copy
438+
sage: m[0,0]=3
439+
sage: n
440+
array([[2., 0.],
441+
[0., 0.]])
442+
sage: n=numpy.asarray(m) # should still copy
443+
sage: m[0,0]=4
444+
sage: n
445+
array([[3., 0.],
446+
[0., 0.]])
447+
sage: n=numpy.asarray(m, dtype=numpy.int64) # should copy
448+
sage: m[0,0]=5
449+
sage: n
450+
array([[4, 0],
451+
[0, 0]])
452+
453+
::
454+
455+
sage: import numpy as np
456+
sage: a = matrix(RDF, 3, range(12))
457+
sage: if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
458+
....: try:
459+
....: np.array(a, copy=False) # in numpy 2.0, this raises an error
460+
....: except ValueError:
461+
....: pass
462+
....: else:
463+
....: assert False
464+
....: else:
465+
....: b = np.array(a, copy=False) # in numpy 1.26, this means "avoid copy if possible"
466+
....: # https://numpy.org/doc/1.26/reference/generated/numpy.array.html#numpy.array
467+
....: # but no-copy is not supported so it will copy anyway
468+
....: a[0,0] = 1
469+
....: assert b[0,0] == 0
470+
....: b = np.asarray(a)
471+
....: a[0,0] = 2
472+
....: assert b[0,0] == 1
473+
474+
Make sure it's reasonably fast (the temporary numpy array is immediately
475+
destroyed otherwise it consumes 200MB memory)::
476+
477+
sage: import numpy as np
478+
sage: np.sum(np.array(matrix.identity(RDF, 5*10^3))).item() # around 2s each
479+
5000.0
480+
sage: np.sum(np.asarray(matrix.identity(RDF, 5*10^3))).item()
481+
5000.0
482+
sage: np.sum(np.asarray(matrix.identity(RDF, 5*10^3), dtype=np.uint8)).item()
483+
5000
484+
sage: np.sum(np.array(matrix.identity(CDF, 3*10^3))).item()
485+
(3000+0j)
427486
"""
428487
import numpy as np
429-
if dtype is None or self._numpy_dtype == np.dtype(dtype):
430-
return self._matrix_numpy.copy()
431-
else:
432-
return Matrix_dense.numpy(self, dtype=dtype)
488+
return np.array(self._matrix_numpy, dtype=dtype)
433489

434490
def _replace_self_with_numpy(self, numpy_matrix):
435491
"""

src/sage/modules/numpy_util.pyx

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,28 @@ cpdef int set_matrix_mod2_from_numpy(Matrix_mod2_dense a, b) except -1:
133133
return (<object>_set_matrix_mod2_from_numpy_helper)(a, b) # https://github.com/cython/cython/issues/6588
134134
except TypeError:
135135
return False
136+
137+
138+
cpdef object mzd_matrix_to_numpy(uintptr_t entries_addr, object dtype):
139+
"""
140+
Convert ``<mzd_t*>entries_addr`` to a numpy array.
141+
142+
INPUT:
143+
144+
- ``entries_addr`` -- must be a ``mzd_t*`` casted to ``uintptr_t``; the casting
145+
is necessary to pass it through Python boundary because of lazy import.
146+
Do not pass arbitrary integer value here, will crash the program.
147+
148+
- ``dtype`` -- numpy dtype. If ``None``, the result will have some convenient dtype.
149+
150+
OUTPUT: a 2-dimensional array.
151+
"""
152+
if dtype is not None:
153+
return mzd_matrix_to_numpy(entries_addr, None).astype(dtype)
154+
cdef mzd_t* entries = <mzd_t*>entries_addr
155+
cdef np.ndarray[np.uint8_t, ndim=2] result = np.empty((entries.nrows, entries.ncols), dtype=np.uint8)
156+
cdef Py_ssize_t i, j
157+
for i in range(entries.nrows):
158+
for j in range(entries.ncols):
159+
result[i, j] = mzd_read_bit(entries, i, j)
160+
return result

0 commit comments

Comments
 (0)