From ef7707947b85d141e955e844ec1800df9eddb5c2 Mon Sep 17 00:00:00 2001 From: Ilhan Polat Date: Thu, 8 Nov 2018 00:20:16 +0100 Subject: [PATCH 1/4] MAINT: Speed up get_(lapack,blas)_func --- scipy/linalg/blas.py | 59 ++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/scipy/linalg/blas.py b/scipy/linalg/blas.py index 6462db9e288d..3edef627e272 100644 --- a/scipy/linalg/blas.py +++ b/scipy/linalg/blas.py @@ -206,7 +206,6 @@ # from __future__ import division, print_function, absolute_import - __all__ = ['get_blas_funcs', 'find_best_blas_type'] import numpy as _np @@ -222,8 +221,36 @@ from scipy.linalg._fblas import * del empty_module -# 'd' will be default for 'i',.. -_type_conv = {'f': 's', 'd': 'd', 'F': 'c', 'D': 'z', 'G': 'z'} +# all numeric dtypes '?bBhHiIlLqQefdgFDGO' that are safe to be converted to + +# single precision float +# WIN : '?bBhH!!!!!!ef!!!!!!' +# NIX : '?bBhH!!!!!!ef!!!!!!' + +# double precision float +# WIN : '?bBhHiIlLqQefdg!!!!' +# NIX : '?bBhHiIlLqQefd!!!!!' + +# single precision complex +# WIN : '?bBhH!!!!!!ef!!F!!!' +# NIX : '?bBhH!!!!!!ef!!F!!!' + +# double precision complex +# WIN : '?bBhHiIlLqQefdgFDG!' +# NIX : '?bBhHiIlLqQefd!FD!!' + +_type_score = {x: 1 for x in '?bBhHef'} +_type_score.update({x: 2 for x in 'iIlLqQd'}) + +# Handle float128 and complex256 separately for non-windows systems +# otherwise it will overwrite the same key with same value +_type_score.update({'F': 3, 'D': 4, 'g': 2, 'G': 4}) + +# Final mapping to the actual prefixes and dtypes +_type_conv = {1: ('s', _np.dtype('float32')), + 2: ('d', _np.dtype('float64')), + 3: ('c', _np.dtype('complex64')), + 4: ('z', _np.dtype('complex128'))} # some convenience alias for complex functions _blas_alias = {'cnrm2': 'scnrm2', 'znrm2': 'dznrm2', @@ -270,26 +297,26 @@ def find_best_blas_type(arrays=(), dtype=None): """ dtype = _np.dtype(dtype) + max_score = _type_score.get(dtype.char, 5) prefer_fortran = False if arrays: # use the most generic type in arrays - dtypes = [ar.dtype for ar in arrays] - dtype = _np.find_common_type(dtypes, ()) - try: - index = dtypes.index(dtype) - except ValueError: - index = 0 - if arrays[index].flags['FORTRAN']: + chars = [arr.dtype.char for arr in arrays] + scores = [_type_score.get(x, 5) for x in chars] + max_score = max(scores) + ind_max_score = scores.index(max_score) + # safe upcasting for mix of float64 and complex64 --> prefix 'z' + if max_score == 3 and (2 in scores): + max_score = 4 + + if arrays[ind_max_score].flags['FORTRAN']: # prefer Fortran for leading array with column major order prefer_fortran = True - prefix = _type_conv.get(dtype.char, 'd') - if dtype.char == 'G': - # complex256 -> complex128 (i.e., C long double -> C double) - dtype = _np.dtype('D') - elif dtype.char not in 'fdFD': - dtype = _np.dtype('d') + # Get the LAPACK prefix and the corresponding dtype if not fall back + # to 'd' and double precision float. + prefix, dtype = _type_conv.get(max_score, ('d', _np.dtype('float64'))) return prefix, dtype, prefer_fortran From 4d6d7eb5836b5fccd61e4c07d1f9dcacc8f45702 Mon Sep 17 00:00:00 2001 From: Ilhan Polat Date: Thu, 8 Nov 2018 20:22:00 +0100 Subject: [PATCH 2/4] BENCH: STY: Added basic benchmarks to BLAS/LAPACK --- benchmarks/benchmarks/blas_lapack.py | 38 ++++++++++++++++++++++++++++ scipy/linalg/blas.py | 5 +++- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 benchmarks/benchmarks/blas_lapack.py diff --git a/benchmarks/benchmarks/blas_lapack.py b/benchmarks/benchmarks/blas_lapack.py new file mode 100644 index 000000000000..deb8f8971b8f --- /dev/null +++ b/benchmarks/benchmarks/blas_lapack.py @@ -0,0 +1,38 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np + +try: + import scipy.linalg.lapack as la + import scipy.linalg.blas as bla +except ImportError: + pass + +from .common import Benchmark + + +class GetBlasLapackFuncs(Benchmark): + """ + Test the speed of grabbing the correct BLAS/LAPACK routine flavor. + + In particular, upon receiving strange dtype arrays the results shouldn't + diverge too much. Hence the results here should be comparable + """ + + param_names = ['dtype1', 'dtype2', + 'dtype1_ord', 'dtype2_ord', + 'size'] + params = [ + ['b', 'G', 'd'], + ['d', 'F', '?'] + ['C', 'F'], + ['C', 'F'], + [10, 100, 1000] + ] + + def setup(self, dtype1, dtype2, dtype1_ord, dtype2_ord, size): + self.arr1 = np.empty(size, dtype=dtype1, order=dtype1_ord) + self.arr2 = np.empty(size, dtype=dtype2, order=dtype2_ord) + + def time_find_best_blas_type(self, arr1, arr2): + prefix, dtype, prefer_fortran = bla.find_best_blas_type((arr1, arr2)) diff --git a/scipy/linalg/blas.py b/scipy/linalg/blas.py index 3edef627e272..90a46961e819 100644 --- a/scipy/linalg/blas.py +++ b/scipy/linalg/blas.py @@ -206,6 +206,7 @@ # from __future__ import division, print_function, absolute_import + __all__ = ['get_blas_funcs', 'find_best_blas_type'] import numpy as _np @@ -283,6 +284,8 @@ def find_best_blas_type(arrays=(), dtype=None): prefer_fortran : bool Whether to prefer Fortran order routines over C order. + .. versionchanged:: 1.2.0 + Examples -------- >>> import scipy.linalg.blas as bla @@ -345,7 +348,7 @@ def _get_funcs(names, arrays, dtype, if prefer_fortran: module1, module2 = module2, module1 - for i, name in enumerate(names): + for name in names: func_name = prefix + name func_name = alias.get(func_name, func_name) func = getattr(module1[0], func_name, None) From 55f3a04ff65677ef13303a9fe9d4878616dfbaf0 Mon Sep 17 00:00:00 2001 From: Ilhan Polat Date: Mon, 12 Nov 2018 01:03:24 +0100 Subject: [PATCH 3/4] MAINT: Add a shortcut and skip one extra list compr. --- scipy/linalg/blas.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/scipy/linalg/blas.py b/scipy/linalg/blas.py index 90a46961e819..324b34b01c58 100644 --- a/scipy/linalg/blas.py +++ b/scipy/linalg/blas.py @@ -284,7 +284,7 @@ def find_best_blas_type(arrays=(), dtype=None): prefer_fortran : bool Whether to prefer Fortran order routines over C order. - .. versionchanged:: 1.2.0 + .. versionchanged:: 1.3.0 Examples -------- @@ -304,18 +304,22 @@ def find_best_blas_type(arrays=(), dtype=None): prefer_fortran = False if arrays: - # use the most generic type in arrays - chars = [arr.dtype.char for arr in arrays] - scores = [_type_score.get(x, 5) for x in chars] - max_score = max(scores) - ind_max_score = scores.index(max_score) - # safe upcasting for mix of float64 and complex64 --> prefix 'z' - if max_score == 3 and (2 in scores): - max_score = 4 - - if arrays[ind_max_score].flags['FORTRAN']: - # prefer Fortran for leading array with column major order - prefer_fortran = True + # In most cases, single element is passed through, quicker route + if len(arrays) == 1: + max_score = _type_score.get(arrays[0].dtype.char, 5) + prefer_fortran = arrays[0].flags['FORTRAN'] + else: + # use the most generic type in arrays + scores = [_type_score.get(x.dtype.char, 5) for x in arrays] + max_score = max(scores) + ind_max_score = scores.index(max_score) + # safe upcasting for mix of float64 and complex64 --> prefix 'z' + if max_score == 3 and (2 in scores): + max_score = 4 + + if arrays[ind_max_score].flags['FORTRAN']: + # prefer Fortran for leading array with column major order + prefer_fortran = True # Get the LAPACK prefix and the corresponding dtype if not fall back # to 'd' and double precision float. From e44e45f687d53e56f6d576d3ff9a9b6da0a6577f Mon Sep 17 00:00:00 2001 From: Ilhan Polat Date: Wed, 26 Dec 2018 21:03:51 +0100 Subject: [PATCH 4/4] DOC: Clarify the longdouble mapping details [ci skip] [ci skip] --- scipy/linalg/blas.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/scipy/linalg/blas.py b/scipy/linalg/blas.py index 324b34b01c58..04be44dc4cba 100644 --- a/scipy/linalg/blas.py +++ b/scipy/linalg/blas.py @@ -224,27 +224,16 @@ # all numeric dtypes '?bBhHiIlLqQefdgFDGO' that are safe to be converted to -# single precision float -# WIN : '?bBhH!!!!!!ef!!!!!!' -# NIX : '?bBhH!!!!!!ef!!!!!!' - -# double precision float -# WIN : '?bBhHiIlLqQefdg!!!!' -# NIX : '?bBhHiIlLqQefd!!!!!' - -# single precision complex -# WIN : '?bBhH!!!!!!ef!!F!!!' -# NIX : '?bBhH!!!!!!ef!!F!!!' - -# double precision complex -# WIN : '?bBhHiIlLqQefdgFDG!' -# NIX : '?bBhHiIlLqQefd!FD!!' +# single precision float : '?bBhH!!!!!!ef!!!!!!' +# double precision float : '?bBhHiIlLqQefdg!!!!' +# single precision complex : '?bBhH!!!!!!ef!!F!!!' +# double precision complex : '?bBhHiIlLqQefdgFDG!' _type_score = {x: 1 for x in '?bBhHef'} _type_score.update({x: 2 for x in 'iIlLqQd'}) -# Handle float128 and complex256 separately for non-windows systems -# otherwise it will overwrite the same key with same value +# Handle float128(g) and complex256(G) separately in case non-windows systems. +# On windows, the values will be rewritten to the same key with the same value. _type_score.update({'F': 3, 'D': 4, 'g': 2, 'G': 4}) # Final mapping to the actual prefixes and dtypes @@ -284,8 +273,6 @@ def find_best_blas_type(arrays=(), dtype=None): prefer_fortran : bool Whether to prefer Fortran order routines over C order. - .. versionchanged:: 1.3.0 - Examples -------- >>> import scipy.linalg.blas as bla