Skip to content

Commit

Permalink
Add an option to change the complex return type.
Browse files Browse the repository at this point in the history
ifort apparently does not return complex numbers in registers as in C/C++ (or gfortran), but instead creates a "hidden" first parameter for the return value. The option --complex-return=gnu|intel has been added, as well as a guess based on a provided FC if not specified (otherwise default to gnu). This option affects the signatures of cdotc, cdotu, zdotc, and zdotu, and a single library cannot be used with both GNU and Intel Fortran compilers. Fixes #433.
  • Loading branch information
devinamatthews committed Aug 6, 2020
1 parent 6e522e5 commit aa1dd61
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 35 deletions.
6 changes: 6 additions & 0 deletions build/bli_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,11 @@
#define BLIS_DISABLE_SHARED
#endif

// Complex return convention for Fortran-callable functions. The configure
// script replaces @complex_return_intel@ with 1 when the "intel" convention
// (return via hidden argument) is selected and with 0 when the "gnu"
// convention (return in registers) is selected.
// NOTE: Both macros below are defined with an empty replacement list, so
// they can only be tested with #ifdef/#ifndef/defined(), never with a bare
// "#if MACRO".
#if @complex_return_intel@
#define BLIS_ENABLE_COMPLEX_RETURN_INTEL
#else
#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
#endif


#endif
59 changes: 58 additions & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,15 @@ print_usage()
echo " when debugging certain configuration issues, and/or as"
echo " a sanity check to make sure these lists are constituted"
echo " as expected."
echo " "
echo " --complex-return=gnu|intel"
echo " "
echo " Specify the way in which complex numbers are returned"
echo " from Fortran functions, either \"gnu\" (return in"
echo " registers) or \"intel\" (return via hidden argument)."
echo " If not specified and the environment variable FC is set,"
echo " attempt to determine the return type from the compiler."
echo " Otherwise, the default is \"gnu\"."
echo " "
echo " -q, --quiet Suppress informational output. By default, configure"
echo " is verbose. (NOTE: -q is not yet implemented)"
Expand All @@ -309,6 +318,7 @@ print_usage()
echo " "
echo " CC Specifies the C compiler to use."
echo " CXX Specifies the C++ compiler to use (sandbox only)."
echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
echo " RANLIB Specifies the ranlib executable to use."
echo " AR Specifies the archiver to use."
echo " CFLAGS Specifies additional compiler flags to use (prepended)."
Expand Down Expand Up @@ -1954,6 +1964,7 @@ main()
enable_sup_handling='yes'
enable_memkind='' # The default memkind value is determined later on.
force_version='no'
complex_return='default'

# The sandbox flag and name.
sandbox_flag=''
Expand Down Expand Up @@ -2142,6 +2153,9 @@ main()
show-config-list)
show_config_list=1
;;
complex-return=*)
complex_return=${OPTARG#*=}
;;
*)
print_usage
;;
Expand Down Expand Up @@ -2996,7 +3010,49 @@ main()

enable_sandbox_01=0
fi

# Resolve the complex return convention when the user did not choose one
# explicitly via --complex-return.
if [ "x${complex_return}" = "xdefault" ]; then

	if [ -z "${FC}" ]; then

		# No Fortran compiler was given, so there is nothing to inspect;
		# use the "gnu" convention.
		complex_return='gnu'

	else

		# Capture whatever the Fortran compiler prints to stdout for
		# --version (stderr is discarded). The output typically contains
		# the vendor name surrounded by other text.
		vendor_string="$(${FC} --version 2>/dev/null)"

		# Pull the first occurrence of a recognized vendor name out of the
		# version output. The trailing { read first rest ; echo $first ; }
		# keeps only the first match, since egrep -o may print several.
		fc_vendor=$(echo "${vendor_string}" | egrep -o 'ifort|GNU' | { read first rest ; echo $first ; })

		# Map the vendor name to a return convention, warning and falling
		# back to "gnu" when the vendor was not recognized.
		case "${fc_vendor}" in
			ifort)
				complex_return='intel'
				;;
			GNU)
				complex_return='gnu'
				;;
			*)
				echo "${script_name}: unable to determine Fortran compiler vendor!"
				complex_return='gnu'
				;;
		esac

	fi
fi

# Translate the chosen convention into the 0/1 value that is substituted
# for @complex_return_intel@, rejecting anything other than "gnu" or
# "intel".
case "${complex_return}" in
	gnu)
		complex_return_intel01='0'
		;;
	intel)
		complex_return_intel01='1'
		;;
	*)
		echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
		echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
		exit 1
		;;
esac

echo "${script_name}: configuring complex return type as \"${complex_return}\"."

# Variables that may contain forward slashes, such as paths, need extra
# escaping when used in sed commands. We insert those extra escape
Expand Down Expand Up @@ -3165,7 +3221,8 @@ main()
| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
| sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
| sed -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
> "${bli_config_h_out_path}"


Expand Down
121 changes: 90 additions & 31 deletions frame/compat/bla_dot.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#include "blis.h"

#ifdef BLIS_ENABLE_BLAS

//
// Define BLAS-to-BLIS interfaces.
Expand All @@ -48,45 +49,103 @@ ftype PASTEF772(ch,blasname,chc) \
const ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
ftype rho; \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
ftype rho; \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
/* Initialize BLIS. */ \
bli_init_auto(); \
\
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
blis_conjx, \
BLIS_NO_CONJUGATE, \
n0, \
x0, incx0, \
y0, incy0, \
&rho, \
NULL, \
NULL \
); \
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
blis_conjx, \
BLIS_NO_CONJUGATE, \
n0, \
x0, incx0, \
y0, incy0, \
&rho, \
NULL, \
NULL \
); \
\
/* Finalize BLIS. */ \
bli_finalize_auto(); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
\
return rho; \
return rho; \
}

#ifdef BLIS_ENABLE_BLAS
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
INSERT_GENTFUNCDOTR_BLAS( dot, dotv )

// NOTE: bli_config.h defines BLIS_DISABLE_COMPLEX_RETURN_INTEL with an empty
// replacement list, so it must be tested with #ifdef. Using "#if" on it
// expands to an "#if" with no expression, which is a preprocessing error.
#ifdef BLIS_DISABLE_COMPLEX_RETURN_INTEL

// "gnu" complex return type: instantiate the complex dot functions with the
// GENTFUNCDOT definition already in effect, which returns rho by value.
INSERT_GENTFUNCDOTC_BLAS( dot, dotv )

#else

// "intel" complex return type: redefine GENTFUNCDOT so that the generated
// functions return void and instead store the result through a hidden first
// parameter (rhop). The remaining parameters are unchanged.
#undef  GENTFUNCDOT
#define GENTFUNCDOT( ftype, ch, chc, blis_conjx, blasname, blisname ) \
\
void PASTEF772(ch,blasname,chc) \
     ( \
       ftype*         rhop, \
       const f77_int* n, \
       const ftype*   x, const f77_int* incx, \
       const ftype*   y, const f77_int* incy \
     ) \
{ \
	dim_t  n0; \
	ftype* x0; \
	ftype* y0; \
	inc_t  incx0; \
	inc_t  incy0; \
	ftype  rho; \
\
	/* Initialize BLIS. */ \
	bli_init_auto(); \
\
	/* Convert/typecast negative values of n to zero. */ \
	bli_convert_blas_dim1( *n, n0 ); \
\
	/* If the input increments are negative, adjust the pointers so we can */ \
	/* use positive increments instead. */ \
	bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
	bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
	/* Call BLIS interface. */ \
	PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
	( \
	  blis_conjx, \
	  BLIS_NO_CONJUGATE, \
	  n0, \
	  x0, incx0, \
	  y0, incy0, \
	  &rho, \
	  NULL, \
	  NULL \
	); \
\
	/* Finalize BLIS. */ \
	bli_finalize_auto(); \
\
	/* Hand the result back through the hidden return parameter. */ \
	*rhop = rho; \
}

INSERT_GENTFUNCDOTC_BLAS( dot, dotv )

#endif // BLIS_DISABLE_COMPLEX_RETURN_INTEL


// -- "Black sheep" dot product function definitions --
Expand Down
22 changes: 19 additions & 3 deletions frame/include/bli_gentfunc_macro_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,19 +74,35 @@ GENTFUNCCO( scomplex, float, c, s, blasname, blisname ) \
GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )


// -- Basic one-operand macro with conjugation (used only for dot, ger) --
// -- Basic one-operand macro with conjugation (real funcs only, used only for dot, ger) --


#define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
#define INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
\
GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname )


// -- Basic one-operand macro with conjugation (complex funcs only, used only for dot, ger) --
// Expands to four GENTFUNCDOT instantiations: scomplex (c) and dcomplex (z),
// each once with BLIS_CONJUGATE (third argument c) and once with
// BLIS_NO_CONJUGATE (third argument u). GENTFUNCDOT must be defined by the
// including file before this macro is used.


#define INSERT_GENTFUNCDOTC_BLAS( blasname, blisname ) \
\
GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )


// -- Basic one-operand macro with conjugation (used only for dot, ger) --
// Convenience wrapper that expands to both the real-only
// (INSERT_GENTFUNCDOTR_BLAS) and complex-only (INSERT_GENTFUNCDOTC_BLAS)
// instantiation lists, using whatever GENTFUNCDOT definition is in effect
// at the point of use.


#define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
\
INSERT_GENTFUNCDOTR_BLAS( blasname, blisname ) \
INSERT_GENTFUNCDOTC_BLAS( blasname, blisname )


// -- Basic one-operand macro with real projection --


Expand Down

0 comments on commit aa1dd61

Please sign in to comment.