diff --git a/src/cpu/cpu_reducer.hpp b/src/cpu/cpu_reducer.hpp index 6c364193e08..d370775b3e5 100644 --- a/src/cpu/cpu_reducer.hpp +++ b/src/cpu/cpu_reducer.hpp @@ -198,6 +198,10 @@ struct cpu_reducer_t { void reduce_nolock(int ithr, data_t *dst); }; +// Explicit instantiations in cpu_reducer.cpp. +extern template struct cpu_reducer_t; +extern template struct cpu_reducer_t; + template struct cpu_reducer_2d_t { typedef typename prec_traits::type data_t; @@ -254,6 +258,10 @@ struct cpu_reducer_2d_t { void reduce_nolock(int ithr, data_t *dst); }; +// Explicit instantiations in cpu_reducer.cpp. +extern template struct cpu_reducer_2d_t; +extern template struct cpu_reducer_2d_t; + /** simple 1d accumulator: y[:] += x[:] */ template struct cpu_accumulator_1d_t { @@ -266,6 +274,10 @@ struct cpu_accumulator_1d_t { reducer_2d_driver_t *drv_; }; +// Explicit instantiations in cpu_reducer.cpp. +extern template struct cpu_accumulator_1d_t; +extern template struct cpu_accumulator_1d_t; + } } } diff --git a/src/cpu/gemm/gemm.hpp b/src/cpu/gemm/gemm.hpp index 3f33a37132c..5eb2f0df3b6 100644 --- a/src/cpu/gemm/gemm.hpp +++ b/src/cpu/gemm/gemm.hpp @@ -36,6 +36,18 @@ void ref_gemm(const char *transa, const char *transb, const int *M, const int *N, const int *K, const data_t *alpha, const data_t *A, const int *lda, const data_t *B, const int *ldb, const data_t *beta, data_t *C, const int *ldc, const data_t *bias); + +// Explicit instantiations in ref_gemm.cpp. +extern template +void ref_gemm(const char *transa_, const char *transb_, + const int *M_, const int *N_, const int *K_, const float *alpha_, + const float *A, const int *lda_, const float *B, const int *ldb_, + const float *beta_, float *C, const int *ldc_, const float *bias); +extern template +void ref_gemm(const char *transa_, const char *transb_, + const int *M_, const int *N_, const int *K_, const double *alpha_, + const double *A, const int *lda_, const double *B, const int *ldb_, + const double *beta_, double *C, const int *ldc_, const double *bias); #ifdef USE_CBLAS #define GEMM_IMPL_STR "gemm:blas" #else diff --git a/src/cpu/gemm/gemm_utils.hpp b/src/cpu/gemm/gemm_utils.hpp index 0888787b9ce..ca6e59993b9 100644 --- a/src/cpu/gemm/gemm_utils.hpp +++ b/src/cpu/gemm/gemm_utils.hpp @@ -51,6 +51,14 @@ template void sum_two_matrices( int m, int n, data_type *p_src, int ld_src, data_type *p_dst, int ld_dst); +// Explicit instantiations are provided in gemm_utils.cpp. +extern template +void sum_two_matrices( + int m, int n, float *p_src, int ld_src, float *p_dst, int ld_dst); +extern template +void sum_two_matrices( + int m, int n, double *p_src, int ld_src, double *p_dst, int ld_dst); + void calc_nthr_nocopy_avx512_common(int m, int n, int k, int nthrs, int *nthrs_m, int *nthrs_n, int *nthrs_k, int *BM, int *BN, int *BK); diff --git a/src/cpu/jit_avx512_common_convolution_winograd.hpp b/src/cpu/jit_avx512_common_convolution_winograd.hpp index bba30ea8944..1e98a88af10 100644 --- a/src/cpu/jit_avx512_common_convolution_winograd.hpp +++ b/src/cpu/jit_avx512_common_convolution_winograd.hpp @@ -203,6 +203,20 @@ struct _jit_avx512_common_convolution_winograd_t { const primitive_attr_t *attr_; }; +// Explicit instantiations in jit_avx512_common_convolution_winograd.cpp. +extern template void +_jit_avx512_common_convolution_winograd_t::_execute_data_W_S_G_D( + float *, float *, float *, float *); +extern template void +_jit_avx512_common_convolution_winograd_t::_execute_data_W_S_G_D( + float *, float *, float *, float *); +extern template void +_jit_avx512_common_convolution_winograd_t::_execute_data_W_SGD( + float *, float *, float *, float *); +extern template void +_jit_avx512_common_convolution_winograd_t::_execute_data_W_SGD( + float *, float *, float *, float *); + template struct _jit_avx512_common_convolution_winograd_fwd_t : _jit_avx512_common_convolution_winograd_t diff --git a/src/cpu/jit_uni_dw_conv_kernel_f32.hpp b/src/cpu/jit_uni_dw_conv_kernel_f32.hpp index 555bf025910..fd6a7369362 100644 --- a/src/cpu/jit_uni_dw_conv_kernel_f32.hpp +++ b/src/cpu/jit_uni_dw_conv_kernel_f32.hpp @@ -300,6 +300,12 @@ struct jit_uni_dw_conv_bwd_weights_kernel_f32 : public jit_generator { void generate(); }; + +// Explicit instantiations in jit_uni_dw_conv_kernel_f32.cpp. +extern template struct jit_uni_dw_conv_bwd_weights_kernel_f32; +extern template struct jit_uni_dw_conv_bwd_weights_kernel_f32; +extern template struct jit_uni_dw_conv_bwd_weights_kernel_f32; + } } } diff --git a/src/cpu/jit_uni_eltwise.hpp b/src/cpu/jit_uni_eltwise.hpp index 3f8906b25f4..32493e5bb8f 100644 --- a/src/cpu/jit_uni_eltwise.hpp +++ b/src/cpu/jit_uni_eltwise.hpp @@ -29,6 +29,11 @@ namespace mkldnn { namespace impl { namespace cpu { +// Explicit instantiations are in jit_uni_eltwise.cpp. +extern template struct jit_uni_eltwise_injector_f32; +extern template struct jit_uni_eltwise_injector_f32; +extern template struct jit_uni_eltwise_injector_f32; + struct jit_uni_eltwise_kernel_f32; template diff --git a/src/cpu/jit_uni_lrn_kernel_f32.hpp b/src/cpu/jit_uni_lrn_kernel_f32.hpp index 68270197d66..611c939beb9 100644 --- a/src/cpu/jit_uni_lrn_kernel_f32.hpp +++ b/src/cpu/jit_uni_lrn_kernel_f32.hpp @@ -173,6 +173,60 @@ struct jit_uni_lrn_bwd_kernel_f32 : public jit_generator { void(*ker)(jit_args_bwd_t *); }; +// Explicit specializations and instantiations in jit_uni_lrn_kernel_f32.cpp. +template <> +jit_uni_lrn_fwd_kernel_f32::jit_uni_lrn_fwd_kernel_f32( + const struct nchw8c_across &J, float A, float K, prop_kind_t pk, + void *code_ptr, size_t code_size); +template <> +jit_uni_lrn_fwd_kernel_f32::jit_uni_lrn_fwd_kernel_f32( + const struct nchw8c_across &J, float A, float K, prop_kind_t pk, + void *code_ptr, size_t code_size); +template <> +jit_uni_lrn_fwd_kernel_f32::jit_uni_lrn_fwd_kernel_f32( + const struct nhwc_across &J, float A, float K, prop_kind_t pk, + void *code_ptr, size_t code_size); +template <> +jit_uni_lrn_fwd_kernel_f32::jit_uni_lrn_fwd_kernel_f32( + const struct nhwc_across &J, float A, float K, prop_kind_t pk, + void *code_ptr, size_t code_size); +template <> +void jit_uni_lrn_fwd_kernel_f32::nchw_body( + int tail, int HW, prop_kind_t pk, Xbyak::Ymm ymask, Xbyak::Ymm ya, + Xbyak::Ymm yb, Xbyak::Ymm yc, Xbyak::Ymm yd, Xbyak::Ymm ye, + Xbyak::Ymm ysum); +template <> +void jit_uni_lrn_fwd_kernel_f32::nchw_body( + int tail, int HW, prop_kind_t pk, Xbyak::Ymm ymask, Xbyak::Ymm ya, + Xbyak::Ymm yb, Xbyak::Ymm yc, Xbyak::Ymm yd, Xbyak::Ymm ye, + Xbyak::Ymm ysum); +template <> +void jit_uni_lrn_fwd_kernel_f32::nchw_tail_sse42( + int tail, Xbyak::Reg64 reg_dst, Xbyak::Xmm xtail_lo, Xbyak::Xmm xtail_hi); +template <> +void jit_uni_lrn_fwd_kernel_f32::nchw_tail_sse42( + int tail, Xbyak::Reg64 reg_dst, Xbyak::Xmm xtail_lo, Xbyak::Xmm xtail_hi); +template <> +void jit_uni_lrn_fwd_kernel_f32::nchw_body_sse42( + int tail, int HW, prop_kind_t pk, Xbyak::Xmm xmask_lo, Xbyak::Xmm xmask_hi, + Xbyak::Xmm xe_lo, Xbyak::Xmm xe_hi, Xbyak::Xmm xsum_lo, Xbyak::Xmm xsum_hi); +template <> +void jit_uni_lrn_fwd_kernel_f32::nchw_body_sse42( + int tail, int HW, prop_kind_t pk, Xbyak::Xmm xmask_lo, Xbyak::Xmm xmask_hi, + Xbyak::Xmm xe_lo, Xbyak::Xmm xe_hi, Xbyak::Xmm xsum_lo, Xbyak::Xmm xsum_hi); +template <> +jit_uni_lrn_fwd_kernel_f32::jit_uni_lrn_fwd_kernel_f32( + struct nchw_across J, float A, float K, prop_kind_t pk, void *code_ptr, + size_t code_size); +template <> +jit_uni_lrn_fwd_kernel_f32::jit_uni_lrn_fwd_kernel_f32( + struct nchw_across J, float A, float K, prop_kind_t pk, void *code_ptr, + size_t code_size); + +extern template struct jit_uni_lrn_fwd_kernel_f32; +extern template struct jit_uni_lrn_fwd_kernel_f32; +extern template struct jit_uni_lrn_bwd_kernel_f32; + } } } diff --git a/src/cpu/jit_uni_pool_kernel_f32.hpp b/src/cpu/jit_uni_pool_kernel_f32.hpp index 79327a85214..60df28e38c5 100644 --- a/src/cpu/jit_uni_pool_kernel_f32.hpp +++ b/src/cpu/jit_uni_pool_kernel_f32.hpp @@ -184,6 +184,11 @@ struct jit_uni_pool_kernel_f32: public jit_generator { } }; +// Explicit instantiations in jit_uni_pool_kernel_f32.cpp. +extern template struct jit_uni_pool_kernel_f32; +extern template struct jit_uni_pool_kernel_f32; +extern template struct jit_uni_pool_kernel_f32; + } } }