diff --git a/bin/hipify-perl b/bin/hipify-perl index 0075ea16..d1c8f4d5 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1595,7 +1595,9 @@ sub rocSubstitutions { subst("cublasCgemm", "rocblas_cgemm", "library"); subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); subst("cublasCgemmStridedBatched", "rocblas_cgemm_strided_batched", "library"); + subst("cublasCgemm_64", "rocblas_cgemm_64", "library"); subst("cublasCgemm_v2", "rocblas_cgemm", "library"); + subst("cublasCgemm_v2_64", "rocblas_cgemm_64", "library"); subst("cublasCgemv", "rocblas_cgemv", "library"); subst("cublasCgemvBatched", "rocblas_cgemv_batched", "library"); subst("cublasCgemvBatched_64", "rocblas_cgemv_batched_64", "library"); @@ -1749,7 +1751,9 @@ sub rocSubstitutions { subst("cublasDgemm", "rocblas_dgemm", "library"); subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); subst("cublasDgemmStridedBatched", "rocblas_dgemm_strided_batched", "library"); + subst("cublasDgemm_64", "rocblas_dgemm_64", "library"); subst("cublasDgemm_v2", "rocblas_dgemm", "library"); + subst("cublasDgemm_v2_64", "rocblas_dgemm_64", "library"); subst("cublasDgemv", "rocblas_dgemv", "library"); subst("cublasDgemvBatched", "rocblas_dgemv_batched", "library"); subst("cublasDgemvBatched_64", "rocblas_dgemv_batched_64", "library"); @@ -1890,6 +1894,7 @@ sub rocSubstitutions { subst("cublasHgemm", "rocblas_hgemm", "library"); subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); + subst("cublasHgemm_64", "rocblas_hgemm_64", "library"); subst("cublasIcamax", "rocblas_icamax", "library"); subst("cublasIcamax_64", "rocblas_icamax_64", "library"); subst("cublasIcamax_v2", "rocblas_icamax", "library"); @@ -1972,7 +1977,9 @@ sub rocSubstitutions { subst("cublasSgemm", "rocblas_sgemm", "library"); subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); subst("cublasSgemmStridedBatched", "rocblas_sgemm_strided_batched", "library"); + subst("cublasSgemm_64", "rocblas_sgemm_64", "library"); subst("cublasSgemm_v2", "rocblas_sgemm", "library"); + subst("cublasSgemm_v2_64", "rocblas_sgemm_64", "library"); subst("cublasSgemv", "rocblas_sgemv", "library"); subst("cublasSgemvBatched", "rocblas_sgemv_batched", "library"); subst("cublasSgemvBatched_64", "rocblas_sgemv_batched_64", "library"); @@ -2117,7 +2124,9 @@ sub rocSubstitutions { subst("cublasZgemm", "rocblas_zgemm", "library"); subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); subst("cublasZgemmStridedBatched", "rocblas_zgemm_strided_batched", "library"); + subst("cublasZgemm_64", "rocblas_zgemm_64", "library"); subst("cublasZgemm_v2", "rocblas_zgemm", "library"); + subst("cublasZgemm_v2_64", "rocblas_zgemm_64", "library"); subst("cublasZgemv", "rocblas_zgemv", "library"); subst("cublasZgemvBatched", "rocblas_zgemv_batched", "library"); subst("cublasZgemvBatched_64", "rocblas_zgemv_batched_64", "library"); @@ -4317,7 +4326,9 @@ sub simpleSubstitutions { subst("cublasCgemm", "hipblasCgemm_v2", "library"); subst("cublasCgemmBatched", "hipblasCgemmBatched_v2", "library"); subst("cublasCgemmStridedBatched", "hipblasCgemmStridedBatched_v2", "library"); + subst("cublasCgemm_64", "hipblasCgemm_v2_64", "library"); subst("cublasCgemm_v2", "hipblasCgemm_v2", "library"); + subst("cublasCgemm_v2_64", "hipblasCgemm_v2_64", "library"); subst("cublasCgemv", "hipblasCgemv_v2", "library"); subst("cublasCgemvBatched", "hipblasCgemvBatched_v2", "library"); subst("cublasCgemvBatched_64", "hipblasCgemvBatched_v2_64", "library"); @@ -4473,7 +4484,9 @@ sub simpleSubstitutions { subst("cublasDgemm", "hipblasDgemm", "library"); subst("cublasDgemmBatched", "hipblasDgemmBatched", "library"); subst("cublasDgemmStridedBatched", "hipblasDgemmStridedBatched", "library"); + subst("cublasDgemm_64", "hipblasDgemm_64", "library"); subst("cublasDgemm_v2", "hipblasDgemm", "library"); + subst("cublasDgemm_v2_64", "hipblasDgemm_64", "library"); subst("cublasDgemv", "hipblasDgemv", "library"); subst("cublasDgemvBatched", "hipblasDgemvBatched", "library"); subst("cublasDgemvBatched_64", "hipblasDgemvBatched_64", "library"); @@ -4606,6 +4619,7 @@ sub simpleSubstitutions { subst("cublasHgemm", "hipblasHgemm", "library"); subst("cublasHgemmBatched", "hipblasHgemmBatched", "library"); subst("cublasHgemmStridedBatched", "hipblasHgemmStridedBatched", "library"); + subst("cublasHgemm_64", "hipblasHgemm_64", "library"); subst("cublasIcamax", "hipblasIcamax_v2", "library"); subst("cublasIcamax_64", "hipblasIcamax_v2_64", "library"); subst("cublasIcamax_v2", "hipblasIcamax_v2", "library"); @@ -4709,7 +4723,9 @@ sub simpleSubstitutions { subst("cublasSgemm", "hipblasSgemm", "library"); subst("cublasSgemmBatched", "hipblasSgemmBatched", "library"); subst("cublasSgemmStridedBatched", "hipblasSgemmStridedBatched", "library"); + subst("cublasSgemm_64", "hipblasSgemm_64", "library"); subst("cublasSgemm_v2", "hipblasSgemm", "library"); + subst("cublasSgemm_v2_64", "hipblasSgemm_64", "library"); subst("cublasSgemv", "hipblasSgemv", "library"); subst("cublasSgemvBatched", "hipblasSgemvBatched", "library"); subst("cublasSgemvBatched_64", "hipblasSgemvBatched_64", "library"); @@ -4848,7 +4864,9 @@ sub simpleSubstitutions { subst("cublasZgemm", "hipblasZgemm_v2", "library"); subst("cublasZgemmBatched", "hipblasZgemmBatched_v2", "library"); subst("cublasZgemmStridedBatched", "hipblasZgemmStridedBatched_v2", "library"); + subst("cublasZgemm_64", "hipblasZgemm_v2_64", "library"); subst("cublasZgemm_v2", "hipblasZgemm_v2", "library"); + subst("cublasZgemm_v2_64", "hipblasZgemm_v2_64", "library"); subst("cublasZgemv", "hipblasZgemv_v2", "library"); subst("cublasZgemvBatched", "hipblasZgemvBatched_v2", "library"); subst("cublasZgemvBatched_64", "hipblasZgemvBatched_v2_64", "library"); @@ -12181,8 +12199,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZher2k_64", "cublasZhemm_v2_64", "cublasZhemm_64", - "cublasZgemm_v2_64", - "cublasZgemm_64", "cublasZgemmStridedBatched_64", "cublasZgemmBatched_64", "cublasZgemm3m_64", @@ -12217,8 +12233,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSsymm_64", "cublasSmatinvBatched", "cublasShutdown", - "cublasSgemm_v2_64", - "cublasSgemm_64", "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", @@ -12282,7 +12296,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemm_64", "cublasHgemmStridedBatched_64", "cublasHgemmBatched_64", "cublasHSSgemvStridedBatched_64", @@ -12327,8 +12340,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDsymm_v2_64", "cublasDsymm_64", "cublasDmatinvBatched", - "cublasDgemm_v2_64", - "cublasDgemm_64", "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -12368,8 +12379,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCher2k_64", "cublasChemm_v2_64", "cublasChemm_64", - "cublasCgemm_v2_64", - "cublasCgemm_64", "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", @@ -12709,8 +12718,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgetriBatched", "cublasZgetrfBatched", "cublasZgeqrfBatched", - "cublasZgemm_v2_64", - "cublasZgemm_64", "cublasZgemmStridedBatched_64", "cublasZgemmBatched_64", "cublasZgemm3m_64", @@ -12739,8 +12746,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgetriBatched", "cublasSgetrfBatched", "cublasSgeqrfBatched", - "cublasSgemm_v2_64", - "cublasSgemm_64", "cublasSgemmStridedBatched_64", "cublasSgemmGroupedBatched_64", "cublasSgemmGroupedBatched", @@ -12831,7 +12836,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasIaminEx", "cublasIamaxEx_64", "cublasIamaxEx", - "cublasHgemm_64", "cublasHgemmStridedBatched_64", "cublasHgemmBatched_64", "cublasGetVersion_v2", @@ -12868,8 +12872,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgetriBatched", "cublasDgetrfBatched", "cublasDgeqrfBatched", - "cublasDgemm_v2_64", - "cublasDgemm_64", "cublasDgemmStridedBatched_64", "cublasDgemmGroupedBatched_64", "cublasDgemmGroupedBatched", @@ -12910,8 +12912,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgetriBatched", "cublasCgetrfBatched", "cublasCgeqrfBatched", - "cublasCgemm_v2_64", - "cublasCgemm_64", "cublasCgemmStridedBatched_64", "cublasCgemmEx_64", "cublasCgemmEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 2363b7e1..9ef081e8 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -1028,9 +1028,9 @@ |`cublasCgemmBatched_64`|12.0| | | | | | | | | | |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasCgemm_64`|12.0| | | | | | | | | | +|`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | | -|`cublasCgemm_v2_64`|12.0| | | | | | | | | | +|`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | | @@ -1078,9 +1078,9 @@ |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | | |`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasDgemm_64`|12.0| | | | | | | | | | +|`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | | -|`cublasDgemm_v2_64`|12.0| | | | | | | | | | +|`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | | | |`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | | @@ -1122,7 +1122,7 @@ |`cublasHgemmBatched_64`|12.0| | | | | | | | | | |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasHgemm_64`|12.0| | | | | | | | | | +|`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | | |`cublasSgemmBatched_64`|12.0| | | | | | | | | | @@ -1130,9 +1130,9 @@ |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | | |`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasSgemm_64`|12.0| | | | | | | | | | +|`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | | -|`cublasSgemm_v2_64`|12.0| | | | | | | | | | +|`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | | | |`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | | @@ -1174,9 +1174,9 @@ |`cublasZgemmBatched_64`|12.0| | | | | | | | | | |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasZgemm_64`|12.0| | | | | | | | | | +|`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | | -|`cublasZgemm_v2_64`|12.0| | | | | | | | | | +|`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0| |`cublasZgemvBatched`|11.6| | | |`hipblasZgemvBatched_v2`|6.0.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 5fdf8baf..067167e6 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -1028,9 +1028,9 @@ |`cublasCgemmBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgemmStridedBatched`|8.0| | | |`hipblasCgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasCgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemm_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | |`rocblas_cgemm`|1.5.0| | | | | -|`cublasCgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemm_v2_64`|12.0| | | |`hipblasCgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | |`rocblas_cgemv_batched`|3.5.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | | |`rocblas_cgemv_batched_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | @@ -1078,9 +1078,9 @@ |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`hipblasDgemmStridedBatched`|1.8.2| | | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | |`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasDgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemm_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | -|`cublasDgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemm_v2_64`|12.0| | | |`hipblasDgemm_64`|6.3.0| | | |6.3.0|`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | |`rocblas_dgemv_batched`|3.5.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | | |`rocblas_dgemv_batched_64`|6.2.0| | | | | |`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | @@ -1122,7 +1122,7 @@ |`cublasHgemmBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasHgemmStridedBatched`|8.0| | | |`hipblasHgemmStridedBatched`|3.0.0| | | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasHgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHgemm_64`|12.0| | | |`hipblasHgemm_64`|6.3.0| | | |6.3.0|`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`hipblasSgemmBatched`|1.8.2| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | |`cublasSgemmBatched_64`|12.0| | | | | | | | | | | | | | | | @@ -1130,9 +1130,9 @@ |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`hipblasSgemmStridedBatched`|1.8.2| | | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | |`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasSgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemm_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | -|`cublasSgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemm_v2_64`|12.0| | | |`hipblasSgemm_64`|6.3.0| | | |6.3.0|`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | |`rocblas_sgemv_batched`|3.5.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | | |`rocblas_sgemv_batched_64`|6.2.0| | | | | |`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | @@ -1174,9 +1174,9 @@ |`cublasZgemmBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasZgemmStridedBatched`|8.0| | | |`hipblasZgemmStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasZgemm_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemm_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | |`rocblas_zgemm`|1.5.0| | | | | -|`cublasZgemm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemm_v2_64`|12.0| | | |`hipblasZgemm_v2_64`|6.3.0| | | |6.3.0|`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemvBatched`|11.6| | | |`hipblasZgemvBatched_v2`|6.0.0| | | | |`rocblas_zgemv_batched`|3.5.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | | |`rocblas_zgemv_batched_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 3b98b6bc..1db1997e 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1028,9 +1028,9 @@ |`cublasCgemmBatched_64`|12.0| | | | | | | | | | |`cublasCgemmStridedBatched`|8.0| | | |`rocblas_cgemm_strided_batched`|1.5.0| | | | | |`cublasCgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasCgemm_64`|12.0| | | | | | | | | | +|`cublasCgemm_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemm_v2`| | | | |`rocblas_cgemm`|1.5.0| | | | | -|`cublasCgemm_v2_64`|12.0| | | | | | | | | | +|`cublasCgemm_v2_64`|12.0| | | |`rocblas_cgemm_64`|6.3.0| | | |6.3.0| |`cublasCgemvBatched`|11.6| | | |`rocblas_cgemv_batched`|3.5.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`rocblas_cgemv_batched_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | @@ -1078,9 +1078,9 @@ |`cublasDgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasDgemmStridedBatched`|8.0| | | |`rocblas_dgemm_strided_batched`|1.5.0| | | | | |`cublasDgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasDgemm_64`|12.0| | | | | | | | | | +|`cublasDgemm_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemm_v2`| | | | |`rocblas_dgemm`|1.5.0| | | | | -|`cublasDgemm_v2_64`|12.0| | | | | | | | | | +|`cublasDgemm_v2_64`|12.0| | | |`rocblas_dgemm_64`|6.3.0| | | |6.3.0| |`cublasDgemvBatched`|11.6| | | |`rocblas_dgemv_batched`|3.5.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`rocblas_dgemv_batched_64`|6.2.0| | | | | |`cublasDgemvStridedBatched`|11.6| | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | @@ -1122,7 +1122,7 @@ |`cublasHgemmBatched_64`|12.0| | | | | | | | | | |`cublasHgemmStridedBatched`|8.0| | | |`rocblas_hgemm_strided_batched`|1.5.0| | | | | |`cublasHgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasHgemm_64`|12.0| | | | | | | | | | +|`cublasHgemm_64`|12.0| | | |`rocblas_hgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm`| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemmBatched`| | | | |`rocblas_sgemm_batched`|3.5.0| | | | | |`cublasSgemmBatched_64`|12.0| | | | | | | | | | @@ -1130,9 +1130,9 @@ |`cublasSgemmGroupedBatched_64`|12.4| | | | | | | | | | |`cublasSgemmStridedBatched`|8.0| | | |`rocblas_sgemm_strided_batched`|1.5.0| | | | | |`cublasSgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasSgemm_64`|12.0| | | | | | | | | | +|`cublasSgemm_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemm_v2`| | | | |`rocblas_sgemm`|1.5.0| | | | | -|`cublasSgemm_v2_64`|12.0| | | | | | | | | | +|`cublasSgemm_v2_64`|12.0| | | |`rocblas_sgemm_64`|6.3.0| | | |6.3.0| |`cublasSgemvBatched`|11.6| | | |`rocblas_sgemv_batched`|3.5.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`rocblas_sgemv_batched_64`|6.2.0| | | | | |`cublasSgemvStridedBatched`|11.6| | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | @@ -1174,9 +1174,9 @@ |`cublasZgemmBatched_64`|12.0| | | | | | | | | | |`cublasZgemmStridedBatched`|8.0| | | |`rocblas_zgemm_strided_batched`|1.5.0| | | | | |`cublasZgemmStridedBatched_64`|12.0| | | | | | | | | | -|`cublasZgemm_64`|12.0| | | | | | | | | | +|`cublasZgemm_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemm_v2`| | | | |`rocblas_zgemm`|1.5.0| | | | | -|`cublasZgemm_v2_64`|12.0| | | | | | | | | | +|`cublasZgemm_v2_64`|12.0| | | |`rocblas_zgemm_64`|6.3.0| | | |6.3.0| |`cublasZgemvBatched`|11.6| | | |`rocblas_zgemv_batched`|3.5.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`rocblas_zgemv_batched_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index f1a096ba..5dfc5338 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -399,15 +399,15 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // Blas3 (v1) Routines // GEMM {"cublasSgemm", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSgemm_64", {"hipblasSgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemm_64", {"hipblasSgemm_64", "rocblas_sgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDgemm", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgemm_64", {"hipblasDgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemm_64", {"hipblasDgemm_64", "rocblas_dgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm", {"hipblasCgemm_v2", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgemm_64", {"hipblasCgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemm_64", {"hipblasCgemm_v2_64", "rocblas_cgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemm", {"hipblasZgemm_v2", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgemm_64", {"hipblasZgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemm_64", {"hipblasZgemm_v2_64", "rocblas_zgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasHgemm", {"hipblasHgemm", "rocblas_hgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasHgemm_64", {"hipblasHgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHgemm_64", {"hipblasHgemm_64", "rocblas_hgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, // BATCH GEMM {"cublasSgemmBatched", {"hipblasSgemmBatched", "rocblas_sgemm_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, @@ -817,17 +817,17 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // Blas3 (v2) Routines // GEMM {"cublasSgemm_v2", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasSgemm_v2_64", {"hipblasSgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemm_v2_64", {"hipblasSgemm_64", "rocblas_sgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasDgemm_v2", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasDgemm_v2_64", {"hipblasDgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasDgemm_v2_64", {"hipblasDgemm_64", "rocblas_dgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm_v2", {"hipblasCgemm_v2", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemm_v2_64", {"hipblasCgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemm_v2_64", {"hipblasCgemm_v2_64", "rocblas_cgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemm3m", {"hipblasCgemm3m", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3m_64", {"hipblasCgemm3m_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mEx", {"hipblasCgemm3mEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasCgemm3mEx_64", {"hipblasCgemm3mEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemm_v2", {"hipblasZgemm_v2", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemm_v2_64", {"hipblasZgemm_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemm_v2_64", {"hipblasZgemm_v2_64", "rocblas_zgemm_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemm3m", {"hipblasZgemm3m", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasZgemm3m_64", {"hipblasZgemm3m_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, @@ -2023,6 +2023,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasNrm2Ex_v2_64", {HIP_6020, HIP_0, HIP_0, }}, {"hipblasRotEx_v2_64", {HIP_6020, HIP_0, HIP_0, }}, {"hipblasScalEx_v2_64", {HIP_6020, HIP_0, HIP_0, }}, + {"hipblasHgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemm_v2_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2411,6 +2416,11 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_dtrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ctrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_ztrsm_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_hgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_sgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_dgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_cgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"rocblas_zgemm_64", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index ff3cdbff..d3fc1ad2 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2822,6 +2822,39 @@ int main() { // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScalEx_v2_64(hipblasHandle_t handle, int64_t n, const void* alpha, hipDataType alphaType, void* x, hipDataType xType, int64_t incx, hipDataType executionType); // CHECK: blasStatus = hipblasScalEx_v2_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, Executiontype); blasStatus = cublasScalEx_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* BP, int64_t ldb, const float* beta, float* CP, int64_t ldc); + // CHECK: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = hipblasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* BP, int64_t ldb, const double* beta, double* CP, int64_t ldc); + // CHECK: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = hipblasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemm_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* BP, int64_t ldb, const hipComplex* beta, hipComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = hipblasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemm_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* BP, int64_t ldb, const hipDoubleComplex* beta, hipDoubleComplex* CP, int64_t ldc); + // CHECK: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = hipblasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, const __half* B, int64_t ldb, const __half* beta, __half* C, int64_t ldc); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasHgemm_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasHalf* alpha, const hipblasHalf* AP, int64_t lda, const hipblasHalf* BP, int64_t ldb, const hipblasHalf* beta, hipblasHalf* CP, int64_t ldc); + // CHECK: blasStatus = hipblasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 0b975ea5..1bfd4810 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -3028,6 +3028,39 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsm_batched_64(rocblas_handle handle, rocblas_side side, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], int64_t lda, rocblas_double_complex* const B[], int64_t ldb, int64_t batch_count); // CHECK: blasStatus = rocblas_ztrsm_batched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); blasStatus = cublasZtrsmBatched_64(blasHandle, blasSideMode, blasFillMode, blasOperation, blasDiagType, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexBarray, ldb_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* B, int64_t ldb, const float* beta, float* C, int64_t ldc); + // CHECK: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + // CHECK-NEXT: blasStatus = rocblas_sgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + blasStatus = cublasSgemm_v2_64(blasHandle, transa, transb, m, n, k, &fa, &fA, lda, &fB, ldb, &fb, &fC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* B, int64_t ldb, const double* beta, double* C, int64_t ldc); + // CHECK: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + // CHECK-NEXT: blasStatus = rocblas_dgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + blasStatus = cublasDgemm_v2_64(blasHandle, transa, transb, m, n, k, &da, &dA, lda, &dB, ldb, &db, &dC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* B, int64_t ldb, const cuComplex* beta, cuComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* B, int64_t ldb, const rocblas_float_complex* beta, rocblas_float_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + // CHECK-NEXT: blasStatus = rocblas_cgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + blasStatus = cublasCgemm_v2_64(blasHandle, transa, transb, m, n, k, &complexa, &complexA, lda, &complexB, ldb, &complexb, &complexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* B, int64_t ldb, const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* B, int64_t ldb, const rocblas_double_complex* beta, rocblas_double_complex* C, int64_t ldc); + // CHECK: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + // CHECK-NEXT: blasStatus = rocblas_zgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + blasStatus = cublasZgemm_v2_64(blasHandle, transa, transb, m, n, k, &dcomplexa, &dcomplexA, lda, &dcomplexB, ldb, &dcomplexb, &dcomplexC, ldc); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm_64(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int64_t m, int64_t n, int64_t k, const __half* alpha, const __half* A, int64_t lda, const __half* B, int64_t ldb, const __half* beta, __half* C, int64_t ldc); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hgemm_64(rocblas_handle handle, rocblas_operation transA, rocblas_operation transB, int64_t m, int64_t n, int64_t k, const rocblas_half* alpha, const rocblas_half* A, int64_t lda, const rocblas_half* B, int64_t ldb, const rocblas_half* beta, rocblas_half* C, int64_t ldc); + // CHECK: blasStatus = rocblas_hgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); + blasStatus = cublasHgemm_64(blasHandle, transa, transb, m, n, k, ha, hA, lda, hB, ldb, hb, hC, ldc); #endif return 0;