@@ -72,6 +72,11 @@ BLASLONG shgemm_p = DEFAULT_GEMM_P;
72
72
#else
73
73
BLASLONG shgemm_p = SHGEMM_P ;
74
74
#endif
75
/*
 * Definition of the global BLASLONG bgemm_p.
 *
 * The initializer is picked by the preprocessor: DEFAULT_GEMM_P when the
 * test `BGEMM_P == bgemm_p` holds, otherwise the value of BGEMM_P.
 * In a #if expression any identifier that is not a macro evaluates to 0,
 * so the test holds when BGEMM_P expands to the plain name bgemm_p
 * (0 == 0); it would also hold if BGEMM_P were undefined or expanded to 0.
 * NOTE(review): presumably BGEMM_P is defined as bgemm_p on builds that
 * choose the value at run time -- confirm against the header that
 * defines BGEMM_P.
 * Same pattern as the neighbouring shgemm_p / sgemm_p definitions.
 */
+ #if BGEMM_P == bgemm_p
76
+ BLASLONG bgemm_p = DEFAULT_GEMM_P ;
77
+ #else
78
+ BLASLONG bgemm_p = BGEMM_P ;
79
+ #endif
75
80
#if SGEMM_P == sgemm_p
76
81
BLASLONG sgemm_p = DEFAULT_GEMM_P ;
77
82
#else
@@ -103,6 +108,11 @@ BLASLONG shgemm_q = DEFAULT_GEMM_Q;
103
108
#else
104
109
BLASLONG shgemm_q = SHGEMM_Q ;
105
110
#endif
111
/*
 * Definition of the global BLASLONG bgemm_q.
 *
 * The initializer is picked by the preprocessor: DEFAULT_GEMM_Q when the
 * test `BGEMM_Q == bgemm_q` holds, otherwise the value of BGEMM_Q.
 * In a #if expression any identifier that is not a macro evaluates to 0,
 * so the test holds when BGEMM_Q expands to the plain name bgemm_q
 * (0 == 0); it would also hold if BGEMM_Q were undefined or expanded to 0.
 * NOTE(review): presumably BGEMM_Q is defined as bgemm_q on builds that
 * choose the value at run time -- confirm against the header that
 * defines BGEMM_Q.
 * Same pattern as the neighbouring shgemm_q / sgemm_q definitions.
 */
+ #if BGEMM_Q == bgemm_q
112
+ BLASLONG bgemm_q = DEFAULT_GEMM_Q ;
113
+ #else
114
+ BLASLONG bgemm_q = BGEMM_Q ;
115
+ #endif
106
116
#if SGEMM_Q == sgemm_q
107
117
BLASLONG sgemm_q = DEFAULT_GEMM_Q ;
108
118
#else
@@ -134,6 +144,11 @@ BLASLONG shgemm_r = DEFAULT_GEMM_R;
134
144
#else
135
145
BLASLONG shgemm_r = SHGEMM_R ;
136
146
#endif
147
/*
 * Definition of the global BLASLONG bgemm_r.
 *
 * The initializer is picked by the preprocessor: DEFAULT_GEMM_R when the
 * test `BGEMM_R == bgemm_r` holds, otherwise the value of BGEMM_R.
 * In a #if expression any identifier that is not a macro evaluates to 0,
 * so the test holds when BGEMM_R expands to the plain name bgemm_r
 * (0 == 0); it would also hold if BGEMM_R were undefined or expanded to 0.
 * NOTE(review): presumably BGEMM_R is defined as bgemm_r on builds that
 * choose the value at run time -- confirm against the header that
 * defines BGEMM_R.
 * This is only the initial value: blas_set_parameter() assigns bgemm_r
 * again when BUILD_BFLOAT16 is defined.
 * Same pattern as the neighbouring shgemm_r / sgemm_r definitions.
 */
+ #if BGEMM_R == bgemm_r
148
+ BLASLONG bgemm_r = DEFAULT_GEMM_R ;
149
+ #else
150
+ BLASLONG bgemm_r = BGEMM_R ;
151
+ #endif
137
152
#if SGEMM_R == sgemm_r
138
153
BLASLONG sgemm_r = DEFAULT_GEMM_R ;
139
154
#else
@@ -541,6 +556,7 @@ void blas_set_parameter(void){
541
556
542
557
#ifdef BUILD_BFLOAT16
543
558
sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN ) & ~GEMM_ALIGN )) / (SBGEMM_Q * 4 )) - 15 ) & ~15 ;
559
+ bgemm_r = (((BUFFER_SIZE - ((BGEMM_P * BGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN ) & ~GEMM_ALIGN )) / (BGEMM_Q * 4 )) - 15 ) & ~15 ;
544
560
#endif
545
561
#ifdef BUILD_HFLOAT16
546
562
shgemm_r = (((BUFFER_SIZE - ((SHGEMM_P * SHGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN ) & ~GEMM_ALIGN )) / (SHGEMM_Q * 4 )) - 15 ) & ~15 ;
@@ -653,6 +669,7 @@ void blas_set_parameter(void){
653
669
654
670
#ifdef BUILD_BFLOAT16
655
671
sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN ) & ~GEMM_ALIGN )) / (SBGEMM_Q * 4 )) - 15 ) & ~15 ;
672
+ bgemm_r = (((BUFFER_SIZE - ((BGEMM_P * BGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN ) & ~GEMM_ALIGN )) / (BGEMM_Q * 4 )) - 15 ) & ~15 ;
656
673
#endif
657
674
#ifdef BUILD_HFLOAT16
658
675
shgemm_r = (((BUFFER_SIZE - ((SHGEMM_P * SHGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN ) & ~GEMM_ALIGN )) / (SHGEMM_Q * 4 )) - 15 ) & ~15 ;
0 commit comments