|
25 | 25 | * -EINVAL if input dimensions do not allow for multiplication. |
26 | 26 | * -ERANGE if the shift operation might cause integer overflow. |
27 | 27 | */ |
28 | | -int mat_multiply(struct mat_matrix_16b *a, struct mat_matrix_16b *b, struct mat_matrix_16b *c) |
| 28 | +int mat_multiply(struct mat_matrix_16b *a, struct mat_matrix_16b *b, |
| 29 | + struct mat_matrix_16b *c) |
29 | 30 | { |
30 | 31 | /* Validate matrix dimensions are compatible for multiplication */ |
31 | 32 | if (a->columns != b->rows || a->rows != c->rows || b->columns != c->columns) |
32 | 33 | return -EINVAL; |
33 | 34 |
|
34 | | - int64_t s; |
35 | | - int16_t *x; |
36 | | - int16_t *y; |
37 | | - int16_t *z = c->data; |
38 | | - int i, j, k; |
39 | | - int y_inc = b->columns; |
40 | | - const int shift_minus_one = a->fractions + b->fractions - c->fractions - 1; |
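| 35 | + int32_t acc; /* Dot-product accumulator. NOTE(review): 32 bits can overflow when summing int16 products; the variable it replaces was int64_t — confirm this is intended */ |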
| 36 | + int16_t *x, *y, *z = c->data; /* Pointers for matrices a, b, and c */ |
| 37 | + int i, j, k; /* Loop counters */ |
| 38 | + int y_inc = b->columns; /* Column increment for matrix b elements */ |
| 39 | + /* Shift amount minus one: the remaining one-bit shift is applied after adding 1, which rounds the result */ |
| 40 | + const int shift = a->fractions + b->fractions - c->fractions - 1; |
41 | 41 |
|
42 | 42 | /* Check shift to ensure no integer overflow occurs during shifting */ |
43 | | - if (shift_minus_one < -1 || shift_minus_one > 31) |
| 43 | + if (shift < -1 || shift > 31) |
44 | 44 | return -ERANGE; |
45 | 45 |
|
46 | | - /* If all data is Q0 */ |
47 | | - if (shift_minus_one == -1) { |
| 46 | + /* Matrix multiplication loop */ |
| 47 | + if (shift == -1) { |
| 48 | + /* Special case: shift is -1, i.e. a->fractions + b->fractions == c->fractions, so the sum is stored without scaling or rounding */ |
48 | 49 | for (i = 0; i < a->rows; i++) { |
49 | 50 | for (j = 0; j < b->columns; j++) { |
50 | | - s = 0; |
| 51 | + /* Initialize accumulator for each element */ |
| 52 | + acc = 0; |
| 53 | + /* Set x at the start of ith row of a */ |
51 | 54 | x = a->data + a->columns * i; |
| 55 | + /* Set y at the top of jth column of b */ |
52 | 56 | y = b->data + j; |
| 57 | + /* Dot product loop */ |
53 | 58 | for (k = 0; k < b->rows; k++) { |
54 | | - s += (int32_t)(*x) * (*y); |
55 | | - x++; |
| 59 | + /* Multiply & accumulate */ |
| 60 | + acc += (int32_t)(*x++) * (*y); |
| 61 | + /* Move to next row in the current column of b */ |
56 | 62 | y += y_inc; |
57 | 63 | } |
58 | | - *z = (int16_t)s; /* For Q16.0 */ |
59 | | - z++; |
| 64 | + *z = (int16_t)acc; |
| 65 | + z++; /* Move to the next element in the output matrix */ |
60 | 66 | } |
61 | 67 | } |
62 | | - |
63 | | - return 0; |
64 | | - } |
65 | | - |
66 | | - for (i = 0; i < a->rows; i++) { |
67 | | - for (j = 0; j < b->columns; j++) { |
68 | | - s = 0; |
69 | | - x = a->data + a->columns * i; |
70 | | - y = b->data + j; |
71 | | - for (k = 0; k < b->rows; k++) { |
72 | | - s += (int32_t)(*x) * (*y); |
73 | | - x++; |
74 | | - y += y_inc; |
| 68 | + } else { |
| 69 | + /* General case for other shift values */ |
| 70 | + for (i = 0; i < a->rows; i++) { |
| 71 | + for (j = 0; j < b->columns; j++) { |
| 72 | + /* Initialize accumulator for each element */ |
| 73 | + acc = 0; |
| 74 | + /* Set x at the start of ith row of a */ |
| 75 | + x = a->data + a->columns * i; |
| 76 | + /* Set y at the top of jth column of b */ |
| 77 | + y = b->data + j; |
| 78 | + /* Dot product loop */ |
| 79 | + for (k = 0; k < b->rows; k++) { |
| 80 | + /* Multiply & accumulate */ |
| 81 | + acc += (int32_t)(*x++) * (*y); |
| 82 | + /* Move to next row in the current column of b */ |
| 83 | + y += y_inc; |
| 84 | + } |
| 85 | + *z = (int16_t)(((acc >> shift) + 1) >> 1); |
| 86 | + z++; /* Move to the next element in the output matrix */ |
75 | 87 | } |
76 | | - *z = (int16_t)(((s >> shift_minus_one) + 1) >> 1); /*Shift to Qx.y */ |
77 | | - z++; |
78 | 88 | } |
79 | 89 | } |
80 | 90 | return 0; |
|
0 commit comments