Skip to content

Commit 9ae5298

Browse files
committed
[main] Combine SME slice parameters
Previously the (alpha) SME intrinsics were documented to take two slice parameters: a 32-bit variable index and a 64-bit constant offset. However, it isn't very C-like to split an addition in this way, and as Sander points out, it isn't really consistent with the way that we handle vnum parameters.
1 parent 8f74df9 commit 9ae5298

File tree

1 file changed

+35
-44
lines changed

1 file changed

+35
-44
lines changed

main/acle.md

Lines changed: 35 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -9001,9 +9001,13 @@ following it. --><span id="__arm_za_disable"></span>
90019001

90029002
The intrinsics in this section have the following properties in common:
90039003

9004-
* Every argument named `tile`, `slice_offset` or `tile_mask` must
9005-
be an integer constant expression in the range of the underlying
9006-
instruction.
9004+
* Every argument named `tile` or `tile_mask` must be an integer constant
9005+
expression in the range of the underlying instruction.
9006+
9007+
* Some SME instructions index ZA using the sum of a 32-bit general-purpose
9008+
register (`w12` to `w15`) and a constant offset. Instead of having
9009+
arguments for the two individual fields, the associated intrinsics
9010+
have a single 32-bit index called `slice` that holds the sum.
90079011

90089012
* ZA loads and stores do not use typed pointers, since there is
90099013
no C or C++ type information associated with the contents of ZA.
@@ -9017,74 +9021,71 @@ The intrinsics in this section have the following properties in common:
90179021
``` c
90189022
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90199023
__attribute__((arm_streaming, arm_shared_za))
9020-
void svld1_hor_za8(uint64_t tile, uint32_t slice_base,
9021-
uint64_t slice_offset, svbool_t pg, const void *ptr);
9024+
void svld1_hor_za8(uint64_t tile, uint32_t slice, svbool_t pg,
9025+
const void *ptr);
90229026

90239027
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90249028
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90259029
__attribute__((arm_streaming, arm_shared_za))
9026-
void svld1_hor_vnum_za8(uint64_t tile, uint32_t slice_base,
9027-
uint64_t slice_offset, svbool_t pg,
9030+
void svld1_hor_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90289031
const void *ptr, int64_t vnum);
90299032

90309033
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90319034
__attribute__((arm_streaming, arm_shared_za))
9032-
void svld1_ver_za8(uint64_t tile, uint32_t slice_base,
9033-
uint64_t slice_offset, svbool_t pg, const void *ptr);
9035+
void svld1_ver_za8(uint64_t tile, uint32_t slice, svbool_t pg,
9036+
const void *ptr);
90349037

90359038
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90369039
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90379040
__attribute__((arm_streaming, arm_shared_za))
9038-
void svld1_ver_vnum_za8(uint64_t tile, uint32_t slice_base,
9039-
uint64_t slice_offset, svbool_t pg,
9041+
void svld1_ver_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90409042
const void *ptr, int64_t vnum);
90419043
```
90429044

90439045
#### LDR
90449046

90459047
``` c
9046-
// slice_offset fills the role of the usual vnum parameter.
90479048
__attribute__((arm_streaming_compatible, arm_shared_za))
9048-
void svldr_vnum_za(uint32_t slice_base, uint64_t slice_offset,
9049-
const void *ptr);
9049+
void svldr_za(uint32_t slice, const void *ptr);
9050+
9051+
__attribute__((arm_streaming_compatible, arm_shared_za))
9052+
void svldr_vnum_za(uint32_t slice, const void *ptr, int64_t vnum);
90509053
```
90519054

90529055
#### ST1B, ST1H, ST1W, ST1D, ST1Q
90539056

90549057
``` c
90559058
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90569059
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9057-
void svst1_hor_za8(uint64_t tile, uint32_t slice_base,
9058-
uint64_t slice_offset, svbool_t pg,
9060+
void svst1_hor_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90599061
void *ptr);
90609062

90619063
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90629064
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90639065
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9064-
void svst1_hor_vnum_za8(uint64_t tile, uint32_t slice_base,
9065-
uint64_t slice_offset, svbool_t pg,
9066+
void svst1_hor_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90669067
void *ptr, int64_t vnum);
90679068

90689069
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90699070
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9070-
void svst1_ver_za8(uint64_t tile, uint32_t slice_base,
9071-
uint64_t slice_offset, svbool_t pg,
9071+
void svst1_ver_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90729072
void *ptr);
90739073

90749074
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90759075
// Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90769076
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9077-
void svst1_ver_vnum_za8(uint64_t tile, uint32_t slice_base,
9078-
uint64_t slice_offset, svbool_t pg,
9077+
void svst1_ver_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90799078
void *ptr, int64_t vnum);
90809079
```
90819080

90829081
#### STR
90839082

90849083
``` c
9085-
// slice_offset fills the role of the usual vnum parameter.
90869084
__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za))
9087-
void svstr_vnum_za(uint32_t slice_base, uint64_t slice_offset, void *ptr);
9085+
void svstr_za(uint32_t slice, void *ptr);
9086+
9087+
__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za))
9088+
void svstr_vnum_za(uint32_t slice, void *ptr, int64_t vnum);
90889089
```
90899090

90909091
#### MOVA
@@ -9098,32 +9099,27 @@ parameter both have type `svuint8_t`.
90989099
// And similarly for u8.
90999100
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91009101
svint8_t svread_hor_za8[_s8]_m(svint8_t zd, svbool_t pg,
9101-
uint64_t tile, uint32_t slice_base,
9102-
uint64_t slice_offset);
9102+
uint64_t tile, uint32_t slice);
91039103

91049104
// And similarly for u16, bf16 and f16.
91059105
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91069106
svint16_t svread_hor_za16[_s16]_m(svint16_t zd, svbool_t pg,
9107-
uint64_t tile, uint32_t slice_base,
9108-
uint64_t slice_offset);
9107+
uint64_t tile, uint32_t slice);
91099108

91109109
// And similarly for u32 and f32.
91119110
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91129111
svint32_t svread_hor_za32[_s32]_m(svint32_t zd, svbool_t pg,
9113-
uint64_t tile, uint32_t slice_base,
9114-
uint64_t slice_offset);
9112+
uint64_t tile, uint32_t slice);
91159113

91169114
// And similarly for u64 and f64.
91179115
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91189116
svint64_t svread_hor_za64[_s64]_m(svint64_t zd, svbool_t pg,
9119-
uint64_t tile, uint32_t slice_base,
9120-
uint64_t slice_offset);
9117+
uint64_t tile, uint32_t slice);
91219118

91229119
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
91239120
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91249121
svint8_t svread_hor_za128[_s8]_m(svint8_t zd, svbool_t pg,
9125-
uint64_t tile, uint32_t slice_base,
9126-
uint64_t slice_offset);
9122+
uint64_t tile, uint32_t slice);
91279123
```
91289124

91299125
Replacing `_hor` with `_ver` gives the associated vertical forms.
@@ -9135,32 +9131,27 @@ the `zn` parameter to the `_u8` intrinsic has type `svuint8_t`.
91359131
``` c
91369132
// And similarly for u8.
91379133
__attribute__((arm_streaming, arm_shared_za))
9138-
void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice_base,
9139-
uint64_t slice_offset, svbool_t pg,
9134+
void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91409135
svint8_t zn);
91419136

91429137
// And similarly for u16, bf16 and f16.
91439138
__attribute__((arm_streaming, arm_shared_za))
9144-
void svwrite_hor_za16[_s16]_m(uint64_t tile, uint32_t slice_base,
9145-
uint64_t slice_offset, svbool_t pg,
9139+
void svwrite_hor_za16[_s16]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91469140
svint16_t zn);
91479141

91489142
// And similarly for u32 and f32.
91499143
__attribute__((arm_streaming, arm_shared_za))
9150-
void svwrite_hor_za32[_s32]_m(uint64_t tile, uint32_t slice_base,
9151-
uint64_t slice_offset, svbool_t pg,
9144+
void svwrite_hor_za32[_s32]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91529145
svint32_t zn);
91539146

91549147
// And similarly for u64 and f64.
91559148
__attribute__((arm_streaming, arm_shared_za))
9156-
void svwrite_hor_za64[_s64]_m(uint64_t tile, uint32_t slice_base,
9157-
uint64_t slice_offset, svbool_t pg,
9149+
void svwrite_hor_za64[_s64]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91589150
svint64_t zn);
91599151

91609152
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
91619153
__attribute__((arm_streaming, arm_shared_za))
9162-
void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice_base,
9163-
uint64_t slice_offset, svbool_t pg,
9154+
void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91649155
svint8_t zn);
91659156
```
91669157

0 commit comments

Comments
 (0)