@@ -9001,9 +9001,13 @@ following it. --><span id="__arm_za_disable"></span>
90019001
90029002The intrinsics in this section have the following properties in common:
90039003
9004- * Every argument named `tile`, `slice_offset` or `tile_mask` must
9005- be an integer constant expression in the range of the underlying
9006- instruction.
9004+ * Every argument named `tile` or `tile_mask` must be an integer constant
9005+ expression in the range of the underlying instruction.
9006+
9007+ * Some SME instructions index ZA using the sum of a 32-bit general-purpose
9008+ register (`w12` to `w15`) and a constant offset. Instead of having
9009+ arguments for the two individual fields, the associated intrinsics
9010+ have a single 32-bit index called `slice` that holds the sum.
90079011
90089012* ZA loads and stores do not use typed pointers, since there is
90099013 no C or C++ type information associated with the contents of ZA.
@@ -9017,74 +9021,71 @@ The intrinsics in this section have the following properties in common:
90179021``` c
90189022 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90199023 __attribute__((arm_streaming, arm_shared_za))
9020- void svld1_hor_za8(uint64_t tile, uint32_t slice_base ,
9021- uint64_t slice_offset, svbool_t pg, const void *ptr);
9024+ void svld1_hor_za8(uint64_t tile, uint32_t slice, svbool_t pg,
9025+ const void *ptr);
90229026
90239027 // Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90249028 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90259029 __attribute__((arm_streaming, arm_shared_za))
9026- void svld1_hor_vnum_za8(uint64_t tile, uint32_t slice_base,
9027- uint64_t slice_offset, svbool_t pg,
9030+ void svld1_hor_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90289031 const void *ptr, int64_t vnum);
90299032
90309033 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90319034 __attribute__((arm_streaming, arm_shared_za))
9032- void svld1_ver_za8(uint64_t tile, uint32_t slice_base ,
9033- uint64_t slice_offset, svbool_t pg, const void *ptr);
9035+ void svld1_ver_za8(uint64_t tile, uint32_t slice, svbool_t pg,
9036+ const void *ptr);
90349037
90359038 // Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90369039 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90379040 __attribute__((arm_streaming, arm_shared_za))
9038- void svld1_ver_vnum_za8(uint64_t tile, uint32_t slice_base,
9039- uint64_t slice_offset, svbool_t pg,
9041+ void svld1_ver_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90409042 const void *ptr, int64_t vnum);
90419043```
90429044
90439045#### LDR
90449046
90459047``` c
9046- // slice_offset fills the role of the usual vnum parameter.
90479048 __attribute__((arm_streaming_compatible, arm_shared_za))
9048- void svldr_vnum_za(uint32_t slice_base, uint64_t slice_offset,
9049- const void *ptr);
9049+ void svldr_za(uint32_t slice, const void *ptr);
9050+
9051+ __attribute__((arm_streaming_compatible, arm_shared_za))
9052+ void svldr_vnum_za(uint32_t slice, const void *ptr, int64_t vnum);
90509053```
90519054
90529055#### ST1B, ST1H, ST1W, ST1D, ST1Q
90539056
90549057``` c
90559058 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90569059 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9057- void svst1_hor_za8(uint64_t tile, uint32_t slice_base,
9058- uint64_t slice_offset, svbool_t pg,
9060+ void svst1_hor_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90599061 void *ptr);
90609062
90619063 // Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90629064 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90639065 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9064- void svst1_hor_vnum_za8(uint64_t tile, uint32_t slice_base,
9065- uint64_t slice_offset, svbool_t pg,
9066+ void svst1_hor_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90669067 void *ptr, int64_t vnum);
90679068
90689069 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90699070 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9070- void svst1_ver_za8(uint64_t tile, uint32_t slice_base,
9071- uint64_t slice_offset, svbool_t pg,
9071+ void svst1_ver_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90729072 void *ptr);
90739073
90749074 // Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr.
90759075 // Also for _za16, _za32, _za64 and _za128 (with the same prototype).
90769076 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
9077- void svst1_ver_vnum_za8(uint64_t tile, uint32_t slice_base,
9078- uint64_t slice_offset, svbool_t pg,
9077+ void svst1_ver_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg,
90799078 void *ptr, int64_t vnum);
90809079```
90819080
90829081#### STR
90839082
90849083``` c
9085- // slice_offset fills the role of the usual vnum parameter.
90869084 __attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za))
9087- void svstr_vnum_za(uint32_t slice_base, uint64_t slice_offset, void *ptr);
9085+ void svstr_za(uint32_t slice, void *ptr);
9086+
9087+ __attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za))
9088+ void svstr_vnum_za(uint32_t slice, void *ptr, int64_t vnum);
90889089```
90899090
90909091#### MOVA
@@ -9098,32 +9099,27 @@ parameter both have type `svuint8_t`.
90989099 // And similarly for u8.
90999100 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91009101 svint8_t svread_hor_za8[_s8]_m(svint8_t zd, svbool_t pg,
9101- uint64_t tile, uint32_t slice_base,
9102- uint64_t slice_offset);
9102+ uint64_t tile, uint32_t slice);
91039103
91049104 // And similarly for u16, bf16 and f16.
91059105 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91069106 svint16_t svread_hor_za16[_s16]_m(svint16_t zd, svbool_t pg,
9107- uint64_t tile, uint32_t slice_base,
9108- uint64_t slice_offset);
9107+ uint64_t tile, uint32_t slice);
91099108
91109109 // And similarly for u32 and f32.
91119110 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91129111 svint32_t svread_hor_za32[_s32]_m(svint32_t zd, svbool_t pg,
9113- uint64_t tile, uint32_t slice_base,
9114- uint64_t slice_offset);
9112+ uint64_t tile, uint32_t slice);
91159113
91169114 // And similarly for u64 and f64.
91179115 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91189116 svint64_t svread_hor_za64[_s64]_m(svint64_t zd, svbool_t pg,
9119- uint64_t tile, uint32_t slice_base,
9120- uint64_t slice_offset);
9117+ uint64_t tile, uint32_t slice);
91219118
91229119 // And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
91239120 __attribute__((arm_streaming, arm_shared_za, arm_preserves_za))
91249121 svint8_t svread_hor_za128[_s8]_m(svint8_t zd, svbool_t pg,
9125- uint64_t tile, uint32_t slice_base,
9126- uint64_t slice_offset);
9122+ uint64_t tile, uint32_t slice);
91279123```
91289124
91299125Replacing `_hor` with `_ver` gives the associated vertical forms.
@@ -9135,32 +9131,27 @@ the `zn` parameter to the `_u8` intrinsic has type `svuint8_t`.
91359131``` c
91369132 // And similarly for u8.
91379133 __attribute__((arm_streaming, arm_shared_za))
9138- void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice_base,
9139- uint64_t slice_offset, svbool_t pg,
9134+ void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91409135 svint8_t zn);
91419136
91429137 // And similarly for u16, bf16 and f16.
91439138 __attribute__((arm_streaming, arm_shared_za))
9144- void svwrite_hor_za16[_s16]_m(uint64_t tile, uint32_t slice_base,
9145- uint64_t slice_offset, svbool_t pg,
9139+ void svwrite_hor_za16[_s16]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91469140 svint16_t zn);
91479141
91489142 // And similarly for u32 and f32.
91499143 __attribute__((arm_streaming, arm_shared_za))
9150- void svwrite_hor_za32[_s32]_m(uint64_t tile, uint32_t slice_base,
9151- uint64_t slice_offset, svbool_t pg,
9144+ void svwrite_hor_za32[_s32]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91529145 svint32_t zn);
91539146
91549147 // And similarly for u64 and f64.
91559148 __attribute__((arm_streaming, arm_shared_za))
9156- void svwrite_hor_za64[_s64]_m(uint64_t tile, uint32_t slice_base,
9157- uint64_t slice_offset, svbool_t pg,
9149+ void svwrite_hor_za64[_s64]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91589150 svint64_t zn);
91599151
91609152 // And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
91619153 __attribute__((arm_streaming, arm_shared_za))
9162- void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice_base,
9163- uint64_t slice_offset, svbool_t pg,
9154+ void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg,
91649155 svint8_t zn);
91659156```
91669157
0 commit comments