Skip to content

FPU notes on zig #1

Closed
Closed
@kassane

Description

@kassane

Hi @kubo39 ,

After reading your notes about the FPU (using a translator), I ran a few tests on it.

Using the Zig toolchain for ESP32-S3:

$> ls ~/.espressif/tools/xtensa-esp-elf/esp-13.2.0_20230928/xtensa-esp-elf/lib/gcc/xtensa-esp-elf/13.2.0/esp32s3
crtbegin.o  crtend.o  crti.o  crtn.o  libgcc.a  libgcov.a  no-rtti
$> ./zig-x86_64-relsafe-espressif-linux-musl-baseline/zig build-exe -target xtensa-freestanding-none -mcpu=esp32s3 sqrt_start.zig -fno-compiler-rt $HOME/.espressif/tools/xtensa-esp-elf/esp-13.2.0_20230928/xtensa-esp-elf/lib/gcc/xtensa-esp-elf/13.2.0/esp32s3/libgcc.a $HOME/.espressif/tools/xtensa-esp-elf/esp-13.2.0_20230928/xtensa-esp-elf/lib/gcc/xtensa-esp-elf/13.2.0/esp32s3/crt*
$> xtensa-esp32s3-elf-nm sqrt_start | grep sqrt
00011338 t __ieee754_sqrtf
$> xtensa-esp32s3-elf-readelf -a ../sqrt_start | grep sqrt
    19: 00000000     0 FILE    LOCAL  DEFAULT  ABS sqrt
    31: 00011338     0 FUNC    LOCAL  HIDDEN     6 __ieee754_sqrtf
Source
// Single-precision square root routine declared as an external symbol:
// it takes one f32 and returns an f32. No definition appears in this
// snippet, so it has to be supplied at link time (presumably by the
// libgcc.a passed on the build command line — confirm).
extern fn __ieee754_sqrtf(a0: f32) f32;

// Exported `_start` symbol for the freestanding test program. It computes
// sqrt(5.0) two ways — via Zig's std.math.sqrt and via the external
// __ieee754_sqrtf — and asserts that both results are equal.
pub export fn _start() void {
    // The @cImport block contains only a @cDefine of XCHAL_HAVE_FP_SQRT to
    // "1"; no header is included, and the result is discarded just below.
    // NOTE(review): as written, the define does not appear to influence
    // anything else in this function — confirm whether it is needed.
    const c = @cImport({
        @cDefine("XCHAL_HAVE_FP_SQRT", "1");
    });
    _ = c;
    // Zig standard-library square root applied to the literal 5.0.
    // NOTE(review): with a literal argument this is presumably evaluated
    // at compile time rather than on the target — confirm.
    const value = @import("std").math.sqrt(5.0);
    // Same input through the externally linked routine, as a runtime call.
    const cvalue = __ieee754_sqrtf(5.0);
    // Exact floating-point equality check: the two results must match
    // exactly for the assertion to pass.
    @import("std").debug.assert(value == cvalue);
}
00011344 <_start>:
   11344:       006136          entry   a1, 48
   11347:       017d            mov.n   a7, a1
   11349:       ffc5a1          l32r    a10, 11260 <__EH_FRAME_BEGIN__+0x104c>
   1134c:       ffc681          l32r    a8, 11264 <__EH_FRAME_BEGIN__+0x1050>
   1134f:       0008e0          callx8  a8
   11352:       fa8a50          wfr     f8, a10
   11355:       27a9            s32i.n  a10, a7, 8
   11357:       ffc481          l32r    a8, 11268 <__EH_FRAME_BEGIN__+0x1054>
   1135a:       fa9850          wfr     f9, a8
   1135d:       080c            movi.n  a8, 0
   1135f:       0789            s32i.n  a8, a7, 0
   11361:       180c            movi.n  a8, 1
   11363:       2b0890          oeq.s   b0, f8, f9
   11366:       1789            s32i.n  a8, a7, 4
   11368:       031076          bt      b0, 1136f <_start+0x2b>
   1136b:       0788            l32i.n  a8, a7, 0
   1136d:       1789            s32i.n  a8, a7, 4
   1136f:       17a8            l32i.n  a10, a7, 4
   11371:       ffbe81          l32r    a8, 1126c <__EH_FRAME_BEGIN__+0x1058>
   11374:       0008e0          callx8  a8
   11377:       f01d            retw.n
   11379:       0020f0          nop
__ieee754_sqrtf - asm/objdump
00011338 <__ieee754_sqrtf>:
   11338:       002136          entry   a1, 16
   1133b:       fa1250          wfr     f1, a2
   1133e:       fa2190          sqrt0.s f2, f1
   11341:       fa3030          const.s f3, 0
   11344:       6a3220          maddn.s f3, f2, f2
   11347:       fa41b0          nexp01.s        f4, f1
   1134a:       fa0330          const.s f0, 3
   1134d:       fa40e0          addexp.s        f4, f0
   11350:       6a0340          maddn.s f0, f3, f4
   11353:       fa31b0          nexp01.s        f3, f1
   11356:       fa5360          neg.s   f5, f3
   11359:       6a2020          maddn.s f2, f0, f2
   1135c:       fa0030          const.s f0, 0
   1135f:       fa6030          const.s f6, 0
   11362:       fa7030          const.s f7, 0
   11365:       6a0520          maddn.s f0, f5, f2
   11368:       6a6240          maddn.s f6, f2, f4
   1136b:       fa4330          const.s f4, 3
   1136e:       6a7420          maddn.s f7, f4, f2
   11371:       6a3000          maddn.s f3, f0, f0
   11374:       6a4620          maddn.s f4, f6, f2
   11377:       fa2760          neg.s   f2, f7
   1137a:       6a0320          maddn.s f0, f3, f2
   1137d:       6a7470          maddn.s f7, f4, f7
   11380:       fa21c0          mksadj.s        f2, f1
   11383:       fa11b0          nexp01.s        f1, f1
   11386:       6a1000          maddn.s f1, f0, f0
   11389:       fa3760          neg.s   f3, f7
   1138c:       fa02f0          addexpm.s       f0, f2
   1138f:       fa32e0          addexp.s        f3, f2
   11392:       7a0130          divn.s  f0, f1, f3
   11395:       fa2040          rfr     a2, f0
   11398:       f01d            retw.n

If I try to build without crt*.o:

error: ld.lld: sqrt_start.o:(function _start: .text+0x5): relocation R_XTENSA_SLOT0_OP out of range: 112 is not in [-262141, 18446744073709551612]; references section '.literal'
    note: referenced by sqrt_start.zig:46
error: ld.lld: sqrt_start.o:(function _start: .text+0x8): relocation R_XTENSA_SLOT0_OP out of range: 116 is not in [-262141, 18446744073709551612]; references section '.literal'
    note: referenced by sqrt_start.zig:46

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions