Skip to content

Commit

Permalink
[threads] Add opcodes for fast interpreter (#182 from khagankhan/fast…
Browse files Browse the repository at this point in the history
…-interpreter)
  • Loading branch information
titzer authored Aug 26, 2024
2 parents ee5ba33 + 435cabe commit 8d0edb6
Show file tree
Hide file tree
Showing 35 changed files with 1,320 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/engine/CodeValidator.v3
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,9 @@ class CodeValidator(extensions: Extension.set, limits: Limits, module: Module, e
I64_ATOMIC_RMW_CMPXCHG => check_MEMARG(opcode, 8);
I32_ATOMIC_RMW8_CMPXCHG_U => check_MEMARG(opcode, 1);
I32_ATOMIC_RMW16_CMPXCHG_U => check_MEMARG(opcode, 2);
I64_ATOMIC_RMW8_CMPXCHG_U => check_MEMARG(opcode, 1);
I64_ATOMIC_RMW16_CMPXCHG_U => check_MEMARG(opcode, 2);
I64_ATOMIC_RMW32_CMPXCHG_U => check_MEMARG(opcode, 4);
CONT_NEW => {
var cont_decl = parser.readCont();
if (cont_decl == null) return;
Expand Down
209 changes: 209 additions & 0 deletions src/engine/x86-64/X86_64Interpreter.v3
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,8 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
genLocals();
genCallsAndRet();
genLoadsAndStores();
genAtomicLoadsAndStores();
genAtomicRMWOperations();
genCompares();
genI32Arith();
genI64Arith();
Expand Down Expand Up @@ -1302,6 +1304,33 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
bindHandler(Opcode.I64_STORE16);
genStore(asm.movw_m_r);
}
// Generates the interpreter handlers for the atomic load and atomic store opcodes.
// Loads reuse the ordinary load generator with a MOV-family instruction; stores reuse
// the ordinary store generator with XCHG.
def genAtomicLoadsAndStores() {
	// Load Seq_Cst -> MOV (from memory).
	// NOTE(review): no atomic-specific alignment check is visible in this function;
	// confirm whether misaligned atomic accesses are trapped elsewhere.
	genLoad(Opcode.I32_ATOMIC_LOAD, BpTypeCode.I32.code, asm.movd_r_m);
	genLoad(Opcode.I64_ATOMIC_LOAD, BpTypeCode.I64.code, asm.movq_r_m);
	genLoad(Opcode.I32_ATOMIC_LOAD8_U, BpTypeCode.I32.code, asm.movbzx_r_m);
	genLoad(Opcode.I32_ATOMIC_LOAD16_U, BpTypeCode.I32.code, asm.movwzx_r_m);
	genLoad(Opcode.I64_ATOMIC_LOAD8_U, BpTypeCode.I64.code, asm.movbzx_r_m);
	// Each opcode's handler is generated exactly once (a duplicate I64_ATOMIC_LOAD16_U
	// generation was removed here).
	genLoad(Opcode.I64_ATOMIC_LOAD16_U, BpTypeCode.I64.code, asm.movwzx_r_m);
	genLoad(Opcode.I64_ATOMIC_LOAD32_U, BpTypeCode.I64.code, asm.movd_r_m);

	// Store Seq_Cst -> XCHG (alternative would be: MOV into memory, then MFENCE).
	// XCHG with a memory operand is implicitly locked on x86/x86-64, so no explicit
	// LOCK prefix is emitted.
	// Opcodes with the same access width share one handler: several opcodes are bound,
	// then a single store sequence is generated.
	bindHandler(Opcode.I32_ATOMIC_STORE);
	bindHandler(Opcode.I64_ATOMIC_STORE32);
	genStore(asm.xchgd_m_r);

	bindHandler(Opcode.I64_ATOMIC_STORE);
	genStore(asm.xchgq_m_r);

	bindHandler(Opcode.I32_ATOMIC_STORE8);
	bindHandler(Opcode.I64_ATOMIC_STORE8);
	genStore(asm.xchgb_m_r);

	bindHandler(Opcode.I32_ATOMIC_STORE16);
	bindHandler(Opcode.I64_ATOMIC_STORE16);
	genStore(asm.xchgw_m_r);
}
def emitI64_Load32s(dst: X86_64Gpr, addr: X86_64Addr) {
asm.movd_r_m(dst, addr);
asm.q.shl_r_i(dst, 32);
Expand Down Expand Up @@ -1521,6 +1550,186 @@ class X86_64InterpreterGen(ic: X86_64InterpreterCode, w: DataWriter) {
endHandler();
}
}
// Generates the interpreter handlers for all atomic read-modify-write opcodes.
// For each operation there are four handler bodies, one per access width
// (32-bit, 64-bit, 8-bit, 16-bit). The i32 and i64 opcodes that access the same
// width are bound to the same handler body. The statement order determines the
// order in which code is emitted.
def genAtomicRMWOperations() {
// Atomic RMW add: emitted with XADD.
bindHandler(Opcode.I32_ATOMIC_RMW_ADD);
bindHandler(Opcode.I64_ATOMIC_RMW32_ADD_U);
genAtomicAdd(asm.d.xadd_m_r);
bindHandler(Opcode.I64_ATOMIC_RMW_ADD);
genAtomicAdd(asm.q.xadd_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_ADD_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_ADD_U);
genAtomicAdd(asm.xaddb_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_ADD_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_ADD_U);
genAtomicAdd(asm.xaddw_m_r);
// Atomic RMW sub: the operand is negated with NEG and then added with XADD.
bindHandler(Opcode.I32_ATOMIC_RMW_SUB);
bindHandler(Opcode.I64_ATOMIC_RMW32_SUB_U);
genAtomicSub(asm.d.xadd_m_r, asm.d.neg_r);
bindHandler(Opcode.I64_ATOMIC_RMW_SUB);
genAtomicSub(asm.q.xadd_m_r, asm.q.neg_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_SUB_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_SUB_U);
genAtomicSub(asm.xaddb_m_r, asm.negb_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_SUB_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_SUB_U);
genAtomicSub(asm.xaddw_m_r, asm.negw_r);
// Atomic RMW and: an XCHG of the matching width followed by a locked AND.
bindHandler(Opcode.I32_ATOMIC_RMW_AND);
bindHandler(Opcode.I64_ATOMIC_RMW32_AND_U);
genAtomicBinop(asm.andd_m_r, asm.xchgd_m_r);
bindHandler(Opcode.I64_ATOMIC_RMW_AND);
genAtomicBinop(asm.andq_m_r, asm.xchgq_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_AND_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_AND_U);
genAtomicBinop(asm.andb_m_r, asm.xchgb_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_AND_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_AND_U);
genAtomicBinop(asm.andw_m_r, asm.xchgw_m_r);
// Atomic RMW or: an XCHG of the matching width followed by a locked OR.
bindHandler(Opcode.I32_ATOMIC_RMW_OR);
bindHandler(Opcode.I64_ATOMIC_RMW32_OR_U);
genAtomicBinop(asm.ord_m_r, asm.xchgd_m_r);
bindHandler(Opcode.I64_ATOMIC_RMW_OR);
genAtomicBinop(asm.orq_m_r, asm.xchgq_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_OR_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_OR_U);
genAtomicBinop(asm.orb_m_r, asm.xchgb_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_OR_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_OR_U);
genAtomicBinop(asm.orw_m_r, asm.xchgw_m_r);
// Atomic RMW xor: an XCHG of the matching width followed by a locked XOR.
bindHandler(Opcode.I32_ATOMIC_RMW_XOR);
bindHandler(Opcode.I64_ATOMIC_RMW32_XOR_U);
genAtomicBinop(asm.xord_m_r, asm.xchgd_m_r);
bindHandler(Opcode.I64_ATOMIC_RMW_XOR);
genAtomicBinop(asm.xorq_m_r, asm.xchgq_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_XOR_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_XOR_U);
genAtomicBinop(asm.xorb_m_r, asm.xchgb_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_XOR_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_XOR_U);
genAtomicBinop(asm.xorw_m_r, asm.xchgw_m_r);
// Atomic RMW xchg: emitted with XCHG.
bindHandler(Opcode.I32_ATOMIC_RMW_XCHG);
bindHandler(Opcode.I64_ATOMIC_RMW32_XCHG_U);
genAtomicExchange(asm.xchgd_m_r);
bindHandler(Opcode.I64_ATOMIC_RMW_XCHG);
genAtomicExchange(asm.xchgq_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_XCHG_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_XCHG_U);
genAtomicExchange(asm.xchgb_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_XCHG_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_XCHG_U);
genAtomicExchange(asm.xchgw_m_r);
// Atomic RMW cmpxchg: emitted with CMPXCHG, with the expected value loaded into RAX.
bindHandler(Opcode.I32_ATOMIC_RMW_CMPXCHG);
bindHandler(Opcode.I64_ATOMIC_RMW32_CMPXCHG_U);
genAtomicCompareAndExchange(asm.d.cmpxchg_m_r);
bindHandler(Opcode.I64_ATOMIC_RMW_CMPXCHG);
genAtomicCompareAndExchange(asm.q.cmpxchg_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW8_CMPXCHG_U);
bindHandler(Opcode.I64_ATOMIC_RMW8_CMPXCHG_U);
genAtomicCompareAndExchange(asm.cmpxchgb_m_r);
bindHandler(Opcode.I32_ATOMIC_RMW16_CMPXCHG_U);
bindHandler(Opcode.I64_ATOMIC_RMW16_CMPXCHG_U);
genAtomicCompareAndExchange(asm.cmpxchgw_m_r);
}
// Emits the body of one atomic read-modify-write handler and ends the handler.
//
//   op:               emits the locked memory instruction (XADD, AND/OR/XOR, XCHG or CMPXCHG).
//   neg:              if non-null, emitted on the operand register before {op} (used for sub).
//   exchange:         if non-null (and {neg} is null), emitted on the memory operand before
//                     the locked {op} (used for and/or/xor).
//   isCmpAndExchange: selects the three-operand compare-and-exchange sequence.
//
// Two code paths are emitted: the fast path addressing memory 0 through {r_mem0_base},
// and, unless multi-memory is disabled, an out-of-line path that decodes a memory index
// and addresses the selected memory through its own base.
//
// NOTE(review): no bounds or alignment check is emitted in this function; confirm whether
// these are guaranteed elsewhere.
// NOTE(review): narrow (8/16-bit) ops leave the upper bits of {r_tmp1} as loaded from the
// value stack; confirm whether results need zero-extension.
def genAtomicOp<T>(op: (X86_64Addr, X86_64Gpr) -> T,
		neg: (X86_64Gpr) -> T,
		exchange: (X86_64Addr, X86_64Gpr) -> T,
		isCmpAndExchange: bool) {
	computeCurIpForTrap(-1);
	var finish = asm.newLabel(), has_index: X86_64Label;
	if (!FeatureDisable.multiMemory) { // dynamically check for memory index
		has_index = asm.newLabel();
		asm.q.inc_r(r_ip); // skip flags byte
		asm.test_m_i(r_ip.plus(-1), BpConstants.MEMARG_INDEX_FLAG); // XXX: test byte
		asm.jc_rel_near(C.NZ, has_index);
	} else {
		asm.q.inc_r(r_ip); // skip flags byte
	}

	// ---- Fast path: memory 0 ----
	genReadUleb32(r_tmp0); // decode offset
	if (isCmpAndExchange) {
		asm.movq_r_m(r_tmp1, vsph[-3].value); // read index
		asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset
		asm.movq_r_m(r_tmp1, vsph[-2].value); // new value for cmpxchg
		spillReg(R.RAX); // CMPXCHG implicitly uses RAX
		asm.movq_r_m(R.RAX, vsph[-1].value);
		asm.lock();
		op(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1);
		// NOTE(review): nothing between the CMPXCHG and restoreReg copies RAX to the
		// value stack, and only one slot is popped; confirm the result handling.
		restoreReg(R.RAX); // restore the original RAX in case it was in use
		decrementVsp();
	} else {
		asm.movq_r_m(r_tmp1, vsph[-2].value); // read index
		asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset
		asm.movq_r_m(r_tmp1, vsph[-1].value); // read value
		if (neg != null) {
			neg(r_tmp1);
		} else if (exchange != null) {
			// NOTE(review): this exchange and the locked {op} below are two separate
			// instructions; confirm the pair gives the intended atomic semantics.
			exchange(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1);
		}
		asm.lock();
		op(r_mem0_base.plusR(r_tmp0, 1, 0), r_tmp1);
		// NOTE(review): the result is swapped into vsph[-1] and the value stack pointer
		// is not adjusted; confirm against the two-pop/one-push shape of RMW opcodes.
		asm.xchgq_m_r(vsph[-1].value, r_tmp1);
	}
	asm.bind(finish);
	endHandler();

	// ---- Out-of-line path: explicit memory index ----
	if (has_index != null) {
		asm.bind(has_index);
		genReadUleb32(r_tmp0); // decode memory index
		var memN = r_tmp3;
		asm.movq_r_m(memN, r_instance.plus(offsets.Instance_memories));
		asm.movq_r_m(memN, memN.plusR(r_tmp0, 8, offsets.Array_contents));
		asm.movq_r_m(memN, memN.plus(offsets.X86_64Memory_start));
		genReadUleb32(r_tmp0); // decode offset

		// All accesses below go through {memN}, the base of the selected memory,
		// not through the memory-0 base.
		if (isCmpAndExchange) {
			asm.movq_r_m(r_tmp1, vsph[-3].value); // read index
			asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset
			asm.movq_r_m(r_tmp1, vsph[-2].value); // new value for cmpxchg
			spillReg(R.RAX); // CMPXCHG implicitly uses RAX
			asm.movq_r_m(R.RAX, vsph[-1].value);
			asm.lock();
			op(memN.plusR(r_tmp0, 1, 0), r_tmp1);
			restoreReg(R.RAX); // restore the original RAX in case it was in use
			decrementVsp();
		} else {
			asm.movq_r_m(r_tmp1, vsph[-2].value); // read index
			asm.q.add_r_r(r_tmp0, r_tmp1); // add index + offset
			asm.movq_r_m(r_tmp1, vsph[-1].value); // read value
			if (neg != null) {
				neg(r_tmp1);
			} else if (exchange != null) {
				exchange(memN.plusR(r_tmp0, 1, 0), r_tmp1);
			}
			asm.lock();
			op(memN.plusR(r_tmp0, 1, 0), r_tmp1);
			asm.xchgq_m_r(vsph[-1].value, r_tmp1);
		}
		asm.jmp_rel_near(finish);
	}
}
// Emits an atomic RMW add handler body: {add} is the only instruction callback passed on
// (no negate step, no pre-exchange step, not a compare-and-exchange).
def genAtomicAdd<T>(add: (X86_64Addr, X86_64Gpr) -> T) {
genAtomicOp(add, null, null, false);
}
// Emits an atomic RMW sub handler body: {neg} is applied to the operand register first,
// then {add} is used as the locked memory instruction.
def genAtomicSub<T>(add: (X86_64Addr, X86_64Gpr) -> T, neg: (X86_64Gpr) -> T) {
genAtomicOp(add, neg, null, false);
}
// Emits an atomic RMW bitwise-op handler body: {exchange} is emitted on the memory operand
// first, then {binop} is used as the locked memory instruction.
def genAtomicBinop<T>(binop: (X86_64Addr, X86_64Gpr) -> T, exchange: (X86_64Addr, X86_64Gpr) -> T) {
genAtomicOp(binop, null, exchange, false);
}
// Emits an atomic RMW exchange handler body: {exchange} is passed as the main locked
// memory instruction, with no negate or pre-exchange step.
def genAtomicExchange<T>(exchange: (X86_64Addr, X86_64Gpr) -> T) {
genAtomicOp(exchange, null, null, false);
}
// Emits an atomic RMW compare-and-exchange handler body: {cmpxchg} is the locked memory
// instruction and the three-operand sequence is selected.
def genAtomicCompareAndExchange<T>(cmpxchg: (X86_64Addr, X86_64Gpr) -> T) {
genAtomicOp(cmpxchg, null, null, true);
}
def genExtensions() {
bindHandler(Opcode.I32_WRAP_I64); {
genTagUpdate(BpTypeCode.I32.code);
Expand Down
19 changes: 19 additions & 0 deletions test/regress.failures
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,22 @@ test/regress/ext:gc/array_new_fixed.bin.wast
test/regress/ext:gc/br_on_non2.bin.wast
test/regress/ext:gc/br_on_non.bin.wast
test/regress/ext:gc/ref_eq.bin.wast
test/regress/ext:threads/atomic_load_i32.bin.wast
test/regress/ext:threads/atomic_store_i32.bin.wast
test/regress/ext:threads/atomic_add_i32.bin.wast
test/regress/ext:threads/atomic_sub_i32.bin.wast
test/regress/ext:threads/atomic_and_i32.bin.wast
test/regress/ext:threads/atomic_or_i32.bin.wast
test/regress/ext:threads/atomic_xor_i32.bin.wast
test/regress/ext:threads/atomic_xchg_i32.bin.wast
test/regress/ext:threads/atomic_cmpxchg_i32.bin.wast

test/regress/ext:threads/atomic_load_i64.bin.wast
test/regress/ext:threads/atomic_store_i64.bin.wast
test/regress/ext:threads/atomic_add_i64.bin.wast
test/regress/ext:threads/atomic_sub_i64.bin.wast
test/regress/ext:threads/atomic_and_i64.bin.wast
test/regress/ext:threads/atomic_or_i64.bin.wast
test/regress/ext:threads/atomic_xor_i64.bin.wast
test/regress/ext:threads/atomic_xchg_i64.bin.wast
test/regress/ext:threads/atomic_cmpxchg_i64.bin.wast
21 changes: 21 additions & 0 deletions test/regress/ext:threads/atomic_add_i32.bin.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
(module binary
"\00\61\73\6d\01\00\00\00\01\88\80\80\80\00\02\60"
"\00\00\60\00\01\7f\03\85\80\80\80\00\04\00\01\01"
"\01\05\83\80\80\80\00\01\00\01\07\d8\80\80\80\00"
"\05\06\6d\65\6d\6f\72\79\02\00\04\6d\61\69\6e\00"
"\00\13\74\65\73\74\5f\69\33\32\5f\61\74\6f\6d\69"
"\63\5f\61\64\64\00\01\15\74\65\73\74\5f\69\33\32"
"\5f\61\74\6f\6d\69\63\5f\61\64\64\5f\38\00\02\16"
"\74\65\73\74\5f\69\33\32\5f\61\74\6f\6d\69\63\5f"
"\61\64\64\5f\31\36\00\03\0a\da\80\80\80\00\04\92"
"\80\80\80\00\00\41\00\41\05\36\02\00\41\00\41\03"
"\fe\1e\02\00\1a\0b\91\80\80\80\00\00\41\00\41\05"
"\36\02\00\41\00\41\03\fe\1e\02\00\0b\91\80\80\80"
"\00\00\41\00\41\05\36\02\00\41\00\41\03\fe\20\00"
"\00\0b\91\80\80\80\00\00\41\00\41\05\36\02\00\41"
"\00\41\03\fe\21\01\00\0b"
)
(assert_return (invoke "test_i32_atomic_add") (i32.const 0x5))
(assert_return (invoke "test_i32_atomic_add_8") (i32.const 0x5))
(assert_return (invoke "test_i32_atomic_add_16") (i32.const 0x5))
(assert_return (invoke "main"))
41 changes: 41 additions & 0 deletions test/regress/ext:threads/atomic_add_i32.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
;; Regression test for the i32 atomic read-modify-write add opcodes.
;; Every function first stores 5 at address 0 with a plain i32.store, then
;; performs an atomic add of 3 at the same address.
(module
(memory (export "memory") 1)
;; Performs the 32-bit atomic add and discards its result.
(func (export "main")
i32.const 0
i32.const 5
i32.store
i32.const 0
i32.const 3
i32.atomic.rmw.add offset=0 align=4 drop
)
;; 32-bit atomic add; the assertions below expect the result 5, the value
;; that was in memory before the add.
(func (export "test_i32_atomic_add") (result i32)
i32.const 0
i32.const 5
i32.store
i32.const 0
i32.const 3
i32.atomic.rmw.add offset=0 align=4
)
;; 8-bit variant of the same test.
(func (export "test_i32_atomic_add_8") (result i32)
i32.const 0
i32.const 5
i32.store
i32.const 0
i32.const 3
i32.atomic.rmw8.add_u offset=0 align=1
)
;; 16-bit variant of the same test.
(func (export "test_i32_atomic_add_16") (result i32)
i32.const 0
i32.const 5
i32.store
i32.const 0
i32.const 3
i32.atomic.rmw16.add_u offset=0 align=2
)
)

(assert_return (invoke "test_i32_atomic_add") (i32.const 5))
(assert_return (invoke "test_i32_atomic_add_8") (i32.const 5))
(assert_return (invoke "test_i32_atomic_add_16") (i32.const 5))
(assert_return (invoke "main"))

25 changes: 25 additions & 0 deletions test/regress/ext:threads/atomic_add_i64.bin.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
(module binary
"\00\61\73\6d\01\00\00\00\01\88\80\80\80\00\02\60"
"\00\00\60\00\01\7e\03\86\80\80\80\00\05\00\01\01"
"\01\01\05\83\80\80\80\00\01\00\01\07\f1\80\80\80"
"\00\06\06\6d\65\6d\6f\72\79\02\00\04\6d\61\69\6e"
"\00\00\13\74\65\73\74\5f\69\36\34\5f\61\74\6f\6d"
"\69\63\5f\61\64\64\00\01\15\74\65\73\74\5f\69\36"
"\34\5f\61\74\6f\6d\69\63\5f\61\64\64\5f\38\00\02"
"\16\74\65\73\74\5f\69\36\34\5f\61\74\6f\6d\69\63"
"\5f\61\64\64\5f\31\36\00\03\16\74\65\73\74\5f\69"
"\36\34\5f\61\74\6f\6d\69\63\5f\61\64\64\5f\33\32"
"\00\04\0a\f0\80\80\80\00\05\92\80\80\80\00\00\41"
"\00\42\05\37\03\00\41\00\42\03\fe\1f\03\00\1a\0b"
"\91\80\80\80\00\00\41\00\42\05\37\03\00\41\00\42"
"\03\fe\1f\03\00\0b\91\80\80\80\00\00\41\00\42\05"
"\37\03\00\41\00\42\03\fe\22\00\00\0b\91\80\80\80"
"\00\00\41\00\42\05\37\03\00\41\00\42\03\fe\23\01"
"\00\0b\91\80\80\80\00\00\41\00\42\05\37\03\00\41"
"\00\42\03\fe\24\02\00\0b"
)
(assert_return (invoke "test_i64_atomic_add") (i64.const 0x5))
(assert_return (invoke "test_i64_atomic_add_8") (i64.const 0x5))
(assert_return (invoke "test_i64_atomic_add_16") (i64.const 0x5))
(assert_return (invoke "test_i64_atomic_add_32") (i64.const 0x5))
(assert_return (invoke "main"))
Loading

0 comments on commit 8d0edb6

Please sign in to comment.