Skip to content

Commit

Permalink
further optimization & accuracy, notes
Browse files Browse the repository at this point in the history
  • Loading branch information
ziplantil committed Oct 18, 2022
1 parent 84f1454 commit 520062e
Show file tree
Hide file tree
Showing 17 changed files with 307 additions and 216 deletions.
14 changes: 14 additions & 0 deletions NOTES
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Notes about w65c02s and its development:
* The RESET/RST behavior is not completely accurate, but the NMOS 6502 at
least has unpredictable behavior during a reset anyway. It seems to do 2
spurious reads of PC before the third read (the first read of RESET), but in
addition to that, the current instruction gets partially run and often throws
complete nonsense onto the address bus. How the CMOS 65C02 behaves is uncertain.
I have chosen not to emulate the weird RESET behavior; instead, the reset is
triggered instantly after the end of an instruction.
* For some reason, -O2 and -O3 are consistently *slower* on gcc than -O1. This
seems to occur due to -ftree-tail-merge, which causes extra range checks to
be added to switch statements, despite __builtin_unreachable() being used.
* The case of W65C02SCE_COARSE=0 could be optimized greatly by eliminating
the "cont = 0" branch entirely, but that does not seem possible in C89, even
with compiler extensions.
4 changes: 2 additions & 2 deletions docs/defines.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ causes small deviations in the return value. For example, trying to run
300,002 cycles, but this is reflected in its return value.

By compromising on emulation resolution, coarse mode considerably increases
emulation performance (perhaps by as much as 30%, depending on the
emulation performance (perhaps by as much as 30-40%, depending on the
target system and used compiler optimizations).

## W65C02SCE_COARSE_CYCLE_COUNTER
Expand All @@ -28,7 +28,7 @@ when used from callbacks).

If set to 1, the value returned by `w65c02s_get_cycle_count` will only be
updated after a `w65c02s_run_cycles` or `w65c02s_run_instructions` call.
This improves performance.
This improves performance (probably by about 5-10%).

## W65C02SCE_LINK
* **Default**: 0 (disabled)
Expand Down
12 changes: 9 additions & 3 deletions src/decode.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*******************************************************************************
w65c02sce -- cycle-accurate C emulator of the WDC 65C02S
by ziplantil 2022 -- under the CC0 license
version: 2022-10-16
version: 2022-10-18
decode.c - instruction decoder
*******************************************************************************/
Expand Down Expand Up @@ -45,7 +45,10 @@
#define brk MODE_STACK_BRK
#define rti MODE_STACK_RTI

static const unsigned modes[256] = {
#if __STDC_VERSION__ >= 201112L
_Alignas(256)
#endif
static const unsigned char modes[256] = {
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
/*00:0F*/ brk,zix,imm,im1,wzp,zpg,wzp,zpb,phv,imm,imp,im1,wab,abs,wab,rlb,
/*10:1F*/ rel,ziy,zpi,im1,wzp,zpx,wzx,zpb,imp,aby,imp,im1,wab,abx,wax,rlb,
Expand Down Expand Up @@ -135,7 +138,10 @@ static const unsigned modes[256] = {
#define WAI OPER_WAI
#define STP OPER_STP

static const unsigned opers[256] = {
#if __STDC_VERSION__ >= 201112L
_Alignas(256)
#endif
static const unsigned char opers[256] = {
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
/*00:0F*/ BRK,ORA,NOP,NOP,TSB,ORA,ASL,000,PHP,ORA,ASL,NOP,TSB,ORA,ASL,000,
/*10:1F*/ BPL,ORA,ORA,NOP,TRB,ORA,ASL,001,CLC,ORA,INC,NOP,TRB,ORA,ASL,001,
Expand Down
20 changes: 10 additions & 10 deletions src/decode.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*******************************************************************************
w65c02sce -- cycle-accurate C emulator of the WDC 65C02S
by ziplantil 2022 -- under the CC0 license
version: 2022-10-16
version: 2022-10-18
decode.h - instruction decoder
*******************************************************************************/
Expand Down Expand Up @@ -102,15 +102,15 @@
#define OPER_TXS 19 /* impl */

/* branch instrs. no NOPs use this mode, so 0 is ok */
#define OPER_BRA 0
#define OPER_BPL 1
#define OPER_BMI 2
#define OPER_BVC 3
#define OPER_BVS 4
#define OPER_BCC 5
#define OPER_BCS 6
#define OPER_BNE 7
#define OPER_BEQ 8
#define OPER_BPL 0
#define OPER_BMI 1
#define OPER_BVC 2
#define OPER_BVS 3
#define OPER_BCC 4
#define OPER_BCS 5
#define OPER_BNE 6
#define OPER_BEQ 7
#define OPER_BRA 8

/* stack instrs. no NOPs use this mode, so 0 is ok */
#define OPER_PHP 0
Expand Down
134 changes: 84 additions & 50 deletions src/execute.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*******************************************************************************
w65c02sce -- cycle-accurate C emulator of the WDC 65C02S
by ziplantil 2022 -- under the CC0 license
version: 2022-10-16
version: 2022-10-18
execute.c - instruction execution unit
*******************************************************************************/
Expand All @@ -13,50 +13,27 @@
#include "decode.h"
#include "execute.h"
#include "mode.h"
#include "modejump.h"
#include "oper.h"

/* Decides whether a CPU that is not in the plain RUN state may execute.
   Returns 1 if execution may proceed (the state is neither WAIT nor STOP,
   or a WAIT was just ended by a pending interrupt), and 0 if the CPU must
   remain halted. */
static int handle_stp_wai(struct w65c02s_cpu *cpu) {
switch (CPU_STATE_EXTRACT(cpu)) {
case CPU_STATE_WAIT:
/* a pending IRQ or NMI ends the wait: latch it and switch back to RUN */
if (cpu->irq || cpu->nmi) {
w65c02si_irq_latch(cpu);
CPU_STATE_INSERT(cpu, CPU_STATE_RUN);
return 1;
} else {
/* nothing pending: keep waiting */
return 0;
}
case CPU_STATE_STOP:
/* this function never leaves the STOP state by itself */
return 0;
}
/* any other state does not block execution */
return 1;
}

INLINE void handle_reset(struct w65c02s_cpu *cpu) {
/* do RESET */
cpu->in_rst = 1;
cpu->in_nmi = cpu->in_irq = 0;
cpu->cpu_state = CPU_STATE_RUN;
SET_P(P_A1, 1);
SET_P(P_B, 1);
READ(cpu->pc); /* spurious */
w65c02si_decode(cpu, 0);
}

INLINE void handle_nmi(struct w65c02s_cpu *cpu) {
/* handle NMI: treat current instruction as BRK */
cpu->in_nmi = 1;
cpu->nmi = 0;
cpu->int_trig &= ~CPU_STATE_NMI;
CPU_STATE_CLEAR_NMI(cpu);
READ(cpu->pc); /* spurious */
w65c02si_decode(cpu, 0);
}

INLINE void handle_irq(struct w65c02s_cpu *cpu) {
/* handle IRQ: treat current instruction as BRK */
cpu->in_irq = 1;
CPU_STATE_CLEAR_IRQ(cpu);
READ(cpu->pc); /* spurious */
w65c02si_decode(cpu, 0);
}

INLINE int handle_interrupt(struct w65c02s_cpu *cpu) {
Expand All @@ -68,6 +45,8 @@ INLINE int handle_interrupt(struct w65c02s_cpu *cpu) {
handle_irq(cpu);
else
return 0;
w65c02si_stall(cpu);
w65c02si_decode(cpu, 0); /* force BRK to handle interrupt */
return 1;
}

Expand All @@ -84,14 +63,41 @@ INLINE void handle_end_of_instruction(struct w65c02s_cpu *cpu) {

#if !W65C02SCE_COARSE

/* Stalls a CPU that is in the WAIT or STOP state, calling w65c02si_stall()
   once per loop iteration.
   Returns 1 once CYCLE_CONDITION becomes true after a stall. Returns 0 when
   the caller should go on: the state is neither WAIT nor STOP, a RESET state
   was observed, or (WAIT only) a triggered interrupt was latched and the
   state was set back to RUN.
   NOTE(review): CYCLE_CONDITION is defined outside this block; presumably it
   signals that the requested cycle budget has been used up -- confirm. */
static int handle_stp_wai_c(struct w65c02s_cpu *cpu) {
switch (CPU_STATE_EXTRACT(cpu)) {
case CPU_STATE_WAIT:
for (;;) {
/* the state is re-read on every iteration; presumably it can be changed
   to RESET from outside while stalling -- TODO confirm */
if (CPU_STATE_EXTRACT(cpu) == CPU_STATE_RESET) {
return 0;
} else if (cpu->int_trig) {
/* an interrupt was triggered: latch it and leave the WAIT state */
w65c02si_irq_latch(cpu);
CPU_STATE_INSERT(cpu, CPU_STATE_RUN);
return 0;
}
/* nothing to do yet: stall, then check whether to hand control back */
w65c02si_stall(cpu);
if (CYCLE_CONDITION) return 1;
}
case CPU_STATE_STOP:
for (;;) {
/* unlike WAIT, interrupts are not checked here; only RESET ends the loop */
if (CPU_STATE_EXTRACT(cpu) == CPU_STATE_RESET) {
return 0;
}
w65c02si_stall(cpu);
if (CYCLE_CONDITION) return 1;
}
}
/* neither WAIT nor STOP: nothing to stall for */
return 0;
}

INTERNAL unsigned long w65c02si_execute_c(struct w65c02s_cpu *cpu,
unsigned long maximum_cycles) {
uint8_t ir;
if (CPU_STATE_EXTRACT(cpu) != CPU_STATE_RUN) {
if (!handle_stp_wai(cpu)) return maximum_cycles;
}
if (UNLIKELY(!maximum_cycles)) return 0;

#if W65C02SCE_COARSE_CYCLE_COUNTER
cpu->left_cycles = maximum_cycles;
#else
cpu->target_cycles = cpu->total_cycles + maximum_cycles;
#endif
if (cpu->cycl) {
/* continue running instruction */
if (w65c02si_run_mode(cpu, CONTINUE_INSTRUCTION)) {
Expand All @@ -108,33 +114,44 @@ INTERNAL unsigned long w65c02si_execute_c(struct w65c02s_cpu *cpu,
goto next_instruction;
}

while (cpu->left_cycles) {
for (;;) {
unsigned long cyclecount;
#if W65C02SCE_COARSE_CYCLE_COUNTER
#define CYCLES_NOW (maximum_cycles - cpu->left_cycles)
#else
#define CYCLES_NOW (cpu->total_cycles - (cpu->target_cycles - maximum_cycles))
#endif
if (UNLIKELY(cpu->cpu_state != CPU_STATE_RUN)) {
if (cpu->cpu_state & CPU_STATE_STEP) {
cpu->cycl = 0;
return maximum_cycles - cpu->left_cycles;
return CYCLES_NOW;
}
bypass_step:
if (!handle_stp_wai(cpu)) return maximum_cycles;
if (handle_interrupt(cpu)) {
ir = 0;
goto decoded;
}
if (handle_stp_wai_c(cpu)) return CYCLES_NOW;
if (handle_interrupt(cpu)) goto decoded;
}

next_instruction:
w65c02si_decode(cpu, READ(cpu->pc++));
w65c02si_prerun_mode(cpu);

decoded:
#if !W65C02SCE_COARSE_CYCLE_COUNTER
++cpu->total_cycles;
#if W65C02SCE_COARSE_CYCLE_COUNTER
cyclecount = --cpu->left_cycles;
if (UNLIKELY(!cyclecount)) break;
#else
cyclecount = ++cpu->total_cycles;
if (UNLIKELY(cyclecount == cpu->target_cycles)) break;
#endif
if (!--cpu->left_cycles) break;
cyclecount = cpu->left_cycles;

cpu->cycl = 1;
if (UNLIKELY(w65c02si_run_mode(cpu, STARTING_INSTRUCTION))) {
if (cpu->cycl) {
#if W65C02SCE_COARSE_CYCLE_COUNTER
cpu->cycl += cyclecount - cpu->left_cycles;
#else
cpu->cycl += cpu->total_cycles - cyclecount;
#endif
} else {
handle_end_of_instruction(cpu);
}
Expand All @@ -147,23 +164,40 @@ INTERNAL unsigned long w65c02si_execute_c(struct w65c02s_cpu *cpu,

#else /* W65C02SCE_COARSE */

/* Handles a CPU that is in the WAIT or STOP state.
   Returns 0 if the caller should go on (the state is neither WAIT nor STOP,
   or a WAIT was just ended by a triggered interrupt), or 1 after a single
   w65c02si_stall() call has been made instead of running an instruction. */
static int handle_stp_wai_i(struct w65c02s_cpu *cpu) {
switch (CPU_STATE_EXTRACT(cpu)) {
case CPU_STATE_WAIT:
/* if there is an IRQ or NMI, latch it immediately and continue */
if (cpu->int_trig) {
w65c02si_irq_latch(cpu);
CPU_STATE_INSERT(cpu, CPU_STATE_RUN);
return 0;
}
/* nothing triggered: fall through and stall exactly like STOP */
case CPU_STATE_STOP:
/* spurious read to waste a cycle */
w65c02si_stall(cpu);
/* total_cycles is advanced here only when W65C02SCE_COARSE_CYCLE_COUNTER
   is disabled */
#if !W65C02SCE_COARSE_CYCLE_COUNTER
++cpu->total_cycles;
#endif
return 1;
}
/* neither WAIT nor STOP: nothing to stall for */
return 0;
}

INTERNAL unsigned long w65c02si_execute_i(struct w65c02s_cpu *cpu) {
unsigned cycles;
uint8_t ir;
if (UNLIKELY(cpu->cpu_state != CPU_STATE_RUN)) {
if (!handle_stp_wai(cpu)) return 1;
if (handle_interrupt(cpu)) {
ir = 0;
goto decoded;
}
if (handle_stp_wai_i(cpu)) return 1;
if (handle_interrupt(cpu)) goto decoded;
}
w65c02si_decode(cpu, READ(cpu->pc++));

w65c02si_decode(cpu, READ(cpu->pc++));
decoded:
w65c02si_prerun_mode(cpu);
#if !W65C02SCE_COARSE_CYCLE_COUNTER
++cpu->total_cycles;
#endif
w65c02si_prerun_mode(cpu);
cycles = w65c02si_run_mode(cpu, STARTING_INSTRUCTION);
cycles = w65c02si_run_mode(cpu);
handle_end_of_instruction(cpu);
return cycles;
}
Expand Down
3 changes: 2 additions & 1 deletion src/execute.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*******************************************************************************
w65c02sce -- cycle-accurate C emulator of the WDC 65C02S
by ziplantil 2022 -- under the CC0 license
version: 2022-10-16
version: 2022-10-18
execute.h - instruction execution unit
*******************************************************************************/
Expand All @@ -12,6 +12,7 @@
#define W65C02SCE
#include "w65c02s.h"

INTERNAL_INLINE void w65c02si_irq_update_mask(struct w65c02s_cpu *cpu);
#if !W65C02SCE_COARSE
INTERNAL unsigned long w65c02si_execute_c(struct w65c02s_cpu *cpu,
unsigned long maximum_cycles);
Expand Down
Loading

0 comments on commit 520062e

Please sign in to comment.