Multi-core scheduler for the RP2040 #4925
Changes from all commits: af598e0, b0fa829, 781d835, a7842af
@@ -1,4 +1,4 @@
-//go:build scheduler.tasks && cortexm
+//go:build (scheduler.tasks || scheduler.cores) && cortexm
 
 package task
@@ -4,10 +4,17 @@ package runtime

import (
	"device/arm"
	"device/rp"
	"internal/task"
	"machine"
	"machine/usb/cdc"
	"runtime/interrupt"
	"runtime/volatile"
	"unsafe"
)

const numCPU = 2

// machineTicks is provided by package machine.
func machineTicks() uint64
@@ -43,6 +50,284 @@ func sleepTicks(d timeUnit) {
	}
}
// Currently sleeping core, or 0xff.
// Must only be accessed with the scheduler lock held.
var sleepingCore uint8 = 0xff

// Return whether another core is sleeping.
// May only be called with the scheduler lock held.
func hasSleepingCore() bool {
	return sleepingCore != 0xff
}

// Almost identical to sleepTicks, except that it will unlock/lock the scheduler
// while sleeping and is interruptible by interruptSleepTicksMulticore.
// This may only be called with the scheduler lock held.
func sleepTicksMulticore(d timeUnit) {
	sleepingCore = uint8(currentCPU())

	// Note: interruptSleepTicksMulticore will be able to interrupt this, since
	// it executes the "sev" instruction which would make sleepTicks return
	// immediately without sleeping. Even if it happens while configuring the
	// sleep operation.

	schedulerLock.Unlock()
	sleepTicks(d)
	schedulerLock.Lock()

	sleepingCore = 0xff
}

// Interrupt an ongoing call to sleepTicksMulticore on another core.
func interruptSleepTicksMulticore(wakeup timeUnit) {
	arm.Asm("sev")
}
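
A hypothetical sketch of a call site for this pair (the helper below is illustrative and not part of the diff; the real call sites live in the shared scheduler code):

// Illustrative only: when a task with a new wakeup time is queued while
// another core is sleeping, interrupt that sleep so the sleep duration is
// re-evaluated. Must be called with the scheduler lock held.
func noteNewWakeupSketch(wakeup timeUnit) {
	if hasSleepingCore() {
		interruptSleepTicksMulticore(wakeup)
	}
}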
// Number of cores that are currently in schedulerUnlockAndWait.
// It is possible for both cores to be sleeping, if the program is waiting for
// an interrupt (or is deadlocked).
var waitingCore uint8

// Put the scheduler to sleep, since there are no tasks to run.
// This will unlock the scheduler lock, and must be called with the scheduler
// lock held.
func schedulerUnlockAndWait() {
	waitingCore++
	schedulerLock.Unlock()
	arm.Asm("wfe")
	schedulerLock.Lock()
	waitingCore--
}

// Wake another core, if one is sleeping. Must be called with the scheduler lock
// held.
func schedulerWake() {
	if waitingCore != 0 {
		arm.Asm("sev")
	}
}
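
For orientation, a hypothetical sketch of how a scheduler loop could combine these two helpers (the loop and nextTask are illustrative, not the actual TinyGo scheduler):

// Illustrative only: nextTask stands in for however the real scheduler picks
// a runnable task.
func schedulerLoopSketch() {
	schedulerLock.Lock()
	for {
		t := nextTask()
		if t == nil {
			// Nothing to run: unlock and sleep until schedulerWake (or any
			// other event) fires, then look for work again.
			schedulerUnlockAndWait()
			continue
		}
		schedulerLock.Unlock()
		t.Resume() // run the task outside the scheduler lock
		schedulerLock.Lock()
	}
}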
|
||
// Return the current core number: 0 or 1. | ||
func currentCPU() uint32 { | ||
return rp.SIO.CPUID.Get() | ||
} | ||
|
||
// Start the secondary cores for this chip. | ||
// On the RP2040, there is only one other core to start. | ||
func startSecondaryCores() { | ||
// Start the second core of the RP2040. | ||
// See section 2.8.2 in the datasheet. | ||
seq := 0 | ||
for { | ||
cmd := core1StartSequence[seq] | ||
if cmd == 0 { | ||
multicore_fifo_drain() | ||
arm.Asm("sev") | ||
} | ||
multicore_fifo_push_blocking(cmd) | ||
response := multicore_fifo_pop_blocking() | ||
if cmd != response { | ||
seq = 0 | ||
continue | ||
} | ||
seq = seq + 1 | ||
if seq >= len(core1StartSequence) { | ||
break | ||
} | ||
} | ||
|
||
// Enable the FIFO interrupt for the GC stop the world phase. | ||
// We can only do this after we don't need the FIFO anymore for starting the | ||
// second core. | ||
intr := interrupt.New(rp.IRQ_SIO_IRQ_PROC0, func(intr interrupt.Interrupt) { | ||
switch rp.SIO.FIFO_RD.Get() { | ||
case 1: | ||
gcInterruptHandler(0) | ||
} | ||
}) | ||
intr.Enable() | ||
intr.SetPriority(0xff) | ||
} | ||
|
||
var core1StartSequence = [...]uint32{ | ||
0, 0, 1, | ||
uint32(uintptr(unsafe.Pointer(&__isr_vector))), | ||
uint32(uintptr(unsafe.Pointer(&stack1TopSymbol))), | ||
uint32(exportedFuncPtr(runCore1)), | ||
} | ||
|
||
//go:extern __isr_vector | ||
var __isr_vector [0]uint32 | ||
|
||
//go:extern _stack1_top | ||
var stack1TopSymbol [0]uint32 | ||
|
||
// The function that is started on the second core. | ||
// | ||
//export tinygo_runCore1 | ||
func runCore1() { | ||
// Clear sticky bit that seems to have been set while starting this core. | ||
rp.SIO.FIFO_ST.Set(rp.SIO_FIFO_ST_ROE) | ||
|
||
// Enable the FIFO interrupt, mainly used for the stop-the-world phase of | ||
// the GC. | ||
// Use the lowest possible priority (highest priority value), so that other | ||
// interrupts can still happen while the GC is running. | ||
intr := interrupt.New(rp.IRQ_SIO_IRQ_PROC1, func(intr interrupt.Interrupt) { | ||
switch rp.SIO.FIFO_RD.Get() { | ||
case 1: | ||
gcInterruptHandler(1) | ||
} | ||
}) | ||
intr.Enable() | ||
intr.SetPriority(0xff) | ||
|
||
// Now start running the scheduler on this core. | ||
schedulerLock.Lock() | ||
scheduler(false) | ||
schedulerLock.Unlock() | ||
|
||
// The main function returned. | ||
exit(0) | ||
} | ||
|
||
// The below multicore_fifo_* functions have been translated from the Raspberry | ||
// Pi Pico SDK. | ||
|
||
func multicore_fifo_rvalid() bool { | ||
return rp.SIO.FIFO_ST.Get()&rp.SIO_FIFO_ST_VLD != 0 | ||
} | ||
|
||
func multicore_fifo_wready() bool { | ||
return rp.SIO.FIFO_ST.Get()&rp.SIO_FIFO_ST_RDY != 0 | ||
} | ||
|
||
func multicore_fifo_drain() { | ||
for multicore_fifo_rvalid() { | ||
rp.SIO.FIFO_RD.Get() | ||
} | ||
} | ||
|
||
func multicore_fifo_push_blocking(data uint32) { | ||
for !multicore_fifo_wready() { | ||
} | ||
rp.SIO.FIFO_WR.Set(data) | ||
arm.Asm("sev") | ||
} | ||
|
||
func multicore_fifo_pop_blocking() uint32 { | ||
for !multicore_fifo_rvalid() { | ||
arm.Asm("wfe") | ||
} | ||
|
||
return rp.SIO.FIFO_RD.Get() | ||
} | ||
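
Taken together, a minimal sketch of the handshake that startSecondaryCores performs with these helpers (the function below is illustrative only):

// Illustrative only: push one word of the launch sequence and check that
// core 1 echoes it back, as required by the protocol in section 2.8.2 of
// the RP2040 datasheet.
func sendAndVerifySketch(word uint32) bool {
	multicore_fifo_push_blocking(word)           // blocks until there is FIFO space
	return multicore_fifo_pop_blocking() == word // blocks until core 1 replies
}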
// Value used to communicate between the GC core and the other (paused) cores.
var gcSignalWait volatile.Register8

// The GC interrupted this core for the stop-the-world phase.
// This function handles that, and only returns after the stop-the-world phase
// has ended.
func gcInterruptHandler(hartID uint32) {
	// Let the GC know we're ready.
	gcScanState.Add(1)
	arm.Asm("sev")

	// Wait until we get a signal to start scanning.
	for gcSignalWait.Get() == 0 {
		arm.Asm("wfe")
	}
	gcSignalWait.Set(0)

	// Scan the stack(s) of this core.
	scanCurrentStack()
	if !task.OnSystemStack() {
		// Mark system stack.
		markRoots(task.SystemStack(), coreStackTop(hartID))
	}

	// Signal we've finished scanning.
	gcScanState.Store(1)
	arm.Asm("sev")

	// Wait until we get a signal that the stop-the-world phase has ended.
	for gcSignalWait.Get() == 0 {
		arm.Asm("wfe")
	}
	gcSignalWait.Set(0)

	// Signal we received the signal and are going to exit the interrupt.
	gcScanState.Add(1)
	arm.Asm("sev")
}

// Pause the given core by sending it an interrupt.
func gcPauseCore(core uint32) {
	rp.SIO.FIFO_WR.Set(1)
}

// Signal the given core that it can resume one step.
// This is called twice after gcPauseCore: the first time to scan the stack of
// the core, and the second time to end the stop-the-world phase.
func gcSignalCore(core uint32) {
	gcSignalWait.Set(1)
	arm.Asm("sev")
}
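
The comment on gcSignalCore implies a fixed sequence on the GC core. A hypothetical sketch of that driver, with the synchronization points left as comments since the exact gcScanState bookkeeping lives elsewhere in the runtime:

// Hypothetical sketch of the GC-side stop-the-world sequence, mirroring the
// steps acknowledged by gcInterruptHandler above.
func stopTheWorldSketch(otherCore uint32) {
	gcPauseCore(otherCore) // interrupt the other core via the FIFO
	// ...wait for gcScanState to show that the core entered its handler...

	gcSignalCore(otherCore) // first signal: let the paused core scan its stacks
	// ...wait for gcScanState to show that scanning finished, then run the
	// mark phase on this core...

	gcSignalCore(otherCore) // second signal: end the stop-the-world phase
	// ...wait for the final acknowledgement before resuming normal execution...
}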
// Returns the stack top (highest address) of the system stack of the given
// core.
func coreStackTop(core uint32) uintptr {
	switch core {
	case 0:
		return uintptr(unsafe.Pointer(&stackTopSymbol))
	case 1:
		return uintptr(unsafe.Pointer(&stack1TopSymbol))
	default:
		runtimePanic("unexpected core")
		return 0
	}
}

// These spinlocks are needed by the runtime.
var (
	printLock     = spinLock{id: 0}
	schedulerLock = spinLock{id: 1}
	atomicsLock   = spinLock{id: 2}
	futexLock     = spinLock{id: 3}
)

// A hardware spinlock, one of the 32 spinlocks defined in the SIO peripheral.
type spinLock struct {
	id uint8
}

// Return the spinlock register: rp.SIO.SPINLOCKx
func (l *spinLock) spinlock() *volatile.Register32 {
	return (*volatile.Register32)(unsafe.Add(unsafe.Pointer(&rp.SIO.SPINLOCK0), l.id*4))
}

func (l *spinLock) Lock() {
	// Wait for the lock to be available.
	spinlock := l.spinlock()
	for spinlock.Get() == 0 {
		// TODO: use wfe and send an event when unlocking so the CPU can go to
		// sleep while waiting for the lock.
		// Unfortunately when doing that, time.Sleep() seems to hang somewhere.
		// This needs some debugging to figure out.
	}
}

Review thread on the TODO comment above (diff lines +314 to +317):

- This could hide some other subtle error that affects not just …
- "the voice of reason"? 😺 This should build with multicore, I think: … Perhaps we should consider it experimental for this release?
- @eliasnaur: from reading the comment carefully, I interpret it to mean that the implementation in this PR could be improved by using wfe, but it does not yet due to the … That does not change the point you are making overall. 👍
- Another implication of changing the default to multicore is that default power use will be higher with the second core active. ⚡
- I don't think power consumption will be affected much: when one of the cores has nothing to do, it will just go to sleep. But I didn't measure this. (Should be easy to do by compiling with -scheduler=tasks and -scheduler=cores and measuring the difference.)
- The way it works (at least as I understand it) is that both cores are always running; the second core is just asleep on reset. With multicore support, it is pulled out of this sleep state and starts the scheduler, which in many cases will put it right back to sleep waiting for a goroutine to run. So it spends a little time configuring stuff and then goes back to sleep. That is what all of the docs and references I could find are claiming about power use, so it would appear to be fine from that point of view.
- As much as this feature excites me, I think @eliasnaur is probably correct that we should not make it the default yet.
- @aykevl has a better idea of the robustness of the multi-core support than I; I merely pointed out the timing. I'm also very excited about this code, and I would be comfortable making multi-core the default if the release notes spelled out instructions for switching back to the single-core scheduler. It's just a …
- I had already confirmed that … I assume that the extra stack space is wasted in that case, so I also tested this command (note I missed the …). This also worked correctly, and according to … Based on this, I think we can merge this PR as it is, and just add the docs for falling back to single core.
func (l *spinLock) Unlock() {
	l.spinlock().Set(0)
}
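
For context: on the RP2040, reading one of the SIO SPINLOCK registers claims that lock (the read returns a nonzero value on success and 0 if it is already held), and any write releases it, which is why Lock spins on Get() == 0 while Unlock is a single Set(0). A minimal usage sketch (the function is illustrative only):

// Illustrative only: serialize output from both cores using the print lock
// declared above.
func printFromAnyCoreSketch(msg string) {
	printLock.Lock()
	println(msg) // only one core at a time can be here
	printLock.Unlock()
}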
// Wait until a signal is received, indicating that it can resume from the
// spinloop.
func spinLoopWait() {
	arm.Asm("wfe")
}

func waitForEvents() {
	arm.Asm("wfe")
}
Review thread:

- Can the two files be merged, unlocking multicore on the RP2350 for (approximately) free?
- Probably, but I don't have an RP2350 to test. I think there are small differences that need to be accounted for. I plan on getting an RP2350 soon!
- I will give you one next week!