Open
Description
CircuitPython version
When running this simple code that creates several async/await tasks that run in parallel, CircuitPython will HARD_FAULT randomly roughly every several hours.
I have tested this on CircuitPython 8.0.5 and 8.1.0 and they both act similarly.
Code/REPL
# main_test_why_circuitpython_hardfaults.py
# This file will just run a simple asyncio infinite loop to see if that causes
# a hard fault after 24 hours.
import asyncio
import time
from adafruit_datetime import datetime as adatetime
class Dashboard:
def __init__(self):
print("Initting Dashboard")
# Create several infinite loop tasks that loop each 0.01 to 0.5 seconds
self.loop_task1 = asyncio.create_task(self.asyncTaskLoop("task1", 100, 0.1))
self.loop_task2 = asyncio.create_task(self.asyncTaskLoop("task2", 50, 0.2))
self.loop_task3 = asyncio.create_task(self.asyncTaskLoop("task3", 100, 0.1))
self.loop_task4 = asyncio.create_task(self.asyncTaskLoop("task4", 20, 0.5))
self.loop_task5 = asyncio.create_task(self.asyncTaskLoop("task5", 17, 0.6))
self.loop_task6 = asyncio.create_task(self.asyncTaskLoop("task6", 100, 0.1))
self.loop_task7 = asyncio.create_task(self.asyncTaskLoop("task7", 50, 0.2))
self.loop_task8 = asyncio.create_task(self.asyncTaskLoop("task8", 100, 0.1))
self.loop_task9 = asyncio.create_task(self.asyncTaskLoop("task9", 20, 0.5))
self.loop_task10 = asyncio.create_task(self.asyncTaskLoop("task10", 17, 0.6))
async def asyncTaskLoop(self, taskName, howOftenTellUsYouLooped, loopDelay):
print("{}: Starting infinite loop".format(taskName))
ctr = 0
startTime = time.monotonic()
while True:
await asyncio.sleep(loopDelay) # don't forget the await
ctr += 1
if ctr > howOftenTellUsYouLooped:
print("{} {}: Looped {} times waiting {}s on each loop. Should have taken {}s, but took {}s".format(
adatetime.now(), taskName, howOftenTellUsYouLooped, loopDelay,
loopDelay*howOftenTellUsYouLooped,
time.monotonic() - startTime))
ctr = 0
startTime = time.monotonic()
async def main():
d = Dashboard()
await asyncio.gather(
d.loop_task1, d.loop_task2, d.loop_task3, d.loop_task4, d.loop_task5,
d.loop_task6, d.loop_task7, d.loop_task8, d.loop_task9, d.loop_task10
) # Don't forget the await!
asyncio.run(main())
Behavior
You get a standard output roughly every 10 seconds as the script runs.
Initting Dashboard
task1: Starting infinite loop
task2: Starting infinite loop
task3: Starting infinite loop
task4: Starting infinite loop
task5: Starting infinite loop
task6: Starting infinite loop
task7: Starting infinite loop
task8: Starting infinite loop
task9: Starting infinite loop
task10: Starting infinite loop
2023-05-02 06:20:27 task1: Looped 100 times waiting 0.1s on each loop. Should have taken 10.0s, but took 10.0s
2023-05-02 06:20:27 task3: Looped 100 times waiting 0.1s on each loop. Should have taken 10.0s, but took 10.0234s
2023-05-02 06:20:27 task6: Looped 100 times waiting 0.1s on each loop. Should have taken 10.0s, but took 10.0234s
2023-05-02 06:20:27 task8: Looped 100 times waiting 0.1s on each loop. Should have taken 10.0s, but took 10.0313s
2023-05-02 06:20:28 task2: Looped 50 times waiting 0.2s on each loop. Should have taken 10.0s, but took 10.1563s
2023-05-02 06:20:28 task7: Looped 50 times waiting 0.2s on each loop. Should have taken 10.0s, but took 10.1563s
2023-05-02 06:20:28 task4: Looped 20 times waiting 0.5s on each loop. Should have taken 10.0s, but took 10.5234s
2023-05-02 06:20:28 task9: Looped 20 times waiting 0.5s on each loop. Should have taken 10.0s, but took 10.5234s
2023-05-02 06:20:28 task5: Looped 17 times waiting 0.6s on each loop. Should have taken 10.2s, but took 10.7813s
2023-05-02 06:20:28 task10: Looped 17 times waiting 0.6s on each loop. Should have taken 10.2s, but took 10.7969s
Eventually you'll get a hard crash at random intervals. Usually this is around 6 to 12 hours. It happens more often if you add more tasks.
From safemode.py. Kill switch off. 2023-05-01 21:45:06, supervisor.RunReason.STARTUP, microcontroller.ResetReason.SOFTWARE, supervisor.SafeModeReason.HARD_FAULT
From main.py. Kill switch off. 2023-05-01 21:44:54, supervisor.RunReason.STARTUP, microcontroller.ResetReason.SOFTWARE, supervisor.SafeModeReason.NONE
From safemode.py. Kill switch off. 2023-05-02 00:18:08, supervisor.RunReason.STARTUP, microcontroller.ResetReason.SOFTWARE, supervisor.SafeModeReason.HARD_FAULT
From main.py. Kill switch off. 2023-05-02 00:18:03, supervisor.RunReason.STARTUP, microcontroller.ResetReason.SOFTWARE, supervisor.SafeModeReason.NONE
To track these hard faults I have safemode.py reboot the device back into normal operation, but log the reboot. I also log all normal boots from main.py. Then I dish control over to the actual code posted at the top of this bug report.
main.py
# This file is initially loaded by the ESP-32 S2 CircuitPython
# We watch for a kill switch, i.e. a button on GPIO1, and if it's on we
# don't proceed with running the main program.
from killswitch.killswitch import KillSwitch
from reboot.r import RebootReason
def main():
# Log the reboot reason
r = RebootReason()
ks = KillSwitch()
if ks.isKillSwitchOn():
print("Kill switch is on, so exiting code.")
r.logToFileWithWifi("From main.py. Kill switch on.")
# Create our display hardware object so we spit out our IP address to display
import display.display
d = display.display.Display()
# Dump log file
r.dumpLogFile()
else:
print("Kill switch is off, so proceeding with running massive main code...")
r.logToFileWithWifi("From main.py. Kill switch off.")
# Create our display hardware object so we spit out our IP address to display
import display.display
d = display.display.Display()
# Dump log file
r.dumpLogFile()
# import main_kitchensink
import main_test_why_circuitpython_hardfaults
main()
safemode.py
# This file is the file initially loaded by the ESP-32 S2 CircuitPython
# after booting into safemode. We're going to just log that we went into safemode
# and then do a full reboot back to normal mode. So, we're basically overriding this
# safety measure as we're in production and we know we have good code. So, I don't
# want to get the Marble Run into a state where it's not responding. I have seen
# reboots to safe mode every several hours and I'm not sure why, so just handle this by rebooting.
from killswitch.killswitch import KillSwitch
from reboot.r import RebootReason
def main():
# Log the reboot reason
r = RebootReason()
ks = KillSwitch()
if ks.isKillSwitchOn():
print("Kill switch is on, so exiting code.")
r.logToFile("From safemode.py. Kill switch on.")
# Create our display hardware object so we spit out our IP address to display
import display.display
d = display.display.Display()
# Dump log file
r.dumpLogFile()
else:
print("Kill switch is not on, so proceeding with rebooting to normal mode...")
r.logToFile("From safemode.py. Kill switch off.")
# Create our display hardware object so we spit out our IP address to display
import display.display
d = display.display.Display()
# Dump log file
r.dumpLogFile()
r.rebootToNormalMode()
main()
reboot/r.py
"""This file will look at what the boot reason was for and then log it in a file."""
import supervisor
import microcontroller
from adafruit_datetime import datetime as adatetime
import storage
class RebootReason:
def __init__(self) -> None:
print("Running Reboot Reason")
self.fileNameLog = "rebootreason.txt"
def checkReason(self):
print("Supervisor.runtime.run_reason:", supervisor.runtime.run_reason)
# print("Supervisor.runtime.autoreload:", supervisor.runtime.autoreload)
print("Supervisor.runtime.usb_connected:", supervisor.runtime.usb_connected)
print("Microcontroller.Processor.reset_reason:", microcontroller.Processor.reset_reason)
print("Microcontroller.cpu.reset_reason:",microcontroller.cpu.reset_reason)
print("supervisor.runtime.safe_mode_reason:", supervisor.runtime.safe_mode_reason)
def logToFile(self, prefix):
# Remount so CircuitPython can write to the drive
storage.remount("/", readonly=False)
f = open(self.fileNameLog, "a")
f.write(
# "{} {}, supervisor.runtime.run_reason:{}, microcontroller.cpu.reset_reason:{}, supervisor.runtime.safe_mode_reason:{}\n".format(
"{} {}, {}, {}, {}\n".format(
prefix,
adatetime.now(),
supervisor.runtime.run_reason,
microcontroller.cpu.reset_reason,
supervisor.runtime.safe_mode_reason
))
f.flush()
f.close()
def logToFileWithWifi(self, prefix):
# Reset the datetime on the ESP32 from NTP since we have a network connection
import rtc
import socketpool
import wifi
import adafruit_ntp
pool = socketpool.SocketPool(wifi.radio)
ntp = adafruit_ntp.NTP(pool, tz_offset=-6 )
# NOTE: This changes the system time so make sure you aren't assuming that time
# doesn't jump.
rtc.RTC().datetime = ntp.datetime
# Do normal logging with correct timestamp
self.logToFile(prefix)
def dumpLogFile(self):
f = open(self.fileNameLog, "r")
for line in f:
print(line)
f.close()
def d(self):
self.dumpLogFile()
def rebootToUf2Mode(self):
microcontroller.on_next_reset(microcontroller.RunMode.UF2)
microcontroller.reset()
def rebootToSafeMode(self):
microcontroller.on_next_reset(microcontroller.RunMode.SAFE_MODE)
microcontroller.reset()
def rebootToNormalMode(self):
microcontroller.on_next_reset(microcontroller.RunMode.NORMAL)
microcontroller.reset()
def rebootToBootLoaderMode(self):
microcontroller.on_next_reset(microcontroller.RunMode.BOOTLOADER)
microcontroller.reset()
And this is the super simple killswitch.py which you really don't need, but in case you read the code above you'd want to see this basic class.
# This class handles the kill switch on the Marble Run
# The user gets to toggle the kill switch and if it's off, the main code
# runs, but if it's on then the main.py code just exits so you can debug
#
# We are literally just doing one test here on boot. So this is very
# straightforward code.
import board
import digitalio
class KillSwitch:
def __init__(self):
print("Initting Kill Switch library...")
self._pinKillSwitchPin = board.IO1
self._pinKillSwitch = digitalio.DigitalInOut(self._pinKillSwitchPin)
self._pinKillSwitch.switch_to_input(pull=digitalio.Pull.UP)
def isKillSwitchOn(self):
val = not self._pinKillSwitch.value
print("KillSwitch:", val)
return val
Description
No response
Additional information
No response