-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[lldb] Implement basic support for reverse-continue #112079
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lldb Author: Robert O'Callahan (rocallahan) ChangesThis commit only adds support for the This feature depends on a gdbserver implementation (e.g. Patch is 65.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112079.diff 32 Files Affected:
diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h
index 1624e02070b1b2..8b8ed830b54cc0 100644
--- a/lldb/include/lldb/API/SBProcess.h
+++ b/lldb/include/lldb/API/SBProcess.h
@@ -159,6 +159,7 @@ class LLDB_API SBProcess {
lldb::SBError Destroy();
lldb::SBError Continue();
+ lldb::SBError Continue(RunDirection direction);
lldb::SBError Stop();
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index b8c53a474ba6b9..fe7fbc50fd5770 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -857,10 +857,10 @@ class Process : public std::enable_shared_from_this<Process>,
/// \see Thread:Resume()
/// \see Thread:Step()
/// \see Thread:Suspend()
- Status Resume();
+ Status Resume(lldb::RunDirection direction = lldb::eRunForward);
/// Resume a process, and wait for it to stop.
- Status ResumeSynchronous(Stream *stream);
+ Status ResumeSynchronous(Stream *stream, lldb::RunDirection direction = lldb::eRunForward);
/// Halts a running process.
///
@@ -1104,9 +1104,14 @@ class Process : public std::enable_shared_from_this<Process>,
/// \see Thread:Resume()
/// \see Thread:Step()
/// \see Thread:Suspend()
- virtual Status DoResume() {
- return Status::FromErrorStringWithFormatv(
- "error: {0} does not support resuming processes", GetPluginName());
+ virtual Status DoResume(lldb::RunDirection direction) {
+ if (direction == lldb::RunDirection::eRunForward) {
+ return Status::FromErrorStringWithFormatv(
+ "error: {0} does not support resuming processes", GetPluginName());
+ } else {
+ return Status::FromErrorStringWithFormatv(
+ "error: {0} does not support reverse execution of processes", GetPluginName());
+ }
}
/// Called after resuming a process.
@@ -2332,6 +2337,8 @@ class Process : public std::enable_shared_from_this<Process>,
bool IsRunning() const;
+ lldb::RunDirection GetLastRunDirection() { return m_last_run_direction; }
+
DynamicCheckerFunctions *GetDynamicCheckers() {
return m_dynamic_checkers_up.get();
}
@@ -2851,7 +2858,7 @@ void PruneThreadPlans();
///
/// \return
/// An Status object describing the success or failure of the resume.
- Status PrivateResume();
+ Status PrivateResume(lldb::RunDirection direction = lldb::eRunForward);
// Called internally
void CompleteAttach();
@@ -3127,6 +3134,8 @@ void PruneThreadPlans();
// m_currently_handling_do_on_removals are true,
// Resume will only request a resume, using this
// flag to check.
+ // The direction of execution from the last time this process was resumed.
+ lldb::RunDirection m_last_run_direction;
lldb::tid_t m_interrupt_tid; /// The tid of the thread that issued the async
/// interrupt, used by thread plan timeout. It
diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h
index fae90364deaf0a..072f71f6b1122f 100644
--- a/lldb/include/lldb/Target/StopInfo.h
+++ b/lldb/include/lldb/Target/StopInfo.h
@@ -142,6 +142,12 @@ class StopInfo : public std::enable_shared_from_this<StopInfo> {
static lldb::StopInfoSP
CreateStopReasonProcessorTrace(Thread &thread, const char *description);
+ // This creates a StopInfo indicating that execution stopped because
+ // it was replaying some recorded execution history, and execution reached
+ // the end of that recorded history.
+ static lldb::StopInfoSP
+ CreateStopReasonHistoryBoundary(Thread &thread, const char *description);
+
static lldb::StopInfoSP CreateStopReasonFork(Thread &thread,
lldb::pid_t child_pid,
lldb::tid_t child_tid);
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index 938f6e3abe8f2a..232d1dfdb5c9d0 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -135,6 +135,9 @@ FLAGS_ENUM(LaunchFlags){
/// Thread Run Modes.
enum RunMode { eOnlyThisThread, eAllThreads, eOnlyDuringStepping };
+/// Execution directions
+enum RunDirection { eRunForward, eRunReverse };
+
/// Byte ordering definitions.
enum ByteOrder {
eByteOrderInvalid = 0,
@@ -254,6 +257,9 @@ enum StopReason {
eStopReasonVFork,
eStopReasonVForkDone,
eStopReasonInterrupt, ///< Thread requested interrupt
+ // Indicates that execution stopped because the debugger backend relies
+ // on recorded data and we reached the end of that data.
+ eStopReasonHistoryBoundary,
};
/// Command Return Status Types.
diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
index 1784487323ad6b..732d6171320680 100644
--- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
+++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
@@ -510,8 +510,9 @@ def start(self):
self._thread.start()
def stop(self):
- self._thread.join()
- self._thread = None
+ if self._thread is not None:
+ self._thread.join()
+ self._thread = None
def get_connect_address(self):
return self._socket.get_connect_address()
diff --git a/lldb/packages/Python/lldbsuite/test/lldbgdbproxy.py b/lldb/packages/Python/lldbsuite/test/lldbgdbproxy.py
new file mode 100644
index 00000000000000..2a9592bf4545a4
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/lldbgdbproxy.py
@@ -0,0 +1,175 @@
+import logging
+import os
+import os.path
+import random
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.gdbclientutils import *
+import lldbgdbserverutils
+from lldbsuite.support import seven
+
+
+class GDBProxyTestBase(TestBase):
+ """
+ Base class for gdbserver proxy tests.
+
+ This class will setup and start a mock GDB server for the test to use.
+ It pases through requests to a regular lldb-server/debugserver and
+ forwards replies back to the LLDB under test.
+ """
+
+ """The gdbserver that we implement."""
+ server = None
+ """The inner lldb-server/debugserver process that we proxy requests into."""
+ monitor_server = None
+ monitor_sock = None
+
+ server_socket_class = TCPServerSocket
+
+ DEFAULT_TIMEOUT = 20 * (10 if ("ASAN_OPTIONS" in os.environ) else 1)
+
+ _verbose_log_handler = None
+ _log_formatter = logging.Formatter(fmt="%(asctime)-15s %(levelname)-8s %(message)s")
+
+ def setUpBaseLogging(self):
+ self.logger = logging.getLogger(__name__)
+
+ if len(self.logger.handlers) > 0:
+ return # We have set up this handler already
+
+ self.logger.propagate = False
+ self.logger.setLevel(logging.DEBUG)
+
+ # log all warnings to stderr
+ handler = logging.StreamHandler()
+ handler.setLevel(logging.WARNING)
+ handler.setFormatter(self._log_formatter)
+ self.logger.addHandler(handler)
+
+ def setUp(self):
+ TestBase.setUp(self)
+
+ self.setUpBaseLogging()
+
+ if self.isVerboseLoggingRequested():
+ # If requested, full logs go to a log file
+ log_file_name = self.getLogBasenameForCurrentTest() + "-proxy.log"
+ self._verbose_log_handler = logging.FileHandler(
+ log_file_name
+ )
+ self._verbose_log_handler.setFormatter(self._log_formatter)
+ self._verbose_log_handler.setLevel(logging.DEBUG)
+ self.logger.addHandler(self._verbose_log_handler)
+
+ lldb_server_exe = lldbgdbserverutils.get_lldb_server_exe()
+ if lldb_server_exe is None:
+ self.debug_monitor_exe = lldbgdbserverutils.get_debugserver_exe()
+ self.assertTrue(self.debug_monitor_exe is not None)
+ self.debug_monitor_extra_args = []
+ else:
+ self.debug_monitor_exe = lldb_server_exe
+ self.debug_monitor_extra_args = ["gdbserver"]
+
+ self.server = MockGDBServer(self.server_socket_class())
+ self.server.responder = self
+
+ def tearDown(self):
+ # TestBase.tearDown will kill the process, but we need to kill it early
+ # so its client connection closes and we can stop the server before
+ # finally calling the base tearDown.
+ if self.process() is not None:
+ self.process().Kill()
+ self.server.stop()
+
+ self.logger.removeHandler(self._verbose_log_handler)
+ self._verbose_log_handler = None
+
+ TestBase.tearDown(self)
+
+ def isVerboseLoggingRequested(self):
+ # We will report our detailed logs if the user requested that the "gdb-remote" channel is
+ # logged.
+ return any(("gdb-remote" in channel) for channel in lldbtest_config.channels)
+
+ def connect(self, target):
+ """
+ Create a process by connecting to the mock GDB server.
+ """
+ self.prep_debug_monitor_and_inferior()
+ self.server.start()
+
+ listener = self.dbg.GetListener()
+ error = lldb.SBError()
+ process = target.ConnectRemote(
+ listener, self.server.get_connect_url(), "gdb-remote", error
+ )
+ self.assertTrue(error.Success(), error.description)
+ self.assertTrue(process, PROCESS_IS_VALID)
+ return process
+
+ def get_next_port(self):
+ return 12000 + random.randint(0, 3999)
+
+ def prep_debug_monitor_and_inferior(self):
+ inferior_exe_path = self.getBuildArtifact("a.out")
+ self.connect_to_debug_monitor([inferior_exe_path])
+ self.assertIsNotNone(self.monitor_server)
+ self.initial_handshake()
+
+ def initial_handshake(self):
+ self.monitor_server.send_packet(seven.bitcast_to_bytes("+"))
+ reply = seven.bitcast_to_string(self.monitor_server.get_normal_packet())
+ self.assertEqual(reply, "+")
+ self.monitor_server.send_packet(seven.bitcast_to_bytes("QStartNoAckMode"))
+ reply = seven.bitcast_to_string(self.monitor_server.get_normal_packet())
+ self.assertEqual(reply, "+")
+ reply = seven.bitcast_to_string(self.monitor_server.get_normal_packet())
+ self.assertEqual(reply, "OK")
+ self.monitor_server.send_packet(seven.bitcast_to_bytes("+"))
+ reply = seven.bitcast_to_string(self.monitor_server.get_normal_packet())
+ self.assertEqual(reply, "+")
+
+ def get_debug_monitor_command_line_args(self, connect_address, launch_args):
+ return self.debug_monitor_extra_args + ["--reverse-connect", connect_address] + launch_args
+
+ def launch_debug_monitor(self, launch_args):
+ family, type, proto, _, addr = socket.getaddrinfo(
+ "localhost", 0, proto=socket.IPPROTO_TCP
+ )[0]
+ sock = socket.socket(family, type, proto)
+ sock.settimeout(self.DEFAULT_TIMEOUT)
+ sock.bind(addr)
+ sock.listen(1)
+ addr = sock.getsockname()
+ connect_address = "[{}]:{}".format(*addr)
+
+ commandline_args = self.get_debug_monitor_command_line_args(
+ connect_address, launch_args
+ )
+
+ # Start the server.
+ self.logger.info(f"Spawning monitor {commandline_args}")
+ monitor_process = self.spawnSubprocess(
+ self.debug_monitor_exe, commandline_args, install_remote=False
+ )
+ self.assertIsNotNone(monitor_process)
+
+ self.monitor_sock = sock.accept()[0]
+ self.monitor_sock.settimeout(self.DEFAULT_TIMEOUT)
+ return monitor_process
+
+ def connect_to_debug_monitor(self, launch_args):
+ monitor_process = self.launch_debug_monitor(launch_args)
+ self.monitor_server = lldbgdbserverutils.Server(self.monitor_sock, monitor_process)
+
+ def respond(self, packet):
+ """Subclasses can override this to change how packets are handled."""
+ return self.pass_through(packet)
+
+ def pass_through(self, packet):
+ self.logger.info(f"Sending packet {packet}")
+ self.monitor_server.send_packet(seven.bitcast_to_bytes(packet))
+ reply = seven.bitcast_to_string(self.monitor_server.get_normal_packet())
+ self.logger.info(f"Received reply {reply}")
+ return reply
diff --git a/lldb/packages/Python/lldbsuite/test/lldbreverse.py b/lldb/packages/Python/lldbsuite/test/lldbreverse.py
new file mode 100644
index 00000000000000..0f02fdffbdeada
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/lldbreverse.py
@@ -0,0 +1,418 @@
+import os
+import os.path
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.gdbclientutils import *
+from lldbsuite.test.lldbgdbproxy import *
+import lldbgdbserverutils
+import re
+
+
+class ThreadSnapshot:
+ def __init__(self, thread_id, registers):
+ self.thread_id = thread_id
+ self.registers = registers
+
+
+class MemoryBlockSnapshot:
+ def __init__(self, address, data):
+ self.address = address
+ self.data = data
+
+
+class StateSnapshot:
+ def __init__(self, thread_snapshots, memory):
+ self.thread_snapshots = thread_snapshots
+ self.memory = memory
+ self.thread_id = None
+
+
+class RegisterInfo:
+ def __init__(self, lldb_index, bitsize, little_endian):
+ self.lldb_index = lldb_index
+ self.bitsize = bitsize
+ self.little_endian = little_endian
+
+
+BELOW_STACK_POINTER = 16384
+ABOVE_STACK_POINTER = 4096
+
+BLOCK_SIZE = 1024
+
+SOFTWARE_BREAKPOINTS = 0
+HARDWARE_BREAKPOINTS = 1
+WRITE_WATCHPOINTS = 2
+
+
+class ReverseTestBase(GDBProxyTestBase):
+ """
+ Base class for tests that need reverse execution.
+
+ This class uses a gdbserver proxy to add very limited reverse-
+ execution capability to lldb-server/debugserver for testing
+ purposes only.
+
+ To use this class, run the inferior forward until some stopping point.
+ Then call `start_recording()` and execute forward again until reaching
+ a software breakpoint; this class records the state before each execution executes.
+ At that point, the server will accept "bc" and "bs" packets to step
+ backwards through the state.
+ When executing during recording, we only allow single-step and continue without
+ delivering a signal, and only software breakpoint stops are allowed.
+
+ We assume that while recording is enabled, the only effects of instructions
+ are on general-purpose registers (read/written by the 'g' and 'G' packets)
+ and on memory bytes between [SP - BELOW_STACK_POINTER, SP + ABOVE_STACK_POINTER).
+ """
+
+ """
+ A list of StateSnapshots in time order.
+
+ There is one snapshot per single-stepped instruction,
+ representing the state before that instruction was
+ executed. The last snapshot in the list is the
+ snapshot before the last instruction was executed.
+ This is an undo log; we snapshot a superset of the state that may have
+ been changed by the instruction's execution.
+ """
+ snapshots = None
+ recording_enabled = False
+
+ breakpoints = None
+
+ pid = None
+
+ pc_register_info = None
+ sp_register_info = None
+ general_purpose_register_info = None
+
+ def __init__(self, *args, **kwargs):
+ GDBProxyTestBase.__init__(self, *args, **kwargs)
+ self.breakpoints = [set(), set(), set(), set(), set()]
+
+ def respond(self, packet):
+ if not packet:
+ raise ValueError("Invalid empty packet")
+ if packet == self.server.PACKET_INTERRUPT:
+ # Don't send a response. We'll just run to completion.
+ return []
+ if self.is_command(packet, "qSupported", ":"):
+ reply = self.pass_through(packet)
+ return reply + ";ReverseStep+;ReverseContinue+"
+ if self.is_command(packet, "vCont", ";"):
+ if self.recording_enabled:
+ return self.continue_with_recording(packet)
+ snapshots = []
+ if packet[0] == "c" or packet[0] == "s" or packet[0] == "C" or packet[0] == "S":
+ raise ValueError("LLDB should not be sending old-style continuation packets")
+ if packet == "bc":
+ return self.reverse_continue()
+ if packet == "bs":
+ return self.reverse_step()
+ if packet == 'jThreadsInfo':
+ # Suppress this because it contains thread stop reasons which we might
+ # need to modify, and we don't want to have to implement that.
+ return ""
+ if packet[0] == "z" or packet[0] == "Z":
+ reply = self.pass_through(packet)
+ if reply == "OK":
+ self.update_breakpoints(packet)
+ return reply
+ return GDBProxyTestBase.respond(self, packet)
+
+ def start_recording(self):
+ self.recording_enabled = True
+ self.snapshots = []
+
+ def stop_recording(self):
+ """
+ Don't record when executing foward.
+
+ Reverse execution is still supported until the next forward continue.
+ """
+ self.recording_enabled = False
+
+ def is_command(self, packet, cmd, follow_token):
+ return packet == cmd or packet[0:len(cmd) + 1] == cmd + follow_token
+
+ def update_breakpoints(self, packet):
+ m = re.match("([zZ])([01234]),([0-9a-f]+),([0-9a-f]+)", packet)
+ if m is None:
+ raise ValueError("Invalid breakpoint packet: " + packet)
+ t = int(m.group(2))
+ addr = int(m.group(3), 16)
+ kind = int(m.group(4), 16)
+ if m.group(1) == 'Z':
+ self.breakpoints[t].add((addr, kind))
+ else:
+ self.breakpoints[t].discard((addr, kind))
+
+ def breakpoint_triggered_at(self, pc):
+ if any(addr == pc for addr, kind in self.breakpoints[SOFTWARE_BREAKPOINTS]):
+ return True
+ if any(addr == pc for addr, kind in self.breakpoints[HARDWARE_BREAKPOINTS]):
+ return True
+ return False
+
+ def watchpoint_triggered(self, new_value_block, current_contents):
+ """Returns the address or None."""
+ for watch_addr, kind in breakpoints[WRITE_WATCHPOINTS]:
+ for offset in range(0, kind):
+ addr = watch_addr + offset
+ if (addr >= new_value_block.address and
+ addr < new_value_block.address + len(new_value_block.data)):
+ index = addr - new_value_block.address
+ if new_value_block.data[index*2:(index + 1)*2] != current_contents[index*2:(index + 1)*2]:
+ return watch_addr
+ return None
+
+ def continue_with_recording(self, packet):
+ self.logger.debug("Continue with recording enabled")
+
+ step_packet = "vCont;s"
+ if packet == "vCont":
+ requested_step = False
+ else:
+ m = re.match("vCont;(c|s)(.*)", packet)
+ if m is None:
+ raise ValueError("Unsupported vCont packet: " + packet)
+ requested_step = m.group(1) == 's'
+ step_packet += m.group(2)
+
+ while True:
+ snapshot = self.capture_snapshot()
+ reply = self.pass_through(step_packet)
+ (stop_signal, stop_pairs) = self.parse_stop(reply)
+ if stop_signal != 5:
+ raise ValueError("Unexpected stop signal: " + reply)
+ is_swbreak = False
+ thread_id = None
+ for key, value in stop_pairs.items():
+ if key == "thread":
+ thread_id = self.parse_thread_id(value)
+ continue
+ if re.match('[0-9a-f]+', key):
+ continue
+ if key == "swbreak" or (key == "reason" and value == "breakpoint"):
+ is_swbreak = True
+ continue
+ if key in ["name", "threads", "thread-pcs", "reason"]:
+ continue
+ raise ValueError(f"Unknown stop key '{key}' in {reply}")
+ if is_swbreak:
+ self.logger.debug("Recording stopped")
+ return reply
+ if thread_id is None:
+ return ValueError("Expected thread ID: " + reply)
+ snapshot.thread_id = thread_id
+ self.snapshots.append(snaps...
[truncated]
|
✅ With the latest revision this PR passed the Python code formatter. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
9f33f8e
to
c373425
Compare
Answering #99736 (comment) and #99736 (comment):
Not that I'm aware of, unfortunately.
Darwin uses a different debug agent called |
I adapted the code from here llvm-project/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py Line 516 in a62768c
lldbgdbproxy.py :
That's supposed to select |
It should, and I suspect (I haven't seen the error msg, do you have a link?) that the problem isn't in this part of the code. If I had to guess, I'd say that this is failing because macos generates slightly different packet sequences, and the test is not expecting those. |
Makes sense. But given Mac tests don't run against PRs, and I don't have a Mac, there's no easy way for me to debug this. |
I suggest @medismailben (or an Apple colleague he nominates), runs this test locally and does their best to narrow down the problem. @rocallahan does I can do the same for process Arm Linux, but I will wait and see what turns up on Mac so we aren't duplicating the work. We can skip on some platforms but I'd really like to avoid that unless it's something fundamental about the way the test itself works. Apart from anything else, it means you don't have to tell |
Starting a build / test for this branch right now! I'll give an update when its done running. |
Looks like the test is still trying to run |
Also worth noting that the test is attaching. That's probably because on darwin we are launching the process in suspended mode and then asking the debugserver to attach to it. That's might not be what the packet forwarder expects (or is it?) |
This didn't work because we also build |
No, it doesn't. It does work on Linux AAarch64 though.
Hmm, so lldb-server is built on macOS but we're not supposed to use it? |
Yeah I was also surprised to see that but I guess it doesn't hurt. I don't think we can use it because it doesn't have any support for macOS which also explains the crash. Honestly, given that rr only works on linux aarch64, I think it would be fine if you skipped that test on Darwin platforms (by adding an |
I'd like the tests to work on Mac. I'm getting some assistance and we'll report back in a few days. |
On Oct 19, 2024, at 8:29 PM, Med Ismail Bennani ***@***.***> wrote:
@rocallahan <https://github.com/rocallahan> does rr work on Mac?
No, it doesn't. It does work on Linux AAarch64 though.
This didn't work because we also build lldb-server on macOS.
Hmm, so lldb-server is built on macOS but we're not supposed to use it?
lldb-server on Darwin works in platform mode, but not for process control. We already had debugserver working which had a very different looking codebase, and we never merged the process control parts of debugserver into lldb-server.
Jim
… Yeah I was also surprised to see that but I guess it doesn't hurt. I don't think we can use it because it doesn't have any support for macOS which also explains the crash. Honestly I think it would be fine if you skipped that test on Darwin platforms (by adding an @skipDarwin decorator at the beginning of each test_* methods). @JDevlieghere <https://github.com/JDevlieghere> @jimingham <https://github.com/jimingham> do you have any objection to that ?
—
Reply to this email directly, view it on GitHub <#112079 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/ADUPVW5UJY7MLFI5PUWIRHLZ4MPRXAVCNFSM6AAAAABP2BOVAOVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDIMRUGQ4TMOBTHE>.
You are receiving this because you were mentioned.
|
c373425
to
bb96e6d
Compare
The approach I implemented fails when evaluating a breakpoint condition requires running a function in the target process, e.g. evaluating a breakpoint condition that's a function call, or calling In this case we need the So I tried adding a virtual function An alternative approach would be to store a "current direction" in @jimingham this all sounds like your territory. |
Giving ThreadPlans a direction gets complicated when there are multiple runnable threads, because the topmost plans for different runnable threads could disagree about the execution direction, which is obviously unworkable. In my experimental branch I linked above, I resolve this by defaulting to forward execution if there's a disagreement and popping reverse-execution plans from runnable threads until all the topmost plans agree on forward execution. But this means that reverse-continue has to push a |
Maybe we can handle things like "forward execution of a function call plan while backwards execution of a thread" by having thread plans have an "actual run direction" - which is "forward" or "backwards" and a "run intention" - which is "forward", "backwards" or "no opinion". If a plan has no opinion, then we query its parent. When we're querying the threads to see who is forward going and who backwards, we'd use the "run intention". Function calling thread plans would have "no intention" so they wouldn't move their "backwards" running threads into the forward camp, requiring their ReverseContinue plans to get popped... That would mean when a thread is in the "backwards" state, its function calls have to be "stop others". But I don't think anything else would work, it's not like you are going to run some other thread in some direction to get it to try to release a lock so the thread the function is running on can make progress or anything like that, which is the reason we run function calls on all threads... |
Thanks, that sounds OK but I don't see how it would help with my main question right now:
Put it another way: if we change the plan stack state to request reverse execution (push a |
My notion was that when you push a plan that has an intention to run in a direction, if the plan above it on the stack was going in the opposite direction, it has to be popped. But plans that have "no opinion" don't change the stack above them. |
I also think in the long run we need a more flexible procedure. After all, you might be doing a backwards direction next, but hit a breakpoint while doing that. You then want to step FORWARDS from that breakpoint to check something out. Then if stepping backwards and forwards were working like lldb stepping currently works, you'd expect "continue" to return control to the backwards next to finish out the operation. So I don't think in the long run we should be kicking all backwards plans off the stack when we start to go forwards. I think instead we should use the IsPlanStale mechanism to tell when a backwards plan can no longer do its job. |
That would require that the thread wouldn't have a direction itself, but the direction would be governed by whoever explained the stop and set up the next phase of execution. Since all the threads need to work in lock step, that probably means that That's linking the thread plans from different threads, which we haven't done yet, but I think that's better than trying to hold some state outside the thread plan stack and some in it. And it should be a fairly loose coupling since the other threads are mostly asking "am I done yet" of the plan that is actually driving. |
That's in the current code in this PR but I agree that was the wrong direction. My experimental reverse-continue-thread-plan branch doesn't do this.
Right.
There's a UX issue here: in the scenario you describe (hit breakpoint during reverse-next, step forward, continue) should the reverse-step be resumed? Personally I think not. I think it's very confusing if "continue" ever causes reverse execution (and likewise if "reverse-continue" ever causes forward execution). So in this case I would say "reverse-continue" should resume the reverse-next but "continue" should pop it. This is helpful discussion but it doesn't directly address my question in #112079 (comment). Can we focus on that? |
Yes, I need someone to merge it. Thanks! |
Hmm.. could you update the patch description with the text you want the final commit to contain. I see that at least the API names have changed during the review, but maybe there are other things (?) |
This commit only adds support for the `SBProcess::ContinueInDirection()` API. A user-accessible command for this will follow in a later commit. This feature depends on a gdbserver implementation (e.g. `rr`) providing support for the `bc` and `bs` packets. `lldb-server` does not support those packets, and there is no plan to change that. For testing purposes, this commit adds a Python implementation of *very limited* record-and-reverse-execute functionality, implemented as a proxy between lldb and lldb-server in `lldbreverse.py`. This should not (and in practice cannot) be used for anything except testing.
This allows `SBProcess::ContinueInDirection()` to error out early if the target does not support any kind of reverse execution.
39b5c70
to
2dc4acb
Compare
The API names in the commit messages are correct. But I noticed that the commit message for the main commit was somehow corrupted. I've fixed that now. Or by "patch description" do you mean some other metadata? |
This is a point of confusion, the LLVM project is set up to take the PR description, #112079 (comment) (the one you see on the page where this comment is also) as the commit message for the squashed commits. As opposed to using the combination of the commits in the PR, or picking the last one, or whatever else GitHub allows. The merits of this - out of scope, but the point is that there is a chance to edit this description before it's used, but only the one merging gets that chance. So if you had the ability to merge you could do that and that's fine. But you don't so we need to know what it should be and the easiest way to do that is update the PR's description and all we have to do is click the button and accept what GitHub chooses. If you have a really good reason to keep the PR description as is (perhaps it includes a bunch of context for reviewers that might be useful for archaeology), then you can just write a new comment and we'll copy that. If this seems like I'm being overly verbose and maybe a bit insulting, that's not my intent, it's just a very tricky distinction to convey and I spent this morning also writing about it elsewhere :) . It's not perfect but it is what it is for the time being. |
If the final message is in fact just the several commit messages, and that describes the changes well, then great! Doesn't have to be a new, unique thing. |
No, this is pretty clear and helpful, thanks! I've updated the PR description. |
Thanks for jumping in with the explanation, David. Pushing the button now. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/10218 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/3828 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/197/builds/979 Here is the relevant piece of the build log for the reference
|
And pushing the revert button now. :( The remote failure I think we should just slap |
Reverts #112079 due to failures on the arm bot.
…e" (#123906) Reverts llvm/llvm-project#112079 due to failures on the arm bot.
FYI,
|
This is because the test is saving an area of memory that is readable but not writeable, even via ptrace.
If we look at AArch64's memory map there is no region above the stack:
But on x86_64 there is:
I think we can probably change the logic such that we are rounding up the amount of memory below the stack pointer, but rounding down the amount above it. Based on assumptions that the stack pointer will start 1k aligned anyway. There is also the possibility that the stack pointer is not actually 1k away from its start point in the first place, though if you're on anything but an embedded system, this seems like a reasonable assumption. I will look at the maths and find a way to fix it. |
The failure was on the arm(32), not aarch64. (but yes, ptrace-read-only memory sounds like a plausible explanation) |
Yes, I presented AArch64 as the counter example to show where it does work, but didn't make that clear. I've got a solution for this bit, but the watchpoint test is still failing one part on Arm 32 bit. |
…3906)" This reverts commit 22561cf and fixes b7b9ccf (llvm#112079). The problem is that x86_64 and Arm 32-bit have memory regions above the stack that are readable but not writeable. First Arm: ``` (lldb) memory region --all <...> [0x00000000fffcf000-0x00000000ffff0000) rw- [stack] [0x00000000ffff0000-0x00000000ffff1000) r-x [vectors] [0x00000000ffff1000-0xffffffffffffffff) --- ``` Then x86_64: ``` $ cat /proc/self/maps <...> 7ffdcd148000-7ffdcd16a000 rw-p 00000000 00:00 0 [stack] 7ffdcd193000-7ffdcd196000 r--p 00000000 00:00 0 [vvar] 7ffdcd196000-7ffdcd197000 r-xp 00000000 00:00 0 [vdso] ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall] ``` Compare this to AArch64 where the test did pass: ``` $ cat /proc/self/maps <...> ffffb87dc000-ffffb87dd000 r--p 00000000 00:00 0 [vvar] ffffb87dd000-ffffb87de000 r-xp 00000000 00:00 0 [vdso] ffffb87de000-ffffb87e0000 r--p 0002a000 00:3c 76927217 /usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1 ffffb87e0000-ffffb87e2000 rw-p 0002c000 00:3c 76927217 /usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1 fffff4216000-fffff4237000 rw-p 00000000 00:00 0 [stack] ``` To solve this, look up the memory region of the stack pointer and constrain the read to within that region. Since we know the stack is all readable and writeable.
#123945 to fix the testing. |
…" (#123945) This reverts commit 22561cf and fixes b7b9ccf (#112079). The problem is that x86_64 and Arm 32-bit have memory regions above the stack that are readable but not writeable. First Arm: ``` (lldb) memory region --all <...> [0x00000000fffcf000-0x00000000ffff0000) rw- [stack] [0x00000000ffff0000-0x00000000ffff1000) r-x [vectors] [0x00000000ffff1000-0xffffffffffffffff) --- ``` Then x86_64: ``` $ cat /proc/self/maps <...> 7ffdcd148000-7ffdcd16a000 rw-p 00000000 00:00 0 [stack] 7ffdcd193000-7ffdcd196000 r--p 00000000 00:00 0 [vvar] 7ffdcd196000-7ffdcd197000 r-xp 00000000 00:00 0 [vdso] ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall] ``` Compare this to AArch64 where the test did pass: ``` $ cat /proc/self/maps <...> ffffb87dc000-ffffb87dd000 r--p 00000000 00:00 0 [vvar] ffffb87dd000-ffffb87de000 r-xp 00000000 00:00 0 [vdso] ffffb87de000-ffffb87e0000 r--p 0002a000 00:3c 76927217 /usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1 ffffb87e0000-ffffb87e2000 rw-p 0002c000 00:3c 76927217 /usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1 fffff4216000-fffff4237000 rw-p 00000000 00:00 0 [stack] ``` To solve this, look up the memory region of the stack pointer (using https://lldb.llvm.org/resources/lldbgdbremote.html#qmemoryregioninfo-addr) and constrain the read to within that region. Since we know the stack is all readable and writeable. I have also added skipIfRemote to the tests, since getting them working in that context is too complex to be worth it. Memory write failures now display the range they tried to write, and register write errors will show the name of the register where possible. The patch also includes a workaround for a an issue where the test code could mistake an `x` response that happens to begin with an `O` for an output packet (stdout). This workaround will not be necessary one we start using the [new implementation](https://discourse.llvm.org/t/rfc-fixing-incompatibilties-of-the-x-packet-w-r-t-gdb/84288) of the `x` packet. --------- Co-authored-by: Pavel Labath <pavel@labath.sk>
This commit adds support for a
SBProcess::ContinueInDirection()
API. A user-accessible command for this will follow in a later commit.This feature depends on a gdbserver implementation (e.g.
rr
) providing support for thebc
andbs
packets.lldb-server
does not support those packets, and there is no plan to change that. For testing purposes, this commit adds a Python implementation of very limited record-and-reverse-execute functionality, implemented as a proxy between lldb and lldb-server inlldbreverse.py
. This should not (and in practice cannot) be used for anything except testing.The tests here are quite minimal but we test that simple breakpoints and watchpoints work as expected during reverse execution, and that conditional breakpoints and watchpoints work when the condition calls a function that must be executed in the forward direction.