@@ -148,7 +148,7 @@ func (c *checkpointer) Checkpoint(ctx context.Context, cogletCmd *exec.Cmd, wait
148148 }
149149
150150 // CRIU checkpoint (leaving process running)
151- cmd = exec .CommandContext (ctx , criuPath , "dump" , "--leave-running" , "--tcp-close" , "--images-dir" , filepath .Join (c .checkpointDir , checkpointSubdirName ), "--tree" , pid )
151+ cmd = exec .CommandContext (ctx , criuPath , "dump" , "--shell-job" , "--leave-running" , "--tcp-close" , "--images-dir" , filepath .Join (c .checkpointDir , checkpointSubdirName ), "--tree" , pid )
152152 if err := cmd .Run (); err != nil {
153153 // Try to toggle CUDA back on. If we aren't able to restart CUDA, the process
154154 // will hang indefinitely, so we should kill it and try to start a new one
@@ -184,7 +184,7 @@ func (c *checkpointer) Restore(ctx context.Context) (*exec.Cmd, func(context.Con
184184 }
185185
186186 // Set up restore command
187- restoreCmd := exec .CommandContext (ctx , criuPath , "restore" , "--tcp-close" , "--images-dir" , filepath .Join (c .checkpointDir , checkpointSubdirName ))
187+ restoreCmd := exec .CommandContext (ctx , criuPath , "restore" , "--shell-job" , "--tcp-close" , "--images-dir" , filepath .Join (c .checkpointDir , checkpointSubdirName ))
188188
189189 // Set up callback function once restore is started
190190 callback := func (con context.Context ) error {
0 commit comments