Skip to content

Commit 9c5dbc9

Browse files
committed
Add custom TRACE level encoders and migrate verbose Debug logs
- Add custom level encoders (uppercase, lowercase, color) to display "TRACE" instead of "LEVEL(-8)" - Implement test logger with proper TRACE level display for zaptest compatibility - Add SugaredLogger chaining support (With/Named) to maintain custom type - Migrate 25 verbose Debug logs to Trace level across runner, server, service, webhook - Keep important Debug logs for actual debugging (errors, state changes, subprocess management) - Verbose implementation details now at Trace: file operations, response watchers, shutdown flow
1 parent f621f45 commit 9c5dbc9

File tree

8 files changed

+127
-45
lines changed

8 files changed

+127
-45
lines changed

internal/logging/logger.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,26 @@ const (
1414
TraceLevel = zapcore.Level(-8) // Below Debug (-4)
1515
)
1616

17+
// customLowercaseLevelEncoder handles our custom Trace level display (lowercase)
18+
func customLowercaseLevelEncoder(level zapcore.Level, enc zapcore.PrimitiveArrayEncoder) {
19+
switch level {
20+
case TraceLevel:
21+
enc.AppendString("trace")
22+
default:
23+
zapcore.LowercaseLevelEncoder(level, enc)
24+
}
25+
}
26+
27+
// customColorLevelEncoder handles our custom Trace level display (with colors)
28+
func customColorLevelEncoder(level zapcore.Level, enc zapcore.PrimitiveArrayEncoder) {
29+
switch level {
30+
case TraceLevel:
31+
enc.AppendString("\x1b[90mTRACE\x1b[0m") // Gray color for trace
32+
default:
33+
zapcore.CapitalColorLevelEncoder(level, enc)
34+
}
35+
}
36+
1737
// Logger embeds zap.Logger and adds Trace level support
1838
type Logger struct {
1939
*zap.Logger
@@ -34,11 +54,11 @@ func New(name string) *Logger {
3454
if isDevelopment {
3555
cfg = zap.NewDevelopmentConfig()
3656
cfg.Level = zap.NewAtomicLevelAt(zapcore.DebugLevel)
37-
cfg.EncoderConfig.EncodeLevel = zapcore.CapitalColorLevelEncoder
57+
cfg.EncoderConfig.EncodeLevel = customColorLevelEncoder
3858
} else {
3959
cfg = zap.NewProductionConfig()
4060
cfg.Level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
41-
cfg.EncoderConfig.EncodeLevel = zapcore.LowercaseLevelEncoder
61+
cfg.EncoderConfig.EncodeLevel = customLowercaseLevelEncoder
4262
}
4363

4464
// Set log level from environment (COG_LOG_LEVEL takes precedence, fallback to LOG_LEVEL)
@@ -139,3 +159,13 @@ func (s *SugaredLogger) Trace(args ...any) {
139159
// Tracew logs a message at the custom Trace level with loosely typed
// key-value pairs, mirroring zap's SugaredLogger *w methods (e.g. Debugw).
// It delegates to Logw with TraceLevel.
func (s *SugaredLogger) Tracew(msg string, keysAndValues ...any) {
	s.Logw(TraceLevel, msg, keysAndValues...)
}
162+
163+
// Override With to return our custom SugaredLogger
164+
func (s *SugaredLogger) With(args ...any) *SugaredLogger {
165+
return &SugaredLogger{SugaredLogger: s.SugaredLogger.With(args...)}
166+
}
167+
168+
// Override Named to return our custom SugaredLogger
169+
func (s *SugaredLogger) Named(name string) *SugaredLogger {
170+
return &SugaredLogger{SugaredLogger: s.SugaredLogger.Named(name)}
171+
}

internal/loggingtest/test_helper.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,48 @@ package loggingtest
33
import (
44
"testing"
55

6+
"go.uber.org/zap"
7+
"go.uber.org/zap/zapcore"
68
"go.uber.org/zap/zaptest"
79

810
"github.com/replicate/cog-runtime/internal/logging"
911
)
1012

13+
// customTestLevelEncoder handles our custom Trace level display for tests
14+
func customTestLevelEncoder(level zapcore.Level, enc zapcore.PrimitiveArrayEncoder) {
15+
switch level {
16+
case logging.TraceLevel:
17+
enc.AppendString("TRACE")
18+
default:
19+
zapcore.CapitalLevelEncoder(level, enc)
20+
}
21+
}
22+
1123
// NewTestLogger creates a logger for tests that outputs to t.Logf.
// Behaves exactly like zaptest.NewLogger but with trace support added:
// the logger is enabled down to TraceLevel (-8) and its console encoder
// prints "TRACE" instead of zap's default "LEVEL(-8)".
func NewTestLogger(t *testing.T) *logging.Logger {
	t.Helper()

	// Create test logger with custom level encoder.
	zapLogger := zaptest.NewLogger(t,
		// Enable everything down to our custom trace level.
		zaptest.Level(logging.TraceLevel),
		zaptest.WrapOptions(zap.WrapCore(func(core zapcore.Core) zapcore.Core {
			// Replace the encoder to handle our custom trace level.
			// The field keys and time/duration/caller encoders below mirror
			// zap's development console defaults; only EncodeLevel differs.
			enc := zapcore.NewConsoleEncoder(zapcore.EncoderConfig{
				TimeKey:        "T",
				LevelKey:       "L",
				NameKey:        "N",
				CallerKey:      "C",
				MessageKey:     "M",
				StacktraceKey:  "S",
				LineEnding:     zapcore.DefaultLineEnding,
				EncodeLevel:    customTestLevelEncoder,
				EncodeTime:     zapcore.ISO8601TimeEncoder,
				EncodeDuration: zapcore.StringDurationEncoder,
				EncodeCaller:   zapcore.ShortCallerEncoder,
			})
			// NOTE(review): the wrapped core ignores the original `core`
			// argument entirely and writes straight to a fresh testing
			// writer — confirm that dropping any configuration carried by
			// the original core is intended.
			return zapcore.NewCore(enc, zapcore.AddSync(zaptest.NewTestingWriter(t)), logging.TraceLevel)
		})),
	)
	return &logging.Logger{Logger: zapLogger}
}

internal/loggingtest/test_helper_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,26 @@ func TestLoggerChaining(t *testing.T) {
6565
optionsLogger.Trace("options trace")
6666
}
6767

68+
func TestSugaredLoggerChaining(t *testing.T) {
69+
logger := NewTestLogger(t)
70+
sugar := logger.Sugar()
71+
72+
// Test With returns our custom SugaredLogger with Trace support
73+
withSugar := sugar.With("component", "test")
74+
withSugar.Trace("trace with sugar chaining")
75+
withSugar.Tracew("tracew with sugar chaining", "key", "value")
76+
77+
// Test Named returns our custom SugaredLogger with Trace support
78+
namedSugar := sugar.Named("child")
79+
namedSugar.Trace("trace with named sugar")
80+
namedSugar.Tracew("tracew with named sugar", "key", "value")
81+
82+
// Test chaining both With and Named
83+
chainedSugar := sugar.With("component", "test").Named("child")
84+
chainedSugar.Trace("trace with full chaining")
85+
chainedSugar.Tracew("tracew with full chaining", "key", "value")
86+
}
87+
6888
func TestTraceLevel(t *testing.T) {
6989
// Verify TraceLevel is below DebugLevel
7090
if logging.TraceLevel >= zapcore.DebugLevel {

internal/runner/manager.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ func (m *Manager) PredictAsync(ctx context.Context, req PredictionRequest) error
176176

177177
runner, err := m.assignReqToRunner(deadlineCtx, req)
178178
if err != nil {
179-
log.Debugw("failed to get runner for async request", "error", err)
179+
log.Tracew("failed to get runner for async request", "error", err)
180180
m.releaseSlot()
181181
return err
182182
}
@@ -197,7 +197,7 @@ func (m *Manager) PredictAsync(ctx context.Context, req PredictionRequest) error
197197

198198
respChan, err := runner.predict(req)
199199
if err != nil {
200-
log.Debugw("failed to predict", "error", err)
200+
log.Tracew("failed to predict", "error", err)
201201
m.releaseSlot()
202202
return err
203203
}
@@ -206,7 +206,7 @@ func (m *Manager) PredictAsync(ctx context.Context, req PredictionRequest) error
206206
go func() {
207207
defer m.releaseSlot() // Release slot after prediction completes
208208
<-respChan // Wait for prediction to complete
209-
log.Debugw("async prediction completed", "prediction_id", req.ID)
209+
log.Tracew("async prediction completed", "prediction_id", req.ID)
210210
}()
211211

212212
return nil
@@ -485,7 +485,7 @@ func (m *Manager) assignReqToRunner(ctx context.Context, req PredictionRequest)
485485
// First, try to find existing runner with capacity and atomically reserve slot
486486
procRunner := m.findRunnerWithCapacity(ctx, req)
487487
if procRunner != nil {
488-
log.Debugw("allocated request to existing runner", "runner", procRunner.runnerCtx.id)
488+
log.Tracew("allocated request to existing runner", "runner", procRunner.runnerCtx.id)
489489
return procRunner, nil
490490
}
491491

@@ -824,7 +824,7 @@ func (m *Manager) Stop() error {
824824
// Wait for this runner to become idle OR timeout
825825
select {
826826
case <-runner.readyForShutdown:
827-
log.Infow("runner became idle naturally", "name", runner.runnerCtx.id)
827+
log.Debugw("runner became idle naturally", "name", runner.runnerCtx.id)
828828
case <-graceCtx.Done():
829829
log.Warnw("grace period expired for runner", "name", runner.runnerCtx.id, "context_err", graceCtx.Err())
830830
}
@@ -877,7 +877,7 @@ func (m *Manager) Status() string {
877877
runner.mu.Unlock()
878878
return status
879879
}
880-
log.Debug("default runner not found, returning STARTING")
880+
log.Trace("default runner not found, returning STARTING")
881881
return "STARTING"
882882
}
883883

@@ -1071,9 +1071,9 @@ func (m *Manager) monitorRunnerSubprocess(ctx context.Context, runnerName string
10711071
}
10721072

10731073
// Capture crash logs from runner and fail predictions one by one
1074-
log.Debugw("checking runner logs for crash", "runner_logs_count", len(runner.logs), "runner_logs", runner.logs)
1074+
log.Tracew("checking runner logs for crash", "runner_logs_count", len(runner.logs), "runner_logs", runner.logs)
10751075
crashLogs := runner.logs
1076-
log.Debugw("captured crash logs", "crash_logs_count", len(crashLogs), "crash_logs", crashLogs)
1076+
log.Tracew("captured crash logs", "crash_logs_count", len(crashLogs), "crash_logs", crashLogs)
10771077

10781078
for id, pending := range runner.pending {
10791079
log.Debugw("failing prediction due to setup failure", "prediction_id", id)

internal/runner/runner.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func (r *Runner) watchPredictionResponses(ctx context.Context, predictionID stri
5454
for {
5555
select {
5656
case <-ctx.Done():
57-
log.Debugw("response watcher canceled", "prediction_id", predictionID)
57+
log.Tracew("response watcher canceled", "prediction_id", predictionID)
5858
return
5959

6060
// TODO: Add inotify case when implemented
@@ -68,7 +68,7 @@ func (r *Runner) watchPredictionResponses(ctx context.Context, predictionID stri
6868
// Drain IPC OUTPUT notifications - when inotify available, we blackhole these
6969
// When inotify unavailable, this triggers immediate processing
7070
// TODO: Only process if inotify unavailable
71-
log.Debugw("received OUTPUT IPC notification", "prediction_id", predictionID)
71+
log.Tracew("received OUTPUT IPC notification", "prediction_id", predictionID)
7272
pollTimer.Reset(100 * time.Millisecond) // Reset polling timer since we got an event
7373
if err := r.processResponseFiles(predictionID, pending, responsePattern, log); err != nil {
7474
log.Errorw("failed to process response files from IPC", "prediction_id", predictionID, "error", err)
@@ -86,7 +86,7 @@ func (r *Runner) watchPredictionResponses(ctx context.Context, predictionID stri
8686
completed := pending.response.Status.IsCompleted()
8787
pending.mu.Unlock()
8888
if completed {
89-
log.Debugw("prediction completed, watcher exiting", "prediction_id", predictionID)
89+
log.Tracew("prediction completed, watcher exiting", "prediction_id", predictionID)
9090
return
9191
}
9292
}
@@ -274,7 +274,7 @@ func (r *Runner) handleResponseWebhooksAndCompletion(response *PredictionRespons
274274
}
275275

276276
// Watcher exits - manager defer will handle webhook and cleanup
277-
log.Debugw("prediction completed, watcher exiting", "prediction_id", predictionID)
277+
log.Tracew("prediction completed, watcher exiting", "prediction_id", predictionID)
278278
return
279279
}
280280
}
@@ -356,25 +356,25 @@ func (r *Runner) WaitForStop() {
356356
func (r *Runner) GracefulShutdown() {
357357
log := r.logger.Sugar()
358358
if !r.shutdownWhenIdle.CompareAndSwap(false, true) {
359-
log.Debugw("graceful shutdown already initiated", "runner_id", r.runnerCtx.id)
359+
log.Tracew("graceful shutdown already initiated", "runner_id", r.runnerCtx.id)
360360
return
361361
}
362362

363363
r.mu.RLock()
364364
shouldSignal := (r.status == StatusReady && len(r.pending) == 0)
365365
r.mu.RUnlock()
366366

367-
log.Debugw("graceful shutdown initiated", "runner_id", r.runnerCtx.id, "status", r.status, "pending_count", len(r.pending), "should_signal", shouldSignal)
367+
log.Tracew("graceful shutdown initiated", "runner_id", r.runnerCtx.id, "status", r.status, "pending_count", len(r.pending), "should_signal", shouldSignal)
368368

369369
if shouldSignal {
370370
if r.readyForShutdown == nil {
371371
log.Warnw("readyForShutdown channel is nil, cannot signal shutdown readiness", "runner_id", r.runnerCtx.id)
372372
} else {
373373
select {
374374
case <-r.readyForShutdown:
375-
log.Debugw("readyForShutdown already closed", "runner_id", r.runnerCtx.id)
375+
log.Tracew("readyForShutdown already closed", "runner_id", r.runnerCtx.id)
376376
default:
377-
log.Debugw("closing readyForShutdown channel", "runner_id", r.runnerCtx.id)
377+
log.Tracew("closing readyForShutdown channel", "runner_id", r.runnerCtx.id)
378378
close(r.readyForShutdown)
379379
}
380380
}
@@ -407,7 +407,7 @@ func (r *Runner) Start(ctx context.Context) error {
407407
return fmt.Errorf("failed to start subprocess: %w", err)
408408
}
409409

410-
log.Debugw("runner process started successfully", "pid", cmd.Process.Pid)
410+
log.Tracew("runner process started successfully", "pid", cmd.Process.Pid)
411411

412412
return nil
413413
}
@@ -437,7 +437,7 @@ func (r *Runner) setupLogCapture() error {
437437
line := scanner.Text()
438438
r.logStdout(line)
439439
}
440-
r.logger.Debug("finished stdout log capture")
440+
r.logger.Trace("finished stdout log capture")
441441
})
442442

443443
wg.Go(func() {
@@ -446,7 +446,7 @@ func (r *Runner) setupLogCapture() error {
446446
line := scanner.Text()
447447
r.logStderr(line)
448448
}
449-
r.logger.Debug("finished stderr log capture")
449+
r.logger.Trace("finished stderr log capture")
450450
})
451451

452452
// Signal when both pipes are closed (with double-close protection)
@@ -792,15 +792,15 @@ func (r *Runner) predict(req PredictionRequest) (chan PredictionResponse, error)
792792
r.mu.Lock()
793793
defer r.mu.Unlock()
794794

795-
log.Debugw("runner.predict called", "prediction_id", req.ID, "status", r.status)
795+
log.Tracew("runner.predict called", "prediction_id", req.ID, "status", r.status)
796796

797797
// Prediction must be pre-allocated by manager
798798
pending, exists := r.pending[req.ID]
799799
if !exists {
800800
return nil, fmt.Errorf("prediction %s not allocated", req.ID)
801801
}
802802

803-
log.Debugw("prediction found in pending", "prediction_id", req.ID)
803+
log.Tracew("prediction found in pending", "prediction_id", req.ID)
804804

805805
// Process input paths (base64 and URL inputs)
806806
inputPaths := make([]string, 0)
@@ -829,13 +829,13 @@ func (r *Runner) predict(req PredictionRequest) (chan PredictionResponse, error)
829829
return nil, fmt.Errorf("failed to write request file: %w", err)
830830
}
831831

832-
log.Debugw("wrote prediction request file", "prediction_id", req.ID, "path", requestPath, "working_dir", r.runnerCtx.workingdir, "request_data", string(requestData))
832+
log.Tracew("wrote prediction request file", "prediction_id", req.ID, "path", requestPath, "working_dir", r.runnerCtx.workingdir, "request_data", string(requestData))
833833

834834
// Debug: Check if file actually exists and list directory contents
835835
if _, err := os.Stat(requestPath); err != nil {
836-
log.Debugw("ERROR: written request file does not exist", "prediction_id", req.ID, "path", requestPath, "error", err)
836+
log.Tracew("ERROR: written request file does not exist", "prediction_id", req.ID, "path", requestPath, "error", err)
837837
} else {
838-
log.Debugw("confirmed request file exists", "prediction_id", req.ID, "path", requestPath)
838+
log.Tracew("confirmed request file exists", "prediction_id", req.ID, "path", requestPath)
839839
}
840840

841841
// Debug: List all files in working directory
@@ -844,13 +844,13 @@ func (r *Runner) predict(req PredictionRequest) (chan PredictionResponse, error)
844844
for i, entry := range entries {
845845
fileNames[i] = entry.Name()
846846
}
847-
log.Debugw("working directory contents after write", "prediction_id", req.ID, "working_dir", r.runnerCtx.workingdir, "files", fileNames)
847+
log.Tracew("working directory contents after write", "prediction_id", req.ID, "working_dir", r.runnerCtx.workingdir, "files", fileNames)
848848
}
849849

850850
// Update pending prediction with request details
851851
pending.request = req
852852

853-
log.Debugw("returning prediction channel", "prediction_id", req.ID)
853+
log.Tracew("returning prediction channel", "prediction_id", req.ID)
854854
return pending.c, nil
855855
}
856856

@@ -970,21 +970,21 @@ func (r *Runner) updateSetupResult() {
970970
}
971971

972972
setupResultPath := filepath.Join(r.runnerCtx.workingdir, "setup_result.json")
973-
log.Debug("reading setup_result.json", "path", setupResultPath)
973+
log.Trace("reading setup_result.json", "path", setupResultPath)
974974

975975
// Try to read additional setup result data from file
976976
var setupResultFromFile SetupResult
977977
if err := r.readJSON(setupResultPath, &setupResultFromFile); err != nil {
978-
log.Debugw("failed to read setup_result.json, assuming success", "error", err)
978+
log.Tracew("failed to read setup_result.json, assuming success", "error", err)
979979
// If setup_result.json doesn't exist, assume setup succeeded and status is ready
980980
r.setupResult.Status = SetupSucceeded
981981
r.setupResult.Schema = "" // Will be populated by updateSchema if available
982982
r.status = StatusReady
983-
log.Debugw("setup result not found, assuming success", "status", r.status.String())
983+
log.Tracew("setup result not found, assuming success", "status", r.status.String())
984984
return
985985
}
986986

987-
log.Debugw("successfully read setup_result.json", "status", setupResultFromFile.Status, "schema_length", len(setupResultFromFile.Schema))
987+
log.Tracew("successfully read setup_result.json", "status", setupResultFromFile.Status, "schema_length", len(setupResultFromFile.Schema))
988988

989989
// Update setup result with data from file, preserving logs that were already set
990990
r.setupResult.Status = setupResultFromFile.Status

0 commit comments

Comments
 (0)