|
3 | 3 | package main |
4 | 4 |
|
5 | 5 | import ( |
6 | | - "bytes" |
7 | 6 | "context" |
8 | 7 | "crypto/rand" |
9 | 8 | "encoding/hex" |
|
47 | 46 | trayAPIKey = "" // API key generated for core communication |
48 | 47 | shutdownComplete = make(chan struct{}) // Signal when shutdown is complete |
49 | 48 | shutdownOnce sync.Once |
50 | | - |
51 | | - errDockerPaused = errors.New("docker engine paused") |
52 | | - errDockerUnavailable = errors.New("docker engine unavailable") |
53 | 49 | ) |
54 | 50 |
|
55 | 51 | // getLogDir returns the standard log directory for the current OS. |
@@ -1013,10 +1009,6 @@ type CoreProcessLauncher struct { |
1013 | 1009 | healthMonitor *monitor.HealthMonitor |
1014 | 1010 |
|
1015 | 1011 | coreOwnership coreOwnershipMode |
1016 | | - |
1017 | | - dockerRetryMu sync.Mutex |
1018 | | - dockerRetryCancel context.CancelFunc |
1019 | | - dockerReconnectPending bool |
1020 | 1012 | } |
1021 | 1013 |
|
1022 | 1014 | // NewCoreProcessLauncher creates a new core process launcher |
@@ -1108,10 +1100,13 @@ func (cpl *CoreProcessLauncher) handleStateTransitions(ctx context.Context, tran |
1108 | 1100 | cpl.handleDBLockedError() |
1109 | 1101 |
|
1110 | 1102 | case state.StateCoreErrorDocker: |
1111 | | - cpl.handleDockerUnavailable(ctx) |
| 1103 | + // Docker errors are handled by the core, not the tray |
| 1104 | + // The tray should monitor Docker status via API, not block on it |
| 1105 | + cpl.logger.Warn("Core reported Docker error - this should be handled by core, not tray") |
1112 | 1106 |
|
1113 | 1107 | case state.StateCoreRecoveringDocker: |
1114 | | - cpl.handleDockerRecovering() |
| 1108 | + // Docker recovery is handled by the core, not the tray |
| 1109 | + cpl.logger.Info("Core recovering from Docker issues") |
1115 | 1110 |
|
1116 | 1111 | case state.StateCoreErrorConfig: |
1117 | 1112 | cpl.handleConfigError() |
@@ -1223,18 +1218,9 @@ func (cpl *CoreProcessLauncher) safeHandleReconnecting(ctx context.Context) { |
1223 | 1218 | func (cpl *CoreProcessLauncher) handleLaunchCore(ctx context.Context) { |
1224 | 1219 | cpl.logger.Info("Launching mcpproxy core process") |
1225 | 1220 |
|
1226 | | - // Stop any pending Docker retry loop before attempting a new launch |
1227 | | - cpl.cancelDockerRetry() |
1228 | | - |
1229 | | - // Ensure Docker engine is available before launching core (most upstreams depend on it) |
1230 | | - if runtime.GOOS == platformDarwin || runtime.GOOS == platformWindows { |
1231 | | - if err := cpl.ensureDockerAvailable(ctx); err != nil { |
1232 | | - cpl.logger.Error("Docker engine unavailable", zap.Error(err)) |
1233 | | - cpl.stateMachine.SetError(err) |
1234 | | - cpl.stateMachine.SendEvent(state.EventDockerUnavailable) |
1235 | | - return |
1236 | | - } |
1237 | | - } |
| 1221 | + // NOTE: We do NOT check Docker availability here - that's the core's responsibility! |
| 1222 | + // The core will handle Docker isolation gracefully and fall back to direct execution. |
| 1223 | + // The tray should not block core launch based on Docker status. |
1238 | 1224 |
|
1239 | 1225 | // Stop existing process monitor if running |
1240 | 1226 | if cpl.processMonitor != nil { |
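The NOTE above delegates Docker handling to the core: the core attempts Docker isolation and falls back to direct execution when the engine is unavailable, so the tray never blocks launch on Docker. The core's actual implementation is not part of this diff; the sketch below only illustrates that fallback pattern. It reuses the `docker info` probe from the removed `ensureDockerAvailable`, and every other name (the `core` package, `UpstreamSpec`, `startUpstream`) is invented for illustration.

```go
package core // hypothetical package; the real core sources are outside this diff

import (
	"context"
	"fmt"
	"log"
	"os/exec"
	"time"
)

// UpstreamSpec is an illustrative stand-in for however the core describes an upstream.
type UpstreamSpec struct {
	Image   string   // container image when Docker isolation is used
	Command string   // binary to exec when falling back to direct execution
	Args    []string
}

// dockerAvailable reports whether the Docker daemon answers a quick `docker info`,
// the same probe the removed tray-side ensureDockerAvailable performed.
func dockerAvailable(ctx context.Context) bool {
	ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
	defer cancel()
	return exec.CommandContext(ctx, "docker", "info", "--format", "{{.ServerVersion}}").Run() == nil
}

// startUpstream prefers Docker isolation but degrades to direct execution when
// the daemon is paused or unreachable, so a Docker outage only affects isolation
// for this upstream rather than blocking the whole core.
func startUpstream(ctx context.Context, spec UpstreamSpec) (*exec.Cmd, error) {
	var cmd *exec.Cmd
	if dockerAvailable(ctx) {
		args := append([]string{"run", "--rm", spec.Image, spec.Command}, spec.Args...)
		cmd = exec.CommandContext(ctx, "docker", args...)
	} else {
		log.Printf("docker isolation unavailable, falling back to direct execution")
		cmd = exec.CommandContext(ctx, spec.Command, spec.Args...)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("start upstream: %w", err)
	}
	return cmd, nil
}
```

The point of the pattern is that a paused or missing daemon degrades a single upstream's isolation instead of preventing the core process from launching at all.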
@@ -1424,9 +1410,8 @@ func (cpl *CoreProcessLauncher) monitorAPIConnection(ctx context.Context, alread |
1424 | 1410 | func (cpl *CoreProcessLauncher) handleConnected() { |
1425 | 1411 | cpl.logger.Info("Core process fully connected and operational") |
1426 | 1412 |
|
1427 | | - if cpl.consumeDockerReconnectPending() { |
1428 | | - go cpl.triggerForceReconnect("docker_recovered") |
1429 | | - } |
| 1413 | + // Docker reconnection is handled by the core's own recovery logic |
| 1414 | + // The tray just monitors status via the Docker status API endpoint |
1430 | 1415 | } |
1431 | 1416 |
|
1432 | 1417 | // handleReconnecting handles reconnection attempts |
@@ -1496,161 +1481,9 @@ func findNextAvailablePort(start, end int) (int, error) { |
1496 | 1481 | return 0, fmt.Errorf("no free port in range %d-%d", start, end) |
1497 | 1482 | } |
1498 | 1483 |
|
1499 | | -// handleDockerUnavailable handles scenarios where Docker Desktop is paused or unavailable. |
1500 | | -func (cpl *CoreProcessLauncher) handleDockerUnavailable(ctx context.Context) { |
1501 | | - lastErr := cpl.stateMachine.GetLastError() |
1502 | | - if lastErr != nil { |
1503 | | - cpl.logger.Warn("Docker engine unavailable - waiting for recovery", zap.Error(lastErr)) |
1504 | | - } else { |
1505 | | - cpl.logger.Warn("Docker engine unavailable - waiting for recovery") |
1506 | | - } |
1507 | | - |
1508 | | - cpl.dockerRetryMu.Lock() |
1509 | | - if cpl.dockerRetryCancel != nil { |
1510 | | - cpl.dockerRetryCancel() |
1511 | | - } |
1512 | | - retryCtx, cancel := context.WithCancel(ctx) |
1513 | | - cpl.dockerRetryCancel = cancel |
1514 | | - cpl.dockerRetryMu.Unlock() |
1515 | | - |
1516 | | - go func() { |
1517 | | - ticker := time.NewTicker(5 * time.Second) |
1518 | | - defer ticker.Stop() |
1519 | | - for { |
1520 | | - select { |
1521 | | - case <-retryCtx.Done(): |
1522 | | - return |
1523 | | - case <-ticker.C: |
1524 | | - if err := cpl.ensureDockerAvailable(retryCtx); err == nil { |
1525 | | - cpl.logger.Info("Docker engine available - retrying core launch") |
1526 | | - cpl.setDockerReconnectPending(true) |
1527 | | - cpl.cancelDockerRetry() |
1528 | | - cpl.stateMachine.SendEvent(state.EventRetry) |
1529 | | - return |
1530 | | - } else if err != nil { |
1531 | | - cpl.logger.Debug("Docker still unavailable", zap.Error(err)) |
1532 | | - } |
1533 | | - } |
1534 | | - } |
1535 | | - }() |
1536 | | -} |
1537 | | - |
1538 | | -// cancelDockerRetry stops any pending Docker retry loop. |
1539 | | -func (cpl *CoreProcessLauncher) cancelDockerRetry() { |
1540 | | - cpl.dockerRetryMu.Lock() |
1541 | | - if cpl.dockerRetryCancel != nil { |
1542 | | - cpl.dockerRetryCancel() |
1543 | | - cpl.dockerRetryCancel = nil |
1544 | | - } |
1545 | | - cpl.dockerRetryMu.Unlock() |
1546 | | -} |
1547 | | - |
1548 | | -// handleDockerRecovering handles the Docker recovery state when Docker becomes available again. |
1549 | | -func (cpl *CoreProcessLauncher) handleDockerRecovering() { |
1550 | | - cpl.logger.Info("Docker engine recovered - preparing to reconnect servers") |
1551 | | - // The state machine will automatically transition to StateLaunchingCore |
1552 | | - // after the timeout defined in StateCoreRecoveringDocker (10s) |
1553 | | -} |
1554 | | - |
1555 | | -func (cpl *CoreProcessLauncher) setDockerReconnectPending(pending bool) { |
1556 | | - cpl.dockerRetryMu.Lock() |
1557 | | - cpl.dockerReconnectPending = pending |
1558 | | - cpl.dockerRetryMu.Unlock() |
1559 | | -} |
1560 | | - |
1561 | | -func (cpl *CoreProcessLauncher) consumeDockerReconnectPending() bool { |
1562 | | - cpl.dockerRetryMu.Lock() |
1563 | | - pending := cpl.dockerReconnectPending |
1564 | | - if pending { |
1565 | | - cpl.dockerReconnectPending = false |
1566 | | - } |
1567 | | - cpl.dockerRetryMu.Unlock() |
1568 | | - return pending |
1569 | | -} |
1570 | | - |
1571 | | -// ensureDockerAvailable verifies Docker Desktop is running and responsive. |
1572 | | -func (cpl *CoreProcessLauncher) ensureDockerAvailable(ctx context.Context) error { |
1573 | | - checkCtx := ctx |
1574 | | - if checkCtx == nil { |
1575 | | - checkCtx = context.Background() |
1576 | | - } |
1577 | | - timeoutCtx, cancel := context.WithTimeout(checkCtx, 3*time.Second) |
1578 | | - defer cancel() |
1579 | | - |
1580 | | - cmd := exec.CommandContext(timeoutCtx, "docker", "info", "--format", "{{json .ServerVersion}}") |
1581 | | - var stderr bytes.Buffer |
1582 | | - cmd.Stdout = &bytes.Buffer{} |
1583 | | - cmd.Stderr = &stderr |
1584 | | - |
1585 | | - if err := cmd.Run(); err != nil { |
1586 | | - stderrStr := strings.TrimSpace(stderr.String()) |
1587 | | - lower := strings.ToLower(stderrStr) |
1588 | | - |
1589 | | - switch { |
1590 | | - case strings.Contains(lower, "docker desktop is manually paused"): |
1591 | | - if stderrStr == "" { |
1592 | | - stderrStr = "Docker Desktop is manually paused" |
1593 | | - } |
1594 | | - return fmt.Errorf("%w: %s", errDockerPaused, stderrStr) |
1595 | | - case strings.Contains(lower, "is the docker daemon running"), |
1596 | | - strings.Contains(lower, "cannot connect to the docker daemon"), |
1597 | | - strings.Contains(lower, "error during connect"), |
1598 | | - strings.Contains(lower, "connectex"), |
1599 | | - errors.Is(err, context.DeadlineExceeded): |
1600 | | - if stderrStr == "" { |
1601 | | - stderrStr = "Docker daemon is not responding" |
1602 | | - } |
1603 | | - return fmt.Errorf("%w: %s", errDockerUnavailable, stderrStr) |
1604 | | - } |
1605 | | - |
1606 | | - var execErr *exec.Error |
1607 | | - if errors.As(err, &execErr) { |
1608 | | - return fmt.Errorf("%w: %v", errDockerUnavailable, execErr) |
1609 | | - } |
1610 | | - |
1611 | | - if exitErr, ok := err.(*exec.ExitError); ok { |
1612 | | - if stderrStr == "" { |
1613 | | - stderrStr = exitErr.Error() |
1614 | | - } |
1615 | | - return fmt.Errorf("%w: %s", errDockerUnavailable, stderrStr) |
1616 | | - } |
1617 | | - |
1618 | | - if stderrStr != "" { |
1619 | | - return fmt.Errorf("%w: %s", errDockerUnavailable, stderrStr) |
1620 | | - } |
1621 | | - |
1622 | | - return fmt.Errorf("%w: %v", errDockerUnavailable, err) |
1623 | | - } |
1624 | | - |
1625 | | - return nil |
1626 | | -} |
1627 | | - |
1628 | | -func (cpl *CoreProcessLauncher) triggerForceReconnect(reason string) { |
1629 | | - if cpl.apiClient == nil { |
1630 | | - return |
1631 | | - } |
1632 | | - |
1633 | | - const maxAttempts = 3 |
1634 | | - for attempt := 1; attempt <= maxAttempts; attempt++ { |
1635 | | - if err := cpl.apiClient.ForceReconnectAllServers(reason); err != nil { |
1636 | | - cpl.logger.Warn("Failed to trigger upstream reconnection after Docker recovery", |
1637 | | - zap.String("reason", reason), |
1638 | | - zap.Int("attempt", attempt), |
1639 | | - zap.Error(err)) |
1640 | | - time.Sleep(2 * time.Second) |
1641 | | - continue |
1642 | | - } |
1643 | | - |
1644 | | - cpl.logger.Info("Triggered upstream reconnection after Docker recovery", |
1645 | | - zap.String("reason", reason), |
1646 | | - zap.Int("attempt", attempt)) |
1647 | | - return |
1648 | | - } |
1649 | | - |
1650 | | - cpl.logger.Error("Exhausted attempts to trigger upstream reconnection after Docker recovery", |
1651 | | - zap.String("reason", reason), |
1652 | | - zap.Int("attempts", maxAttempts)) |
1653 | | -} |
| 1484 | +// Docker availability checking and recovery is handled by the core, not the tray. |
| 1485 | +// The tray monitors Docker status via the core's API (monitorDockerStatus function) |
| 1486 | +// and shows notifications to the user, but never blocks core launch on Docker availability. |
1654 | 1487 |
|
1655 | 1488 | // handleDBLockedError handles database locked errors |
1656 | 1489 | func (cpl *CoreProcessLauncher) handleDBLockedError() { |
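The summary comment in the hunk above refers to a `monitorDockerStatus` function and a Docker status API endpoint that are not shown in this diff. Below is a minimal sketch of what such tray-side polling could look like; the endpoint path `/api/v1/docker/status`, the response fields, and the `pollDockerStatus` name are assumptions for illustration, not the project's actual API.

```go
package main

import (
	"context"
	"encoding/json"
	"net/http"
	"time"

	"go.uber.org/zap"
)

// pollDockerStatus is a hypothetical sketch of tray-side monitoring: ask the
// core for Docker health on a timer and surface problems to the user, without
// ever gating core launch on the result.
func pollDockerStatus(ctx context.Context, baseURL string, logger *zap.Logger) {
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()

	client := &http.Client{Timeout: 5 * time.Second}
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			resp, err := client.Get(baseURL + "/api/v1/docker/status") // assumed endpoint
			if err != nil {
				logger.Debug("Docker status poll failed", zap.Error(err))
				continue
			}
			var status struct {
				Available bool   `json:"available"` // assumed response shape
				Message   string `json:"message"`
			}
			if json.NewDecoder(resp.Body).Decode(&status) == nil && !status.Available {
				logger.Warn("Core reports Docker unavailable", zap.String("message", status.Message))
			}
			resp.Body.Close()
		}
	}
}
```

Because the loop only logs and notifies, a Docker outage never re-enters the tray's core-launch path, which is the design change this commit makes.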
@@ -1703,9 +1536,6 @@ func (cpl *CoreProcessLauncher) handleShutdown() { |
1703 | 1536 | cpl.trayApp.SetConnectionState(tray.ConnectionStateDisconnected) |
1704 | 1537 | } |
1705 | 1538 |
|
1706 | | - // Stop any Docker retry loop |
1707 | | - cpl.cancelDockerRetry() |
1708 | | - |
1709 | 1539 | // Stop SSE connection before killing core |
1710 | 1540 | // This prevents SSE from detecting disconnection and trying to reconnect |
1711 | 1541 | if cpl.apiClient != nil { |
|