Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(#739): add probe check in client and operator #763

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
47 changes: 44 additions & 3 deletions client/gefyra/api/bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
from typing import List, Dict, TYPE_CHECKING

from gefyra.exceptions import CommandTimeoutError, GefyraBridgeError
from kubernetes.client.exceptions import ApiException

if TYPE_CHECKING:
from gefyra.configuration import ClientConfiguration
from gefyra.types import GefyraBridge


from .utils import stopwatch, wrap_bridge
from .utils import get_workload_type, stopwatch, wrap_bridge

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -37,12 +38,12 @@ def get_pods_to_intercept(


def check_workloads(
pods_to_intercept,
pods_to_intercept: dict,
workload_type: str,
workload_name: str,
container_name: str,
namespace: str,
config,
config: "ClientConfiguration",
):
from gefyra.cluster.resources import check_pod_valid_for_bridge

Expand All @@ -57,11 +58,51 @@ def check_workloads(
f"Could not find {workload_type}/{workload_name} to bridge. Available"
f" {workload_type}: {', '.join(cleaned_names)}"
)

if container_name not in [
container for c_list in pods_to_intercept.values() for container in c_list
]:
raise RuntimeError(f"Could not find container {container_name} to bridge.")

# Validate workload and probes
api = config.K8S_APP_API
core_api = config.K8S_CORE_API
try:
reconstructed_workload_type = get_workload_type(workload_type)
if reconstructed_workload_type == "pod":
workload = core_api.read_namespaced_pod(workload_name, namespace)
elif reconstructed_workload_type == "deployment":
workload = api.read_namespaced_deployment(workload_name, namespace)
elif reconstructed_workload_type == "statefulset":
workload = api.read_namespaced_stateful_set(workload_name, namespace)
except ApiException as e:
raise RuntimeError(
f"Error fetching workload {workload_type}/{workload_name}: {e}"
)

containers = (
workload.spec.template.spec.containers
if hasattr(workload.spec, "template")
else workload.spec.containers
)
target_container = next((c for c in containers if c.name == container_name), None)
if not target_container:
raise RuntimeError(
f"Container {container_name} not found in workload {workload_type}/{workload_name}."
)

def validate_http_probe(probe, probe_type):
if probe and probe.http_get is None:
raise RuntimeError(
f"{probe_type} in container {container_name} does not use httpGet. "
f"Only HTTP-based probes are supported."
)

# Check for HTTP probes only
validate_http_probe(target_container.liveness_probe, "LivenessProbe")
validate_http_probe(target_container.readiness_probe, "ReadinessProbe")
validate_http_probe(target_container.startup_probe, "StartupProbe")

for name in pod_names:
check_pod_valid_for_bridge(config, name, namespace, container_name)

Expand Down
11 changes: 5 additions & 6 deletions operator/gefyra/bridge/carrier/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from typing import Any, Dict, List, Optional
from gefyra.bridge.exceptions import BridgeInstallException
from gefyra.utils import exec_command_pod
import kubernetes as k8s

Expand Down Expand Up @@ -139,11 +140,9 @@ def _patch_pod_with_carrier(
self._get_all_probes(container),
)
):
self.logger.error(
"Not all of the probes to be handled are currently"
" supported by Gefyra"
raise BridgeInstallException(
message="Not all of the probes to be handled are currently supported by Gefyra"
)
return False, pod
if (
container.image
== f"{self.configuration.CARRIER_IMAGE}:{self.configuration.CARRIER_IMAGE_TAG}"
Expand All @@ -157,8 +156,8 @@ def _patch_pod_with_carrier(
container.image = f"{self.configuration.CARRIER_IMAGE}:{self.configuration.CARRIER_IMAGE_TAG}"
break
else:
raise RuntimeError(
f"Could not found container {self.container} in Pod {self.pod}"
raise BridgeInstallException(
message=f"Could not found container {self.container} in Pod {self.pod}"
)
self.logger.info(
f"Now patching Pod {self.pod}; container {self.container} with Carrier"
Expand Down
5 changes: 5 additions & 0 deletions operator/gefyra/bridge/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from gefyra.exceptions import BridgeException


class BridgeInstallException(BridgeException):
    """
    Signals that a bridge provider could not be installed.

    It carries no behavior of its own; it only narrows ``BridgeException``
    so that callers can handle install failures separately.
    """
18 changes: 16 additions & 2 deletions operator/gefyra/bridgestate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
import kubernetes as k8s
from statemachine import State, StateMachine


from gefyra.base import GefyraStateObject, StateControllerMixin
from gefyra.configuration import OperatorConfiguration

from gefyra.bridge.exceptions import BridgeInstallException
from gefyra.exceptions import BridgeException


class GefyraBridgeObject(GefyraStateObject):
plural = "gefyrabridges"
Expand Down Expand Up @@ -121,7 +123,11 @@ def _install_provider(self):
It installs the bridge provider
:return: Nothing
"""
self.bridge_provider.install()
try:
self.bridge_provider.install()
except BridgeInstallException as be:
self.logger.debug(f"Encountered: {be}")
self.send("impair", exception=be)

def _wait_for_provider(self):
if not self.bridge_provider.ready():
Expand Down Expand Up @@ -189,3 +195,11 @@ def on_remove(self):
# Hook for the "restore" event (presumably bound by the state machine's
# on_<event> naming convention -- confirm against the class definition).
# It first uninstalls the bridge provider and only afterwards sends the
# "terminate" event, so the order of the two calls matters.
def on_restore(self):
self.bridge_provider.uninstall()
self.send("terminate")

def on_impair(self, exception: Optional["BridgeException"] = None):
    """
    It logs the failure and posts a Warning event for the impaired bridge

    The event message is taken from the exception that caused the failure.
    Since ``exception`` is optional, a generic message is used when the
    "impair" event is sent without one (reading ``exception.message``
    unconditionally would raise an AttributeError in that case).

    :param exception: the error that caused the failure, if any
    :return: Nothing
    """
    reason = f"Failed from {self.current_state}"
    if exception is None:
        message = "No further details available"
    else:
        # fall back to str() for exceptions that carry no 'message' attribute
        message = getattr(exception, "message", None) or str(exception)
    self.logger.error(reason)
    self.post_event(
        reason=reason,
        message=message,
        _type="Warning",
    )
5 changes: 5 additions & 0 deletions operator/gefyra/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class BridgeException(Exception):
    """
    Base class for errors raised while handling a bridge.

    :param message: human-readable description of the failure
    """

    # human-readable description of the failure
    message: str

    def __init__(self, message: str):
        # Hand the message to Exception as well: without this, an instance
        # created with the keyword form (message="...") has empty ``args``
        # and str(exc) is "", which makes log lines such as f"{exc}" useless.
        super().__init__(message)
        self.message = message
2 changes: 1 addition & 1 deletion operator/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions operator/tests/e2e/test_create_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,39 @@ def test_b_cleanup_bridges_routes(
namespace="gefyra",
timeout=60,
)


def test_c_fail_create_not_supported_bridges(
    demo_backend_image, demo_frontend_image, carrier_image, operator: AClusterManager
):
    """
    A bridge targeting a Pod from the "not supported" fixture must end up in
    the ERROR state while the target Pod keeps reporting ready.
    """
    cluster = operator

    def wait_for_frontend_ready():
        # the same readiness check is needed before and after the bridge
        cluster.wait(
            "pod/frontend",
            "condition=ready",
            namespace="demo-failing",
            timeout=60,
        )

    for image in (demo_backend_image, demo_frontend_image, carrier_image):
        cluster.load_image(image)

    cluster.kubectl(["create", "namespace", "demo-failing"])
    cluster.wait("ns/demo-failing", "jsonpath='{.status.phase}'=Active")
    cluster.apply("tests/fixtures/demo_pods_not_supported.yaml")
    wait_for_frontend_ready()

    cluster.apply("tests/fixtures/a_gefyra_bridge_failing.yaml")
    # the bridge is expected to reach the ERROR state
    cluster.wait(
        "gefyrabridges.gefyra.dev/bridge-a",
        "jsonpath=.state=ERROR",
        namespace="gefyra",
        timeout=20,
    )

    # applying the bridge shouldn't have worked, so the Pod is still ready
    wait_for_frontend_ready()
14 changes: 14 additions & 0 deletions operator/tests/fixtures/a_gefyra_bridge_failing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: gefyra.dev/v1
kind: gefyrabridge
metadata:
name: bridge-a
namespace: gefyra
provider: carrier
connectionProvider: stowaway
client: client-a
targetNamespace: demo-failing
targetPod: frontend
targetContainer: frontend
portMappings:
- "8080:80"
destinationIP: "192.168.101.1"
38 changes: 38 additions & 0 deletions operator/tests/fixtures/demo_pods_not_supported.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
apiVersion: v1
kind: Namespace
metadata:
name: demo-failing
---
apiVersion: v1
kind: Pod
metadata:
name: frontend
namespace: demo-failing
labels:
app: frontend
spec:
containers:
- name: frontend
image: quay.io/gefyra/gefyra-demo-frontend
imagePullPolicy: IfNotPresent
ports:
- name: web
containerPort: 5003
protocol: TCP
env:
- name: SVC_URL
value: "backend.demo.svc.cluster.local:5002"
livenessProbe:
exec:
command:
- ls
- /tmp
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
exec:
command:
- ls
- /tmp
initialDelaySeconds: 5
periodSeconds: 5
Loading