11 changes: 11 additions & 0 deletions docs/my-website/docs/proxy/guardrails/lakera_ai.md
@@ -29,6 +29,13 @@ guardrails:
mode: "pre_call"
api_key: os.environ/LAKERA_API_KEY
api_base: os.environ/LAKERA_API_BASE
- guardrail_name: "lakera-monitor"
litellm_params:
guardrail: lakera_v2
mode: "pre_call"
on_flagged: "monitor" # Log violations but don't block
api_key: os.environ/LAKERA_API_KEY
api_base: os.environ/LAKERA_API_BASE

```

@@ -144,6 +151,7 @@ guardrails:
# breakdown: Optional[bool] = True,
# metadata: Optional[Dict] = None,
# dev_info: Optional[bool] = True,
# on_flagged: Optional[str] = "block", # "block" or "monitor"
```

- `api_base`: (Optional[str]) The base URL of the Lakera API. Defaults to `https://api.lakera.ai`
@@ -153,3 +161,6 @@ guardrails:
- `breakdown`: (Optional[bool]) When true, the response includes a breakdown list of the detectors that were run, as defined in the policy, and whether each of them detected something.
- `metadata`: (Optional[Dict]) Metadata tags to attach to screening requests, as an object containing arbitrary key-value pairs.
- `dev_info`: (Optional[bool]) When true, the response includes an object with developer information about the build of Lakera Guard.
- `on_flagged`: (Optional[str]) Action to take when content is flagged. Defaults to `"block"`.
  - `"block"`: Raises an HTTP 400 exception when violations are detected (default behavior).
  - `"monitor"`: Logs violations but allows the request to proceed. Useful for tuning security policies without blocking legitimate requests.
33 changes: 25 additions & 8 deletions litellm/proxy/guardrails/guardrail_hooks/lakera_ai_v2.py
@@ -33,6 +33,7 @@ def __init__(
breakdown: Optional[bool] = True,
metadata: Optional[Dict] = None,
dev_info: Optional[bool] = True,
on_flagged: Optional[str] = "block",
**kwargs,
):
"""
@@ -48,6 +49,7 @@ def __init__(
breakdown: Optional[bool] = True,
metadata: Optional[Dict] = None,
dev_info: Optional[bool] = True,
on_flagged: Optional[str] = "block",  # Action to take when content is flagged: "block" or "monitor"
"""
self.async_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.GuardrailCallback
@@ -61,6 +63,7 @@ def __init__(
self.breakdown: Optional[bool] = breakdown
self.metadata: Optional[Dict] = metadata
self.dev_info: Optional[bool] = dev_info
self.on_flagged: str = on_flagged or "block"
super().__init__(**kwargs)

async def call_v2_guard(
@@ -228,10 +231,17 @@ async def async_pre_call_hook(
"Lakera AI: Masked PII in messages instead of blocking request"
)
else:
# If there are other violations or not set to mask PII, raise exception
raise self._get_http_exception_for_blocked_guardrail(
lakera_guardrail_response
)
# Check on_flagged setting
if self.on_flagged == "monitor":
verbose_proxy_logger.warning(
"Lakera Guardrail: Monitoring mode - violation detected but allowing request"
)
# Log violation but continue
elif self.on_flagged == "block":
# If there are other violations or not set to mask PII, raise exception
raise self._get_http_exception_for_blocked_guardrail(
lakera_guardrail_response
)

#########################################################
########## 3. Add the guardrail to the applied guardrails header ##########
@@ -286,10 +296,17 @@ async def async_moderation_hook(
"Lakera AI: Masked PII in messages instead of blocking request"
)
else:
# If there are other violations or not set to mask PII, raise exception
raise self._get_http_exception_for_blocked_guardrail(
lakera_guardrail_response
)
# Check on_flagged setting
if self.on_flagged == "monitor":
verbose_proxy_logger.warning(
"Lakera Guardrail: Monitoring mode - violation detected but allowing request"
)
# Log violation but continue
elif self.on_flagged == "block":
# If there are other violations or not set to mask PII, raise exception
raise self._get_http_exception_for_blocked_guardrail(
lakera_guardrail_response
)

#########################################################
########## 3. Add the guardrail to the applied guardrails header ##########
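The same `on_flagged` branch appears in both `async_pre_call_hook` and `async_moderation_hook`. As a standalone illustration of the dispatch (simplified, hypothetical names; the real code logs through `verbose_proxy_logger` and raises the `HTTPException` built by `_get_http_exception_for_blocked_guardrail`):

```python
# Simplified sketch of the on_flagged dispatch added to both hooks.
# ValueError stands in for the HTTP 400 exception the real hook raises.
import logging

logger = logging.getLogger(__name__)

def apply_on_flagged(on_flagged: str, flagged: bool) -> None:
    if not flagged:
        return  # nothing detected: both modes let the request through
    if on_flagged == "monitor":
        # log the violation but allow the request to proceed
        logger.warning("Monitoring mode - violation detected but allowing request")
    elif on_flagged == "block":
        # default behavior: reject the request
        raise ValueError("Violated Lakera guardrail policy")

apply_on_flagged("monitor", flagged=True)  # warns, does not raise
apply_on_flagged("block", flagged=False)   # no-op: nothing was flagged
```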
1 change: 1 addition & 0 deletions litellm/proxy/guardrails/guardrail_initializers.py
@@ -65,6 +65,7 @@ def initialize_lakera_v2(litellm_params: LitellmParams, guardrail: Guardrail):
breakdown=litellm_params.breakdown,
metadata=litellm_params.metadata,
dev_info=litellm_params.dev_info,
on_flagged=litellm_params.on_flagged,
)
litellm.logging_callback_manager.add_litellm_callback(_lakera_v2_callback)
return _lakera_v2_callback
4 changes: 4 additions & 0 deletions litellm/types/guardrails.py
@@ -391,6 +391,10 @@ class LakeraV2GuardrailConfigModel(BaseModel):
default=True,
description="Whether to include developer information in the response",
)
on_flagged: Optional[Literal["block", "monitor"]] = Field(
default="block",
description="Action to take when content is flagged: 'block' (raise exception) or 'monitor' (log only)",
)


class LassoGuardrailConfigModel(BaseModel):
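Because the field is typed `Optional[Literal["block", "monitor"]]`, pydantic rejects any other value when the config is parsed. A quick standalone check (assuming pydantic v2; `_Demo` is a stand-in for `LakeraV2GuardrailConfigModel`):

```python
from typing import Literal, Optional

from pydantic import BaseModel, Field, ValidationError

class _Demo(BaseModel):
    # mirrors the new field on LakeraV2GuardrailConfigModel
    on_flagged: Optional[Literal["block", "monitor"]] = Field(default="block")

print(_Demo().on_flagged)                      # block (the default)
print(_Demo(on_flagged="monitor").on_flagged)  # monitor
try:
    _Demo(on_flagged="audit")                  # not an allowed literal
except ValidationError as err:
    print(err.errors()[0]["type"])             # literal_error
```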
129 changes: 129 additions & 0 deletions tests/guardrails_tests/test_lakera_v2.py
@@ -231,3 +231,132 @@ async def test_lakera_blocks_flagged_content_with_user_scenario():
assert lakera_response["metadata"]["request_uuid"] == "b7cd4c8a-28aa-4285-a245-2befee514dbf"
assert len(lakera_response["breakdown"]) == 16 # All the breakdown items from the user's scenario


@pytest.mark.asyncio
async def test_lakera_monitor_mode_allows_flagged_content():
"""Test that monitor mode logs violations but allows requests to proceed."""

lakera_guardrail = LakeraAIGuardrail(
api_key="test_key",
on_flagged="monitor", # Monitor mode
)

# Mock response with violations
mock_response = {
'payload': [],
'flagged': True,
'breakdown': [
{'detector_type': 'moderated_content/violence', 'detected': True, 'message_id': 0},
{'detector_type': 'prompt_attack', 'detected': True, 'message_id': 0},
]
}

with patch.object(lakera_guardrail, 'call_v2_guard', new_callable=AsyncMock) as mock_call:
mock_call.return_value = (mock_response, {})

data = {
"messages": [
{"role": "user", "content": "Some harmful content"}
],
"model": "gpt-3.5-turbo",
"metadata": {}
}

user_api_key_dict = UserAPIKeyAuth(api_key="test_key")
cache = DualCache()

# Should NOT raise an exception in monitor mode
result = await lakera_guardrail.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=cache,
data=data,
call_type="completion"
)

# Verify request was allowed through
assert result is not None
assert "messages" in result


@pytest.mark.asyncio
async def test_lakera_block_mode_raises_exception():
"""Test that block mode (default) raises HTTPException for violations."""

lakera_guardrail = LakeraAIGuardrail(
api_key="test_key",
on_flagged="block", # Block mode (default)
)

mock_response = {
'payload': [],
'flagged': True,
'breakdown': [
{'detector_type': 'moderated_content/violence', 'detected': True, 'message_id': 0},
]
}

with patch.object(lakera_guardrail, 'call_v2_guard', new_callable=AsyncMock) as mock_call:
mock_call.return_value = (mock_response, {})

data = {
"messages": [
{"role": "user", "content": "Harmful content"}
],
"model": "gpt-3.5-turbo",
"metadata": {}
}

user_api_key_dict = UserAPIKeyAuth(api_key="test_key")
cache = DualCache()

# Should raise HTTPException in block mode
with pytest.raises(HTTPException) as exc_info:
await lakera_guardrail.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=cache,
data=data,
call_type="completion"
)

assert exc_info.value.status_code == 400


@pytest.mark.asyncio
async def test_lakera_monitor_mode_during_call():
"""Test monitor mode works with during_call (moderation_hook)."""

lakera_guardrail = LakeraAIGuardrail(
api_key="test_key",
on_flagged="monitor",
)

mock_response = {
'payload': [],
'flagged': True,
'breakdown': [
{'detector_type': 'prompt_attack', 'detected': True, 'message_id': 0},
]
}

with patch.object(lakera_guardrail, 'call_v2_guard', new_callable=AsyncMock) as mock_call:
mock_call.return_value = (mock_response, {})

data = {
"messages": [
{"role": "user", "content": "Test content"}
],
"model": "gpt-3.5-turbo",
"metadata": {}
}

user_api_key_dict = UserAPIKeyAuth(api_key="test_key")

# Should NOT raise exception in monitor mode
result = await lakera_guardrail.async_moderation_hook(
data=data,
user_api_key_dict=user_api_key_dict,
call_type="completion"
)

assert result is not None
