fix(core): fix validation for input variables in f-string templates, restrict functionality supported by jinja2, mustache templates (#34038)

eyurtsev · web-flow · commit fa7789d6c212 · 2025-11-19T16:52:32.000-05:00
* Fix validation for input variables in f-string templates
* Restrict the functionality supported by jinja2 and mustache
  templates
diff --git a/libs/core/langchain_core/prompts/string.py b/libs/core/langchain_core/prompts/string.py
@@ -17,9 +17,66 @@
 from langchain_core.utils.interactive_env import is_interactive_env
 
 try:
-    from jinja2 import Environment, meta
+    from jinja2 import meta
+    from jinja2.exceptions import SecurityError
     from jinja2.sandbox import SandboxedEnvironment
 
+    class _RestrictedSandboxedEnvironment(SandboxedEnvironment):
+        """A more restrictive Jinja2 sandbox that blocks all attribute/method access.
+
+        This sandbox only allows simple variable lookups, no attribute or method access.
+        This prevents template injection attacks via methods like parse_raw().
+        """
+
+        def is_safe_attribute(self, _obj: Any, _attr: str, _value: Any) -> bool:
+            """Block ALL attribute access for security.
+
+            Only allow accessing variables directly from the context dict,
+            no attribute access on those objects.
+
+            Args:
+                _obj: The object being accessed (unused, always blocked).
+                _attr: The attribute name (unused, always blocked).
+                _value: The attribute value (unused, always blocked).
+
+            Returns:
+                False - all attribute access is blocked.
+            """
+            # Block all attribute access
+            return False
+
+        def is_safe_callable(self, _obj: Any) -> bool:
+            """Block all method calls for security.
+
+            Args:
+                _obj: The object being checked (unused, always blocked).
+
+            Returns:
+                False - all callables are blocked.
+            """
+            return False
+
+        def getattr(self, obj: Any, attribute: str) -> Any:
+            """Override getattr to block all attribute access.
+
+            Args:
+                obj: The object.
+                attribute: The attribute name.
+
+            Returns:
+                Never returns.
+
+            Raises:
+                SecurityError: Always, to block attribute access.
+            """
+            msg = (
+                f"Access to attributes is not allowed in templates. "
+                f"Attempted to access '{attribute}' on {type(obj).__name__}. "
+                f"Use only simple variable names like {{{{variable}}}} "
+                f"without dots or methods."
+            )
+            raise SecurityError(msg)
+
     _HAS_JINJA2 = True
 except ImportError:
     _HAS_JINJA2 = False
@@ -59,14 +116,10 @@ def jinja2_formatter(template: str, /, **kwargs: Any) -> str:
         )
         raise ImportError(msg)
 
-    # This uses a sandboxed environment to prevent arbitrary code execution.
-    # Jinja2 uses an opt-out rather than opt-in approach for sand-boxing.
-    # Please treat this sand-boxing as a best-effort approach rather than
-    # a guarantee of security.
-    # We recommend to never use jinja2 templates with untrusted inputs.
-    # https://jinja.palletsprojects.com/en/3.1.x/sandbox/
-    # approach not a guarantee of security.
-    return SandboxedEnvironment().from_string(template).render(**kwargs)
+    # Use a restricted sandbox that blocks ALL attribute/method access
+    # Only simple variable lookups like {{variable}} are allowed
+    # Attribute access like {{variable.attr}} or {{variable.method()}} is blocked
+    return _RestrictedSandboxedEnvironment().from_string(template).render(**kwargs)
 
 
 def validate_jinja2(template: str, input_variables: list[str]) -> None:
@@ -101,7 +154,7 @@ def _get_jinja2_variables_from_template(template: str) -> set[str]:
             "Please install it with `pip install jinja2`."
         )
         raise ImportError(msg)
-    env = Environment()  # noqa: S701
+    env = _RestrictedSandboxedEnvironment()
     ast = env.parse(template)
     return meta.find_undeclared_variables(ast)
 
@@ -268,6 +321,30 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
         msg = f"Unsupported template format: {template_format}"
         raise ValueError(msg)
 
+    # For f-strings, block attribute access and indexing syntax
+    # This prevents template injection attacks via accessing dangerous attributes
+    if template_format == "f-string":
+        for var in input_variables:
+            # Formatter().parse() returns field names with dots/brackets if present
+            # e.g., "obj.attr" or "obj[0]" - we need to block these
+            if "." in var or "[" in var or "]" in var:
+                msg = (
+                    f"Invalid variable name {var!r} in f-string template. "
+                    f"Variable names cannot contain attribute "
+                    f"access (.) or indexing ([])."
+                )
+                raise ValueError(msg)
+
+            # Block variable names that are all digits (e.g., "0", "100")
+            # These are interpreted as positional arguments, not keyword arguments
+            if var.isdigit():
+                msg = (
+                    f"Invalid variable name {var!r} in f-string template. "
+                    f"Variable names cannot be all digits as they are interpreted "
+                    f"as positional arguments."
+                )
+                raise ValueError(msg)
+
     return sorted(input_variables)
 
 
diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py
@@ -376,15 +376,29 @@ def _get_key(
                 if resolved_scope in (0, False):
                     return resolved_scope
                 # Move into the scope
-                try:
-                    # Try subscripting (Normal dictionaries)
-                    resolved_scope = cast("dict[str, Any]", resolved_scope)[child]
-                except (TypeError, AttributeError):
+                if isinstance(resolved_scope, dict):
                     try:
-                        resolved_scope = getattr(resolved_scope, child)
-                    except (TypeError, AttributeError):
-                        # Try as a list
-                        resolved_scope = resolved_scope[int(child)]  # type: ignore[index]
+                        resolved_scope = resolved_scope[child]
+                    except (KeyError, TypeError):
+                        # Key not found - will be caught by outer try-except
+                        msg = f"Key {child!r} not found in dict"
+                        raise KeyError(msg) from None
+                elif isinstance(resolved_scope, (list, tuple)):
+                    try:
+                        resolved_scope = resolved_scope[int(child)]
+                    except (ValueError, IndexError, TypeError):
+                        # Invalid index - will be caught by outer try-except
+                        msg = f"Invalid index {child!r} for list/tuple"
+                        raise IndexError(msg) from None
+                else:
+                    # Reject everything else for security
+                    # This prevents traversing into arbitrary Python objects
+                    msg = (
+                        f"Cannot traverse into {type(resolved_scope).__name__}. "
+                        "Mustache templates only support dict, list, and tuple. "
+                        f"Got: {type(resolved_scope)}"
+                    )
+                    raise TypeError(msg)  # noqa: TRY301
 
             try:
                 # This allows for custom falsy data types
@@ -395,8 +409,9 @@ def _get_key(
                 if resolved_scope in (0, False):
                     return resolved_scope
                 return resolved_scope or ""
-        except (AttributeError, KeyError, IndexError, ValueError):
+        except (AttributeError, KeyError, IndexError, ValueError, TypeError):
             # We couldn't find the key in the current scope
+            # TypeError: Attempted to traverse into non-dict/list type
             # We'll try again on the next pass
             pass
 
diff --git a/libs/core/tests/unit_tests/prompts/test_chat.py b/libs/core/tests/unit_tests/prompts/test_chat.py
@@ -1236,3 +1236,164 @@ def test_dict_message_prompt_template_errors_on_jinja2() -> None:
         _ = ChatPromptTemplate.from_messages(
             [("human", [prompt])], template_format="jinja2"
         )
+
+
+def test_fstring_rejects_invalid_identifier_variable_names() -> None:
+    """Test that f-string templates block attribute access, indexing.
+
+    This validation prevents template injection attacks by blocking:
+    - Attribute access like {msg.__class__}
+    - Indexing like {msg[0]}
+    - All-digit variable names like {0} or {100} (interpreted as positional args)
+
+    While allowing any other field names that Python's Formatter accepts.
+    """
+    # Test that attribute access and indexing are blocked (security issue)
+    invalid_templates = [
+        "{msg.__class__}",  # Attribute access with dunder
+        "{msg.__class__.__name__}",  # Multiple dunders
+        "{msg.content}",  # Attribute access
+        "{msg[0]}",  # Item access
+        "{0}",  # All-digit variable name (positional argument)
+        "{100}",  # All-digit variable name (positional argument)
+        "{42}",  # All-digit variable name (positional argument)
+    ]
+
+    for template_str in invalid_templates:
+        with pytest.raises(ValueError, match="Invalid variable name") as exc_info:
+            ChatPromptTemplate.from_messages(
+                [("human", template_str)],
+                template_format="f-string",
+            )
+
+        error_msg = str(exc_info.value)
+        assert "Invalid variable name" in error_msg
+        # Check for any of the expected error message parts
+        assert (
+            "attribute access" in error_msg
+            or "indexing" in error_msg
+            or "positional arguments" in error_msg
+        )
+
+    # Valid templates - Python's Formatter accepts non-identifier field names
+    valid_templates = [
+        (
+            "Hello {name} and {user_id}",
+            {"name": "Alice", "user_id": "123"},
+            "Hello Alice and 123",
+        ),
+        ("User: {user-name}", {"user-name": "Bob"}, "User: Bob"),  # Hyphen allowed
+        (
+            "Value: {2fast}",
+            {"2fast": "Charlie"},
+            "Value: Charlie",
+        ),  # Starts with digit allowed
+        ("Data: {my var}", {"my var": "Dave"}, "Data: Dave"),  # Space allowed
+    ]
+
+    for template_str, kwargs, expected in valid_templates:
+        template = ChatPromptTemplate.from_messages(
+            [("human", template_str)],
+            template_format="f-string",
+        )
+        result = template.invoke(kwargs)
+        assert result.messages[0].content == expected  # type: ignore[attr-defined]
+
+
+def test_mustache_template_attribute_access_vulnerability() -> None:
+    """Test that Mustache template injection is blocked.
+
+    Verify the fix for security vulnerability GHSA-6qv9-48xg-fc7f
+
+    Previously, Mustache used getattr() as a fallback, allowing access to
+    dangerous attributes like __class__, __globals__, etc.
+
+    The fix adds isinstance checks that reject non-dict/list/tuple types.
+    When templates try to traverse Python objects, they get empty string
+    per Mustache spec (better than the previous behavior of exposing internals).
+    """
+    msg = HumanMessage("howdy")
+
+    # Template tries to access attributes on a Python object
+    prompt = ChatPromptTemplate.from_messages(
+        [("human", "{{question.__class__.__name__}}")],
+        template_format="mustache",
+    )
+
+    # After the fix: returns empty string (attack blocked!)
+    # Previously would return "HumanMessage" via getattr()
+    result = prompt.invoke({"question": msg})
+    assert result.messages[0].content == ""  # type: ignore[attr-defined]
+
+    # Mustache still works correctly with actual dicts
+    prompt_dict = ChatPromptTemplate.from_messages(
+        [("human", "{{person.name}}")],
+        template_format="mustache",
+    )
+    result_dict = prompt_dict.invoke({"person": {"name": "Alice"}})
+    assert result_dict.messages[0].content == "Alice"  # type: ignore[attr-defined]
+
+
+@pytest.mark.requires("jinja2")
+def test_jinja2_template_attribute_access_is_blocked() -> None:
+    """Test that Jinja2 SandboxedEnvironment blocks dangerous attribute access.
+
+    This test verifies that Jinja2's sandbox successfully blocks access to
+    dangerous dunder attributes like __class__ instead of exposing internals.
+
+    Expected: rendering raises SecurityError, because templates are rendered
+    with _RestrictedSandboxedEnvironment, whose getattr override always raises.
+    """
+    msg = HumanMessage("howdy")
+
+    # Create a Jinja2 template that attempts to access __class__.__name__
+    prompt = ChatPromptTemplate.from_messages(
+        [("human", "{{question.__class__.__name__}}")],
+        template_format="jinja2",
+    )
+
+    # Jinja2 sandbox should block this with SecurityError
+    with pytest.raises(Exception, match="attribute") as exc_info:
+        prompt.invoke(
+            {"question": msg, "question.__class__.__name__": "safe_placeholder"}
+        )
+
+    # Verify it's a SecurityError from Jinja2 blocking __class__ access
+    error_msg = str(exc_info.value)
+    assert (
+        "SecurityError" in str(type(exc_info.value))
+        or "access to attribute '__class__'" in error_msg
+    ), f"Expected SecurityError blocking __class__, got: {error_msg}"
+
+
+@pytest.mark.requires("jinja2")
+def test_jinja2_blocks_all_attribute_access() -> None:
+    """Test that Jinja2 now blocks ALL attribute/method access for security.
+
+    After the fix, Jinja2 uses _RestrictedSandboxedEnvironment which blocks
+    ALL attribute access, not just dunder attributes. This prevents the
+    parse_raw() vulnerability.
+    """
+    msg = HumanMessage("test content")
+
+    # Test 1: Simple variable access should still work
+    prompt_simple = ChatPromptTemplate.from_messages(
+        [("human", "Message: {{message}}")],
+        template_format="jinja2",
+    )
+    result = prompt_simple.invoke({"message": "hello world"})
+    assert "hello world" in result.messages[0].content  # type: ignore[attr-defined]
+
+    # Test 2: Attribute access should now be blocked (including safe attributes)
+    prompt_attr = ChatPromptTemplate.from_messages(
+        [("human", "Content: {{msg.content}}")],
+        template_format="jinja2",
+    )
+    with pytest.raises(Exception, match="attribute") as exc_info:
+        prompt_attr.invoke({"msg": msg})
+
+    error_msg = str(exc_info.value)
+    assert (
+        "SecurityError" in str(type(exc_info.value))
+        or "Access to attributes is not allowed" in error_msg
+    ), f"Expected SecurityError blocking attribute access, got: {error_msg}"