Expose message history limit for PER Agent (#4016)

pyek-bot · dhrubo-os · web-flow · commit a39dd33259d3 · 2025-08-03T20:07:39.000-07:00
* feat: expose message history limit

Signed-off-by: Pavan Yekbote <pybot@amazon.com>

* spotless

Signed-off-by: Pavan Yekbote <pybot@amazon.com>

* feat: add test case for message history limit

Signed-off-by: Pavan Yekbote <pybot@amazon.com>

* spotless

Signed-off-by: Pavan Yekbote <pybot@amazon.com>

* chore: add comment about history limit context

Signed-off-by: Pavan Yekbote <pybot@amazon.com>

---------

Signed-off-by: Pavan Yekbote <pybot@amazon.com>
Co-authored-by: Dhrubo Saha <dhrubo@amazon.com>
diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/agent/MLPlanExecuteAndReflectAgentRunner.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/agent/MLPlanExecuteAndReflectAgentRunner.java
@@ -107,7 +107,6 @@ public class MLPlanExecuteAndReflectAgentRunner implements MLAgentRunner {
         "You are a dedicated helper agent working as part of a plan‑execute‑reflect framework. Your role is to receive a discrete task, execute all necessary internal reasoning or tool calls, and return a single, final response that fully addresses the task. You must never return an empty response. If you are unable to complete the task or retrieve meaningful information, you must respond with a clear explanation of the issue or what was missing. Under no circumstances should you end your reply with a question or ask for more information. If you search any index, always include the raw documents in the final result instead of summarizing the content. This is critical to give visibility into what the query retrieved.";
     private static final String DEFAULT_NO_ESCAPE_PARAMS = "tool_configs,_tools";
     private static final String DEFAULT_MAX_STEPS_EXECUTED = "20";
-    private static final int DEFAULT_MESSAGE_HISTORY_LIMIT = 10;
     private static final String DEFAULT_REACT_MAX_ITERATIONS = "20";
 
     // fields
@@ -138,6 +137,16 @@ public class MLPlanExecuteAndReflectAgentRunner implements MLAgentRunner {
     public static final String REFLECT_PROMPT_TEMPLATE_FIELD = "reflect_prompt_template";
     public static final String PLANNER_WITH_HISTORY_TEMPLATE_FIELD = "planner_with_history_template";
     public static final String EXECUTOR_MAX_ITERATIONS_FIELD = "executor_max_iterations";
+
+    // controls how many messages (last x) from planner memory are passed as context during planning phase
+    // these messages are added as completed steps in the reflect prompt
+    public static final String PLANNER_MESSAGE_HISTORY_LIMIT = "message_history_limit";
+    private static final String DEFAULT_MESSAGE_HISTORY_LIMIT = "10";
+
+    // controls how many messages from executor memory are passed as context during step execution
+    public static final String EXECUTOR_MESSAGE_HISTORY_LIMIT = "executor_message_history_limit";
+    private static final String DEFAULT_EXECUTOR_MESSAGE_HISTORY_LIMIT = "10";
+
     public static final String INJECT_DATETIME_FIELD = "inject_datetime";
     public static final String DATETIME_FORMAT_FIELD = "datetime_format";
 
@@ -271,7 +280,7 @@ public void run(MLAgent mlAgent, Map<String, String> apiParams, ActionListener<O
         String memoryId = allParams.get(MEMORY_ID_FIELD);
         String memoryType = mlAgent.getMemory().getType();
         String appType = mlAgent.getAppType();
-        int messageHistoryLimit = DEFAULT_MESSAGE_HISTORY_LIMIT;
+        int messageHistoryLimit = Integer.parseInt(allParams.getOrDefault(PLANNER_MESSAGE_HISTORY_LIMIT, DEFAULT_MESSAGE_HISTORY_LIMIT));
 
         // todo: use chat history instead of completed steps
         ConversationIndexMemory.Factory conversationIndexMemoryFactory = (ConversationIndexMemory.Factory) memoryFactoryMap.get(memoryType);
@@ -417,6 +426,11 @@ private void executePlanningLoop(
                 reactParams.put(SYSTEM_PROMPT_FIELD, allParams.getOrDefault(EXECUTOR_SYSTEM_PROMPT_FIELD, DEFAULT_EXECUTOR_SYSTEM_PROMPT));
                 reactParams.put(LLM_RESPONSE_FILTER, allParams.get(LLM_RESPONSE_FILTER));
                 reactParams.put(MAX_ITERATION, allParams.getOrDefault(EXECUTOR_MAX_ITERATIONS_FIELD, DEFAULT_REACT_MAX_ITERATIONS));
+                reactParams
+                    .put(
+                        MLAgentExecutor.MESSAGE_HISTORY_LIMIT,
+                        allParams.getOrDefault(EXECUTOR_MESSAGE_HISTORY_LIMIT, DEFAULT_EXECUTOR_MESSAGE_HISTORY_LIMIT)
+                    );
 
                 AgentMLInput agentInput = AgentMLInput
                     .AgentMLInputBuilder()
diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/agent/MLPlanExecuteAndReflectAgentRunnerTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/agent/MLPlanExecuteAndReflectAgentRunnerTest.java
@@ -47,6 +47,8 @@
 import org.opensearch.ml.common.agent.MLMemorySpec;
 import org.opensearch.ml.common.agent.MLToolSpec;
 import org.opensearch.ml.common.conversation.Interaction;
+import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet;
+import org.opensearch.ml.common.input.execute.agent.AgentMLInput;
 import org.opensearch.ml.common.output.model.ModelTensor;
 import org.opensearch.ml.common.output.model.ModelTensorOutput;
 import org.opensearch.ml.common.output.model.ModelTensors;
@@ -327,6 +329,58 @@ public void testExecutionWithHistory() {
         assertEquals("final result", responseTensor.getDataAsMap().get("response"));
     }
 
+    @Test
+    public void testMessageHistoryLimits() {
+        MLAgent mlAgent = createMLAgentWithTools();
+        // Stub MLPredictionTaskAction to respond with a single-step plan ("step1") and an empty result.
+        doAnswer(invocation -> {
+            ActionListener<Object> listener = invocation.getArgument(2);
+            ModelTensor modelTensor = ModelTensor
+                .builder()
+                .dataAsMap(ImmutableMap.of("response", "{\"steps\":[\"step1\"], \"result\":\"\"}"))
+                .build();
+            ModelTensors modelTensors = ModelTensors.builder().mlModelTensors(Arrays.asList(modelTensor)).build();
+            ModelTensorOutput mlModelTensorOutput = ModelTensorOutput.builder().mlModelOutputs(Arrays.asList(modelTensors)).build();
+            when(mlTaskResponse.getOutput()).thenReturn(mlModelTensorOutput);
+            listener.onResponse(mlTaskResponse);
+            return null;
+        }).when(client).execute(eq(MLPredictionTaskAction.INSTANCE), any(MLPredictionTaskRequest.class), any());
+        // Stub MLExecuteTaskAction to respond with a fixed "tool execution result" payload.
+        doAnswer(invocation -> {
+            ActionListener<Object> listener = invocation.getArgument(1);
+            ModelTensor modelTensor = ModelTensor.builder().dataAsMap(ImmutableMap.of("response", "tool execution result")).build();
+            ModelTensors modelTensors = ModelTensors.builder().mlModelTensors(Arrays.asList(modelTensor)).build();
+            ModelTensorOutput mlModelTensorOutput = ModelTensorOutput.builder().mlModelOutputs(Arrays.asList(modelTensors)).build();
+            when(mlExecuteTaskResponse.getOutput()).thenReturn(mlModelTensorOutput);
+            listener.onResponse(mlExecuteTaskResponse);
+            return null;
+        }).when(client).execute(eq(MLExecuteTaskAction.INSTANCE), any(MLExecuteTaskRequest.class), any());
+        // Stub updateInteraction so the listener is answered immediately with updateResponse.
+        doAnswer(invocation -> {
+            ActionListener<UpdateResponse> listener = invocation.getArgument(2);
+            listener.onResponse(updateResponse);
+            return null;
+        }).when(mlMemoryManager).updateInteraction(any(), any(), any());
+        // Request non-default limits (both defaults are "10"): 5 for the planner, 3 for the executor.
+        Map<String, String> params = new HashMap<>();
+        params.put("question", "test question");
+        params.put("memory_id", "test_memory_id");
+        params.put("parent_interaction_id", "test_parent_interaction_id");
+        params.put("message_history_limit", "5");
+        params.put("executor_message_history_limit", "3");
+        mlPlanExecuteAndReflectAgentRunner.run(mlAgent, params, agentActionListener);
+        // The planner limit must be the count passed to the conversation memory's getMessages call.
+        verify(conversationIndexMemory).getMessages(any(), eq(5));
+        // Capture the request sent to MLExecuteTaskAction; verify(...) also checks it was sent exactly once.
+        ArgumentCaptor<MLExecuteTaskRequest> executeCaptor = ArgumentCaptor.forClass(MLExecuteTaskRequest.class);
+        verify(client).execute(eq(MLExecuteTaskAction.INSTANCE), executeCaptor.capture(), any());
+        // The executor limit must arrive in the executor request under the "message_history_limit" key.
+        AgentMLInput agentInput = (AgentMLInput) executeCaptor.getValue().getInput();
+        RemoteInferenceInputDataSet dataset = (RemoteInferenceInputDataSet) agentInput.getInputDataset();
+        Map<String, String> executorParams = dataset.getParameters();
+        assertEquals("3", executorParams.get("message_history_limit"));
+    }
+
     // ToDo: add test case for when max steps is reached
 
     private MLAgent createMLAgentWithTools() {