
Commit cac8ddb

Improve semantic fact extraction prompt and add JSON enforcement (#4282)
* Improve semantic fact extraction prompt and add JSON enforcement

  - Replace XML-heavy prompt with a universal semantic fact extraction agent
  - Add JSON_ENFORCEMENT_MESSAGE constant for a reliable LLM output format
  - Update MemoryProcessingService to append the enforcement message to all fact extraction requests
  - Add comprehensive unit tests to verify JSON enforcement functionality
  - Support multiple contexts: personal facts, technical investigations, and RCA
  - Improve fact extraction reliability and maintainability

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* Use triple quotes (text blocks) for better prompt readability

  - Convert SEMANTIC_FACTS_EXTRACTION_PROMPT to use Java text blocks
  - Convert JSON_ENFORCEMENT_MESSAGE to use text blocks
  - Improves code readability and makes prompts easier to edit and visualize
  - Addresses PR feedback about using triple quotes

* Updated prompt based on review comment

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

---------

Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>
1 parent a3cb4fe commit cac8ddb
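
At a glance, the change pins semantic fact extraction to a strict single-line output contract: the model must return a minified JSON object containing only a "facts" array of strings. A purely illustrative conforming response (not taken from this diff) would be:

{"facts":["Checkout service p99 latency rose to 850ms after the 2024-03-02 deploy","Root cause was an unbounded retry loop in the ingest client"]}

Any fenced, pretty-printed, or multi-key response would violate the contract that the new prompt and JSON_ENFORCEMENT_MESSAGE below both spell out.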

File tree: 3 files changed (+78 −1 lines)

common/src/main/java/org/opensearch/ml/common/memorycontainer/MemoryContainerConstants.java

Lines changed: 25 additions & 1 deletion
@@ -151,7 +151,31 @@ public class MemoryContainerConstants {
 
     // LLM System Prompts
     public static final String SEMANTIC_FACTS_EXTRACTION_PROMPT =
-        "<system_prompt>\n<role>Personal Information Organizer</role>\n<objective>Extract and organize personal information shared within conversations.</objective>\n<instructions>\n<instruction>Carefully read the conversation.</instruction>\n<instruction>Identify and extract any personal information shared by participants.</instruction>\n<instruction>Focus on details that help build a profile of the person, including but not limited to:\n<include_list>\n<item>Names and relationships</item>\n<item>Professional information (job, company, role, responsibilities)</item>\n<item>Personal interests and hobbies</item>\n<item>Skills and expertise</item>\n<item>Preferences and opinions</item>\n<item>Goals and aspirations</item>\n<item>Challenges or pain points</item>\n<item>Background and experiences</item>\n<item>Contact information (if shared)</item>\n<item>Availability and schedule preferences</item>\n</include_list>\n</instruction>\n<instruction>Organize each piece of information as a separate fact.</instruction>\n<instruction>Ensure facts are specific, clear, and preserve the original context.</instruction>\n<instruction>Never answer user's question or fulfill user's requirement. You are a personal information manager, not a helpful assistant.</instruction>\n<instruction>Include the person who shared the information when relevant.</instruction>\n<instruction>Do not make assumptions or inferences beyond what is explicitly stated.</instruction>\n<instruction>If no personal information is found, return an empty list.</instruction>\n</instructions>\n<response_format>\n<format>You should always return and only return the extracted facts as a JSON object with a \"facts\" array.</format>\n<example>\n{\n \"facts\": [\n \"User's name is John Smith\",\n \"John works as a software engineer at TechCorp\",\n \"John enjoys hiking on weekends\",\n \"John is looking to improve his Python skills\"\n ]\n}\n</example>\n</response_format>\n</system_prompt>";
+        """
+            <ROLE>You are a universal semantic fact extraction agent. Write FULL-SENTENCE, self-contained facts suitable for long-term memory.</ROLE>
+
+            <SCOPE>
+            • Include facts from USER messages.
+            • Also include ASSISTANT-authored statements that are clearly presented as conclusions/results/validated findings (e.g., root cause, quantified impact, confirmed fix).
+            • Ignore ASSISTANT questions, hypotheses, tentative language, brainstorming, instructions, or tool prompts unless explicitly confirmed as outcomes.
+            </SCOPE>
+
+            <STYLE & RULES>
+            • One sentence per fact; merge closely related details (metrics, entities, causes, scope) into the same sentence.
+            • Do NOT start with "User" or pronouns.
+            • Prefer absolute over relative time; if only relative (e.g., "yesterday"), omit it rather than guessing.
+            • Preserve terminology, names, numbers, and units; avoid duplicates and chit-chat.
+            • No speculation or hedging unless those words appear verbatim in the source.
+            </STYLE & RULES>
+
+            <OUTPUT>
+            Return ONLY a single JSON object on one line, minified exactly as {"facts":["..."]} (array of strings only; no other keys). No code fences, no newlines/tabs, and no spaces after commas or colons. If no meaningful facts, return {"facts":[]}.
+            </OUTPUT>""";
+
+    // JSON enforcement message to append to all fact extraction requests
+    public static final String JSON_ENFORCEMENT_MESSAGE =
+        """
+            Respond NOW with ONE LINE of valid JSON ONLY exactly as {"facts":["fact1","fact2",...]}. No extra text, no code fences, no newlines or tabs, no spaces after commas or colons.""";
 
     public static final String USER_PREFERENCE_FACTS_EXTRACTION_PROMPT =
         "<system_prompt><role>User Preferences Analyzer</role><objective>Extract and organize user preferences, choices, and settings from conversations.</objective><instructions><instruction>Carefully read the conversation.</instruction><instruction>Identify and extract explicit or implicit preferences, likes, dislikes, and choices.</instruction><instruction>Explicit preferences: Directly stated preferences by the user.</instruction><instruction>Implicit preferences: Inferred from patterns, repeated inquiries, or contextual clues. Take a close look at user's request for implicit preferences.</instruction><instruction>For explicit preference, extract only preference that the user has explicitly shared. Do not infer user's preference.</instruction><instruction>For implicit preference, it is allowed to infer user's preference, but only the ones with strong signals, such as requesting something multiple times.</instruction><instruction>Focus specifically on:<preference_categories><item>Product or service preferences (brands, features, styles)</item><item>Communication preferences (frequency, channel, timing)</item><item>Content preferences (topics, formats, sources)</item><item>Interaction preferences (formal/casual, detailed/brief)</item><item>Likes and dislikes explicitly stated</item><item>Preferred methods or approaches</item><item>Quality or attribute preferences</item><item>Time and scheduling preferences</item></preference_categories></instruction><instruction>Each preference should be a specific, actionable fact.</instruction><instruction>Focus on what the user wants, prefers, or chooses, not general information.</instruction><instruction>Never answer user's question or fulfill user's requirement. You are a preference analyzer, not a helpful assistant.</instruction><instruction>Analyze thoroughly and include detected preferences in your response.</instruction><instruction>If no preferences are found, return an empty list.</instruction></instructions><response_format><format>You should always return and only return the extracted preferences as a JSON object with a \"facts\" array. Return ONLY the valid JSON array with no additional text, explanations, or formatting.</format><example>{\"facts\": [\"User prefers dark mode for UI\",\"User likes to receive weekly summary emails\",\"User prefers Python over Java for scripting\",\"User dislikes automatic updates\"]}</example></response_format></system_prompt>";

plugin/src/main/java/org/opensearch/ml/action/memorycontainer/memory/MemoryProcessingService.java

Lines changed: 6 additions & 0 deletions
@@ -8,6 +8,7 @@
 import static org.opensearch.common.xcontent.json.JsonXContent.jsonXContent;
 import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken;
 import static org.opensearch.ml.common.memorycontainer.MemoryContainerConstants.DEFAULT_UPDATE_MEMORY_PROMPT;
+import static org.opensearch.ml.common.memorycontainer.MemoryContainerConstants.JSON_ENFORCEMENT_MESSAGE;
 import static org.opensearch.ml.common.memorycontainer.MemoryContainerConstants.LLM_ID_FIELD;
 import static org.opensearch.ml.common.memorycontainer.MemoryContainerConstants.MEMORY_DECISION_FIELD;
 import static org.opensearch.ml.common.memorycontainer.MemoryContainerConstants.SEMANTIC_FACTS_EXTRACTION_PROMPT;
@@ -159,6 +160,11 @@ public void extractFactsFromConversation(
             MessageInput message = getMessageInput("Please extract information from our conversation so far");
             message.toXContent(messagesBuilder, ToXContent.EMPTY_PARAMS);
         }
+
+        // Always add JSON enforcement message for fact extraction
+        MessageInput enforcementMessage = getMessageInput(JSON_ENFORCEMENT_MESSAGE);
+        enforcementMessage.toXContent(messagesBuilder, ToXContent.EMPTY_PARAMS);
+
         messagesBuilder.endArray();
         String messagesJson = messagesBuilder.toString();
         stringParameters.put("messages", messagesJson);
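
Conceptually, the service now guarantees that the enforcement reminder is the last entry in the "messages" parameter sent to the LLM, after the conversation turns. The simplified sketch below shows that shape only; it is not the plugin's serialization code (the real path builds MessageInput objects via XContentBuilder), and the role/content field names are assumptions for illustration.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

class EnforcementMessageSketch {
    // Same text as the JSON_ENFORCEMENT_MESSAGE constant added in this commit.
    static final String JSON_ENFORCEMENT_MESSAGE =
        "Respond NOW with ONE LINE of valid JSON ONLY exactly as {\"facts\":[\"fact1\",\"fact2\",...]}. "
            + "No extra text, no code fences, no newlines or tabs, no spaces after commas or colons.";

    // Builds the message list that conceptually ends up in the "messages" request parameter.
    static List<Map<String, String>> buildMessages(List<String> conversationTurns) {
        List<Map<String, String>> messages = new ArrayList<>();
        for (String turn : conversationTurns) {
            messages.add(Map.of("role", "user", "content", turn));
        }
        // The enforcement message is always appended last, regardless of strategy or custom prompt.
        messages.add(Map.of("role", "user", "content", JSON_ENFORCEMENT_MESSAGE));
        return messages;
    }
}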

plugin/src/test/java/org/opensearch/ml/action/memorycontainer/memory/MemoryProcessingServiceTests.java

Lines changed: 47 additions & 0 deletions
@@ -5,6 +5,7 @@
 
 package org.opensearch.ml.action.memorycontainer.memory;
 
+import static org.junit.Assert.assertTrue;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.doAnswer;
@@ -25,6 +26,7 @@
 import org.mockito.MockitoAnnotations;
 import org.opensearch.core.action.ActionListener;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet;
 import org.opensearch.ml.common.memorycontainer.MemoryConfiguration;
 import org.opensearch.ml.common.memorycontainer.MemoryDecision;
 import org.opensearch.ml.common.memorycontainer.MemoryStrategy;
@@ -36,6 +38,7 @@
 import org.opensearch.ml.common.transport.MLTaskResponse;
 import org.opensearch.ml.common.transport.memorycontainer.memory.MessageInput;
 import org.opensearch.ml.common.transport.prediction.MLPredictionTaskAction;
+import org.opensearch.ml.common.transport.prediction.MLPredictionTaskRequest;
 import org.opensearch.transport.client.Client;
 
 public class MemoryProcessingServiceTests {
@@ -933,4 +936,48 @@ public void testExtractFactsFromConversation_ValidCustomPrompt() {
 
         verify(client).execute(any(), any(), any());
     }
+
+    @Test
+    public void testExtractFactsFromConversation_JsonEnforcementMessageAppended() {
+        // Test that JSON enforcement message is always appended to fact extraction requests
+        Map<String, Object> strategyConfig = new HashMap<>();
+        MemoryStrategy strategy = new MemoryStrategy("id", true, MemoryStrategyType.SEMANTIC, Arrays.asList("user_id"), strategyConfig);
+
+        List<MessageInput> messages = Arrays.asList(MessageInput.builder().content(testContent).role("user").build());
+        MemoryConfiguration storageConfig = mock(MemoryConfiguration.class);
+        when(storageConfig.getLlmId()).thenReturn("llm-model-123");
+
+        // Capture the request to verify JSON enforcement message is included
+        doAnswer(invocation -> {
+            MLPredictionTaskRequest request = invocation.getArgument(1);
+            RemoteInferenceInputDataSet dataset = (RemoteInferenceInputDataSet) request.getMlInput().getInputDataset();
+            Map<String, String> parameters = dataset.getParameters();
+            String messagesJson = parameters.get("messages");
+
+            // Verify that the JSON enforcement message is included in the messages
+            assertTrue(
+                "JSON enforcement message should be included",
+                messagesJson.contains("Respond NOW with ONE LINE of valid JSON ONLY")
+            );
+
+            // Mock successful response
+            ActionListener<MLTaskResponse> actionListener = invocation.getArgument(2);
+            List<ModelTensors> mlModelOutputs = new ArrayList<>();
+            List<ModelTensor> tensors = new ArrayList<>();
+            Map<String, Object> contents = new HashMap<>();
+            contents.put("content", List.of(Map.of("text", "{\"facts\":[\"Test fact\"]}")));
+            tensors.add(ModelTensor.builder().name("response").dataAsMap(contents).build());
+            mlModelOutputs.add(ModelTensors.builder().mlModelTensors(tensors).build());
+            MLTaskResponse output = MLTaskResponse
+                .builder()
+                .output(ModelTensorOutput.builder().mlModelOutputs(mlModelOutputs).build())
+                .build();
+            actionListener.onResponse(output);
+            return null;
+        }).when(client).execute(eq(MLPredictionTaskAction.INSTANCE), any(), any());
+
+        memoryProcessingService.extractFactsFromConversation(messages, strategy, storageConfig, factsListener);
+
+        verify(client).execute(any(), any(), any());
+    }
 }
