Skip to content

Commit 507a9ee

Browse files
authored
Merge pull request #2 from warestack/feat/structured-output-feasibility-agent
Implement Structured Output for FeasibilityAgent
2 parents dd7c000 + 1636094 commit 507a9ee

File tree

20 files changed

+633
-178
lines changed

20 files changed

+633
-178
lines changed

.github/workflows/tests.yml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: Tests
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
pull_request:
7+
branches: [ main ]
8+
9+
jobs:
10+
test:
11+
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
python-version: ["3.12"]
15+
16+
steps:
17+
- uses: actions/checkout@v4
18+
19+
- name: Install uv
20+
uses: astral-sh/setup-uv@v3
21+
with:
22+
version: "latest"
23+
24+
- name: Set up Python ${{ matrix.python-version }}
25+
run: uv python install ${{ matrix.python-version }}
26+
27+
- name: Install dependencies
28+
run: uv sync --all-extras
29+
30+
- name: Run all tests
31+
run: |
32+
echo "Running unit tests..."
33+
uv run pytest tests/unit/ -v --tb=short
34+
echo "Running integration tests (mocked - no real API calls)..."
35+
uv run pytest tests/integration/ -v --tb=short
36+
37+
- name: Upload coverage reports
38+
uses: codecov/codecov-action@v4
39+
if: matrix.python-version == '3.12'
40+
with:
41+
file: ./coverage.xml
42+
fail_ci_if_error: false

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ PLANNING.md
174174
.pdm-build/
175175
.ruff_cache/
176176
.vscode/
177+
.kiro
177178

178179
# Copilot
179180
.github/instructions/

README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,48 @@ rules:
113113
2. **Try acknowledgment workflow**: Comment `@watchflow acknowledge` when rules are violated
114114
3. **Verify rule enforcement**: Check that blocking rules prevent merging
115115

116+
## 🧪 Testing
117+
118+
The project includes unit and integration tests that mock OpenAI by default, so they run **without making real API calls**.
119+
120+
### Running Tests
121+
122+
```bash
123+
# Run all tests (mocked - no API costs)
124+
pytest
125+
126+
# Run only unit tests (very fast)
127+
pytest tests/unit/
128+
129+
# Run only integration tests (mocked)
130+
pytest tests/integration/
131+
```
132+
133+
### Test Structure
134+
135+
```
136+
tests/
137+
├── unit/ # ⚡ Fast unit tests (mocked OpenAI)
138+
│ └── test_feasibility_agent.py
139+
└── integration/ # 🌐 Full HTTP stack tests (mocked OpenAI)
140+
└── test_rules_api.py
141+
```
142+
143+
### Real API Testing (Local Development Only)
144+
145+
If you want to test with **real OpenAI API calls** locally:
146+
147+
```bash
148+
# Set environment variables
149+
export OPENAI_API_KEY="your-api-key"
150+
export INTEGRATION_TEST_REAL_API=true
151+
152+
# Run integration tests with real API calls (costs money!)
153+
pytest tests/integration/ -m integration
154+
```
155+
156+
**⚠️ Warning:** Real API tests make actual OpenAI calls and will cost money. They're disabled by default in CI/CD.
157+
116158
## Configuration
117159

118160
For advanced configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,13 +200,14 @@ python_functions = ["test_*"]
200200
addopts = [
201201
"--strict-markers",
202202
"--strict-config",
203-
"--cov=backend",
203+
"--cov=src",
204204
"--cov-report=term-missing",
205205
"--cov-report=html",
206206
"--cov-report=xml",
207207
]
208208
asyncio_mode = "auto"
209209

210+
210211
[tool.coverage.run]
211212
source = ["backend"]
212213
omit = [

src/agents/feasibility_agent/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,5 @@
66
"""
77

88
from .agent import RuleFeasibilityAgent
9-
from .models import FeasibilityResult
109

11-
__all__ = ["RuleFeasibilityAgent", "FeasibilityResult"]
10+
__all__ = ["RuleFeasibilityAgent"]

src/agents/feasibility_agent/agent.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from src.agents.base import AgentResult, BaseAgent
1010

11-
from .models import FeasibilityResult, FeasibilityState
11+
from .models import FeasibilityState
1212
from .nodes import analyze_rule_feasibility, generate_yaml_config
1313

1414
logger = logging.getLogger(__name__)
@@ -27,51 +27,53 @@ def _build_graph(self) -> StateGraph:
2727
workflow.add_node("analyze_feasibility", analyze_rule_feasibility)
2828
workflow.add_node("generate_yaml", generate_yaml_config)
2929

30-
# Add edges
30+
# Add edges with conditional logic
3131
workflow.add_edge(START, "analyze_feasibility")
32-
workflow.add_edge("analyze_feasibility", "generate_yaml")
32+
33+
# Conditional edge: only generate YAML if feasible
34+
workflow.add_conditional_edges(
35+
"analyze_feasibility",
36+
lambda state: "generate_yaml" if state.is_feasible else END,
37+
{"generate_yaml": "generate_yaml", END: END},
38+
)
39+
3340
workflow.add_edge("generate_yaml", END)
3441

42+
logger.info("🔧 FeasibilityAgent graph built with conditional structured output workflow")
3543
return workflow.compile()
3644

3745
async def execute(self, rule_description: str) -> AgentResult:
3846
"""
3947
Check if a rule description is feasible and return YAML or feedback.
4048
"""
4149
try:
50+
logger.info(f"🚀 Starting feasibility analysis for rule: {rule_description[:100]}...")
51+
4252
# Prepare initial state
4353
initial_state = FeasibilityState(rule_description=rule_description)
4454

4555
# Run the graph
4656
result = await self.graph.ainvoke(initial_state)
4757

58+
# Convert dict result back to FeasibilityState if needed
59+
if isinstance(result, dict):
60+
result = FeasibilityState(**result)
61+
62+
logger.info(f"✅ Feasibility analysis completed: feasible={result.is_feasible}, type={result.rule_type}")
63+
4864
# Convert to AgentResult
4965
return AgentResult(
50-
success=result.get("is_feasible", False),
51-
message=result.get("feedback", ""),
66+
success=result.is_feasible,
67+
message=result.feedback,
5268
data={
53-
"is_feasible": result.get("is_feasible", False),
54-
"yaml_content": result.get("yaml_content", ""),
55-
"confidence_score": result.get("confidence_score", 0.0),
56-
"rule_type": result.get("rule_type", ""),
57-
"analysis_steps": result.get("analysis_steps", []),
69+
"is_feasible": result.is_feasible,
70+
"yaml_content": result.yaml_content,
71+
"confidence_score": result.confidence_score,
72+
"rule_type": result.rule_type,
73+
"analysis_steps": result.analysis_steps,
5874
},
5975
)
6076

6177
except Exception as e:
62-
logger.error(f"Error in rule feasibility check: {e}")
78+
logger.error(f"Error in rule feasibility check: {e}")
6379
return AgentResult(success=False, message=f"Feasibility check failed: {str(e)}", data={})
64-
65-
async def check_feasibility(self, rule_description: str) -> FeasibilityResult:
66-
"""
67-
Legacy method for backwards compatibility.
68-
"""
69-
result = await self.execute(rule_description)
70-
71-
return FeasibilityResult(
72-
is_feasible=result.data.get("is_feasible", False),
73-
yaml_content=result.data.get("yaml_content", ""),
74-
feedback=result.message,
75-
confidence_score=result.data.get("confidence_score"),
76-
rule_type=result.data.get("rule_type"),
77-
)

src/agents/feasibility_agent/models.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,20 @@
55
from pydantic import BaseModel, Field
66

77

8-
class FeasibilityResult(BaseModel):
9-
"""Result of checking if a rule is feasible."""
10-
11-
is_feasible: bool
12-
yaml_content: str
13-
feedback: str
14-
confidence_score: float | None = None
15-
rule_type: str | None = None
8+
class FeasibilityAnalysis(BaseModel):
9+
"""Structured output model for rule feasibility analysis."""
10+
11+
is_feasible: bool = Field(description="Whether the rule is feasible to implement with Watchflow")
12+
rule_type: str = Field(description="Type of rule (time_restriction, branch_pattern, title_pattern, etc.)")
13+
confidence_score: float = Field(description="Confidence score from 0.0 to 1.0", ge=0.0, le=1.0)
14+
feedback: str = Field(description="Detailed feedback on implementation considerations")
15+
analysis_steps: list[str] = Field(description="Step-by-step analysis breakdown", default_factory=list)
16+
17+
18+
class YamlGeneration(BaseModel):
19+
"""Structured output model for YAML configuration generation."""
20+
21+
yaml_content: str = Field(description="Generated Watchflow YAML rule configuration")
1622

1723

1824
class FeasibilityState(BaseModel):

src/agents/feasibility_agent/nodes.py

Lines changed: 38 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2,118 +2,94 @@
22
LangGraph nodes for the Rule Feasibility Agent.
33
"""
44

5-
import json
65
import logging
76

87
from langchain_openai import ChatOpenAI
98

109
from src.core.config import config
1110

12-
from .models import FeasibilityState
11+
from .models import FeasibilityAnalysis, FeasibilityState, YamlGeneration
1312
from .prompts import RULE_FEASIBILITY_PROMPT, YAML_GENERATION_PROMPT
1413

1514
logger = logging.getLogger(__name__)
1615

1716

18-
def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState:
17+
async def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState:
1918
"""
20-
Analyze whether a rule description is feasible to implement.
19+
Analyze whether a rule description is feasible to implement using structured output.
2120
"""
2221
try:
23-
# Create LLM client directly using centralized config
22+
# Create LLM client with structured output
2423
llm = ChatOpenAI(
2524
api_key=config.ai.api_key,
2625
model=config.ai.model,
2726
max_tokens=config.ai.max_tokens,
2827
temperature=config.ai.temperature,
2928
)
3029

30+
# Use structured output instead of manual JSON parsing
31+
structured_llm = llm.with_structured_output(FeasibilityAnalysis)
32+
3133
# Analyze rule feasibility
3234
prompt = RULE_FEASIBILITY_PROMPT.format(rule_description=state.rule_description)
3335

34-
response = llm.invoke(prompt)
35-
36-
# Log the raw response for debugging
37-
logger.info(f"Raw LLM response: {response.content}")
38-
39-
# Check if response is empty
40-
if not response.content or response.content.strip() == "":
41-
logger.error("LLM returned empty response")
42-
state.is_feasible = False
43-
state.feedback = "Analysis failed: LLM returned empty response"
44-
return state
45-
46-
# Try to parse JSON with better error handling
47-
try:
48-
result = json.loads(response.content.strip())
49-
except json.JSONDecodeError as json_error:
50-
logger.error(f"Failed to parse JSON response: {json_error}")
51-
logger.error(f"Response content: {response.content}")
52-
53-
# Try to extract JSON from markdown code blocks if present
54-
content = response.content.strip()
55-
if content.startswith("```json"):
56-
content = content[7:] # Remove ```json
57-
elif content.startswith("```"):
58-
content = content[3:] # Remove ```
59-
if content.endswith("```"):
60-
content = content[:-3] # Remove trailing ```
61-
62-
try:
63-
result = json.loads(content.strip())
64-
logger.info("Successfully extracted JSON from markdown code blocks")
65-
except json.JSONDecodeError:
66-
# If all parsing attempts fail, set default values
67-
logger.error("All JSON parsing attempts failed")
68-
state.is_feasible = False
69-
state.feedback = (
70-
f"Analysis failed: Could not parse LLM response as JSON. Raw response: {response.content[:200]}..."
71-
)
72-
return state
73-
74-
# Update state with analysis results
75-
state.is_feasible = result.get("is_feasible", False)
76-
state.rule_type = result.get("rule_type", "")
77-
state.confidence_score = result.get("confidence_score", 0.0)
78-
state.yaml_content = result.get("yaml_content", "")
79-
state.feedback = result.get("feedback", "")
80-
state.analysis_steps = result.get("analysis_steps", [])
81-
82-
logger.info(f"Rule feasibility analysis completed: {state.is_feasible}")
36+
# Get structured response - no more JSON parsing needed!
37+
result = await structured_llm.ainvoke(prompt)
38+
39+
# Update state with analysis results - now type-safe!
40+
state.is_feasible = result.is_feasible
41+
state.rule_type = result.rule_type
42+
state.confidence_score = result.confidence_score
43+
state.feedback = result.feedback
44+
state.analysis_steps = result.analysis_steps
45+
46+
logger.info(f"🔍 Rule feasibility analysis completed: {state.is_feasible}")
47+
logger.info(f"🔍 Rule type identified: {state.rule_type}")
48+
logger.info(f"🔍 Confidence score: {state.confidence_score}")
8349

8450
except Exception as e:
85-
logger.error(f"Error in rule feasibility analysis: {e}")
51+
logger.error(f"Error in rule feasibility analysis: {e}")
8652
state.is_feasible = False
8753
state.feedback = f"Analysis failed: {str(e)}"
54+
state.confidence_score = 0.0
8855

8956
return state
9057

9158

92-
def generate_yaml_config(state: FeasibilityState) -> FeasibilityState:
59+
async def generate_yaml_config(state: FeasibilityState) -> FeasibilityState:
9360
"""
94-
Generate YAML configuration for feasible rules.
61+
Generate YAML configuration for feasible rules using structured output.
62+
The graph routes here only for feasible rules; the node also returns the state unchanged when the rule is infeasible or has no rule type.
9563
"""
9664
if not state.is_feasible or not state.rule_type:
65+
logger.info("🔧 Skipping YAML generation - rule not feasible or no rule type")
9766
return state
9867

9968
try:
100-
# Create LLM client directly using centralized config
69+
# Create LLM client with structured output
10170
llm = ChatOpenAI(
10271
api_key=config.ai.api_key,
10372
model=config.ai.model,
10473
max_tokens=config.ai.max_tokens,
10574
temperature=config.ai.temperature,
10675
)
10776

77+
# Use structured output for YAML generation
78+
structured_llm = llm.with_structured_output(YamlGeneration)
79+
10880
prompt = YAML_GENERATION_PROMPT.format(rule_type=state.rule_type, rule_description=state.rule_description)
10981

110-
response = llm.invoke(prompt)
111-
state.yaml_content = response.content.strip()
82+
# Get structured response
83+
result = await structured_llm.ainvoke(prompt)
84+
85+
# Update state with generated YAML
86+
state.yaml_content = result.yaml_content.strip()
11287

113-
logger.info(f"YAML configuration generated for rule type: {state.rule_type}")
88+
logger.info(f"🔧 YAML configuration generated for rule type: {state.rule_type}")
89+
logger.info(f"🔧 Generated YAML length: {len(state.yaml_content)} characters")
11490

11591
except Exception as e:
116-
logger.error(f"Error generating YAML configuration: {e}")
92+
logger.error(f"Error generating YAML configuration: {e}")
11793
state.feedback += f"\nYAML generation failed: {str(e)}"
11894

11995
return state

0 commit comments

Comments
 (0)