Skip to content

Commit 4031867

Browse files
committed
Batch Ailly calls from Lliam
Rather than making a single Ailly call, this commit splits the policies into batches of 150 and calls Ailly once per batch. It also changes the default metadata prefix from [DEFAULT] to DEFAULT, because the bracketed form was causing build failures on the TCA side.
1 parent 52402b2 commit 4031867

File tree

5 files changed

+109
-25
lines changed

5 files changed

+109
-25
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ __pycache__
77
build/
88
dist/
99
.ailly_iam_policy
10+
*.log

aws_doc_sdk_examples_tools/agent/bin/main.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,35 @@
11
from pathlib import Path
22
from subprocess import run
33
from typing import List
4+
import time
5+
from datetime import timedelta, datetime
46

7+
import logging
58
import typer
69

710
from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
811
from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
912
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
1013
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
1114

15+
# Write each run's log to its own timestamped file. The timestamp is formatted
# explicitly because the default str(datetime.now()) contains a space and
# colons, which are not valid in Windows filenames and are awkward in shells.
logging.basicConfig(
    level=logging.INFO,
    filename=f"lliam-run-{datetime.now():%Y-%m-%dT%H-%M-%S}.log",
    filemode="w",
)
16+
logger = logging.getLogger(__name__)
17+
1218
app = typer.Typer()
1319

1420
AILLY_DIR = ".ailly_iam_policy"
1521
AILLY_DIR_PATH = Path(AILLY_DIR)
1622
IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"
1723

1824

25+
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Args:
        seconds: Elapsed time in seconds. Fractional seconds are truncated.
            May be negative, e.g. when computed from ``time.time()`` deltas
            taken across a system clock adjustment.

    Returns:
        The duration formatted as ``HH:MM:SS``. Hours are not wrapped at 24,
        so long runs render as e.g. ``100:00:00``. Negative durations are
        rendered as the magnitude prefixed with ``-``.
    """
    # timedelta normalises the input (rounding to microsecond resolution)
    # before the value is split into whole hours, minutes and seconds.
    whole_seconds = int(abs(timedelta(seconds=seconds).total_seconds()))
    # Only show a sign when something non-zero remains after truncation,
    # so that e.g. -0.4 renders as "00:00:00" rather than "-00:00:00".
    sign = "-" if seconds < 0 and whole_seconds else ""
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"
31+
32+
1933
@app.command()
2034
def update(
2135
iam_tributary_root: str,
@@ -34,8 +48,51 @@ def update(
3448
out_dir=AILLY_DIR_PATH,
3549
language="IAMPolicyGrammar",
3650
)
37-
run(["npx @ailly/cli@1.7.0-rc1", "--root", AILLY_DIR])
3851

52+
# Find all batch directories
53+
batch_dirs = [
54+
d.name
55+
for d in AILLY_DIR_PATH.iterdir()
56+
if d.is_dir() and d.name.startswith("batch_")
57+
]
58+
if batch_dirs:
59+
# Run ailly on each batch directory
60+
total_start_time = time.time()
61+
62+
for batch_dir in sorted(batch_dirs):
63+
batch_start_time = time.time()
64+
65+
cmd = [
66+
"ailly",
67+
"--max-depth",
68+
"10",
69+
"--root",
70+
AILLY_DIR,
71+
str(batch_dir),
72+
]
73+
logger.info(f"Running {cmd}")
74+
run(cmd)
75+
76+
batch_end_time = time.time()
77+
batch_duration = batch_end_time - batch_start_time
78+
batch_num = batch_dir.replace("batch_", "")
79+
logger.info(f"[TIMECHECK] Batch {batch_num} took {format_duration(batch_duration)} to run")
80+
81+
total_end_time = time.time()
82+
total_duration = total_end_time - total_start_time
83+
num_batches = len(batch_dirs)
84+
logger.info(f"[TIMECHECK] {num_batches} batches took {format_duration(total_duration)} to run")
85+
else:
86+
# Fallback to running on the main directory if no batches found
87+
run(
88+
[
89+
"/Users/corepyle/Documents/repos/ailly/cli/index.js",
90+
"--root",
91+
AILLY_DIR,
92+
]
93+
)
94+
95+
logger.info("Processing generated content")
3996
process_ailly_files(
4097
input_dir=str(AILLY_DIR_PATH), output_file=str(IAM_UPDATES_PATH)
4198
)

aws_doc_sdk_examples_tools/agent/make_prompts.py

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,14 @@
22

33
import logging
44
import os
5+
import yaml
56
from pathlib import Path
67
from typing import List
7-
import yaml
8-
9-
from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet
108

11-
DEFAULT_METADATA_PREFIX = "[DEFAULT]"
9+
from aws_doc_sdk_examples_tools.doc_gen import DocGen
1210

11+
DEFAULT_METADATA_PREFIX = "DEFAULT"
1312

14-
# Setup logging
15-
logging.basicConfig(level=logging.INFO)
1613
logger = logging.getLogger(__name__)
1714

1815

@@ -26,6 +23,8 @@ def make_doc_gen(root: Path) -> DocGen:
2623
def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
2724
examples = doc_gen.examples
2825
snippets = doc_gen.snippets
26+
27+
filtered_examples = []
2928
for example_id, example in examples.items():
3029
# TCXContentAnalyzer prefixes new metadata title/title_abbrev entries with
3130
# the DEFAULT_METADATA_PREFIX. Checking this here to make sure we're only
@@ -35,30 +34,56 @@ def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
3534
if title.startswith(DEFAULT_METADATA_PREFIX) and title_abbrev.startswith(
3635
DEFAULT_METADATA_PREFIX
3736
):
38-
prompt_path = out_dir / f"{example_id}.md"
39-
snippet_key = (
40-
example.languages[language]
41-
.versions[0]
42-
.excerpts[0]
43-
.snippet_files[0]
44-
.replace("/", ".")
45-
)
46-
snippet = snippets[snippet_key]
47-
prompt_path.write_text(snippet.code, encoding="utf-8")
37+
filtered_examples.append((example_id, example))
38+
39+
batch_size = 150
40+
total_examples = len(filtered_examples)
41+
num_batches = (total_examples + batch_size - 1) // batch_size
42+
43+
logger.info(
44+
f"Splitting {total_examples} examples into {num_batches} batches of {batch_size}"
45+
)
46+
47+
for batch_num in range(num_batches):
48+
batch_dir = out_dir / f"batch_{(batch_num + 1):03}"
49+
batch_dir.mkdir(exist_ok=True)
50+
51+
start_idx = batch_num * batch_size
52+
end_idx = min((batch_num + 1) * batch_size, total_examples)
53+
54+
for i in range(start_idx, end_idx):
55+
example_id, example = filtered_examples[i]
56+
prompt_path = batch_dir / f"{example_id}.md"
57+
58+
try:
59+
snippet_key = (
60+
example.languages[language]
61+
.versions[0]
62+
.excerpts[0]
63+
.snippet_files[0]
64+
.replace("/", ".")
65+
)
66+
snippet = snippets[snippet_key]
67+
prompt_path.write_text(snippet.code, encoding="utf-8")
68+
except (KeyError, IndexError, AttributeError) as e:
69+
logger.warning(f"Error processing example {example_id}: {e}")
4870

4971

5072
def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
5173
"""Create the .aillyrc configuration file."""
5274
fence = "---"
5375
options = {
5476
"isolated": "true",
55-
"mcp": {
56-
"awslabs.aws-documentation-mcp-server": {
57-
"type": "stdio",
58-
"command": "uvx",
59-
"args": ["awslabs.aws-documentation-mcp-server@latest"],
60-
}
61-
},
77+
"overwrite": "true"
78+
# MCP assistance did not produce noticeably different results, but it was
79+
# slowing things down by 10x. Disabled for now.
80+
# "mcp": {
81+
# "awslabs.aws-documentation-mcp-server": {
82+
# "type": "stdio",
83+
# "command": "uvx",
84+
# "args": ["awslabs.aws-documentation-mcp-server@latest"],
85+
# }
86+
# },
6287
}
6388
options_block = yaml.dump(options).strip()
6489
prompts_block = "\n".join(system_prompts)

aws_doc_sdk_examples_tools/agent/process_ailly_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def process_ailly_files(
106106
input_path = Path(input_dir)
107107

108108
try:
109-
for file_path in input_path.glob(file_pattern):
109+
for file_path in input_path.rglob(file_pattern):
110110
logger.info(f"Processing file: {file_path}")
111111
parsed_data = parse_ailly_file(str(file_path))
112112
if parsed_data:

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@
1919
"pathspec==0.11.2",
2020
"PyYAML==6.0.1",
2121
"yamale==4.0.4",
22+
"typer==0.16.0",
2223
],
2324
)

0 commit comments

Comments
 (0)