Skip to content

Batch Ailly calls from Lliam #175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ __pycache__
build/
dist/
.ailly_iam_policy
*.log
52 changes: 51 additions & 1 deletion aws_doc_sdk_examples_tools/agent/bin/main.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
from pathlib import Path
from subprocess import run
from typing import List
import time
from datetime import timedelta, datetime

import logging
import typer

from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many

# Write INFO-level logs for this run to a fresh, timestamped file (relative
# to the current working directory). The timestamp is formatted explicitly
# because the default str(datetime) form contains a space and colons, and
# colons are not valid in Windows file names.
logging.basicConfig(
    level=logging.INFO,
    filename=f"lliam-run-{datetime.now():%Y-%m-%dT%H-%M-%S}.log",
    filemode="w",
)
logger = logging.getLogger(__name__)

app = typer.Typer()

# Directory handed to Ailly as --root, and the file that receives the
# processed Ailly output.
AILLY_DIR = ".ailly_iam_policy"
AILLY_DIR_PATH = Path(AILLY_DIR)
IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"


def format_duration(seconds: float) -> str:
    """Format a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    The value is rounded to the nearest whole second so the output always has
    the same shape, including for fractional inputs such as differences of
    ``time.time()`` readings. Hours are not wrapped into days, so a duration
    of 25 hours renders as ``25:00:00``. Negative durations are prefixed
    with ``-``.

    Args:
        seconds: Duration in seconds.

    Returns:
        The duration formatted as ``HH:MM:SS``.
    """
    total_seconds = round(abs(seconds))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    # Only show a sign when the rounded magnitude is non-zero ("-00:00:00"
    # would be misleading).
    sign = "-" if seconds < 0 and total_seconds else ""
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"


@app.command()
def update(
iam_tributary_root: str,
Expand All @@ -34,8 +47,45 @@ def update(
out_dir=AILLY_DIR_PATH,
language="IAMPolicyGrammar",
)
run(["npx @ailly/cli@1.7.0-rc1", "--root", AILLY_DIR])

batch_dirs = [
d.name
for d in AILLY_DIR_PATH.iterdir()
if d.is_dir() and d.name.startswith("batch_")
]

if batch_dirs:
total_start_time = time.time()

for batch_dir in sorted(batch_dirs):
batch_start_time = time.time()

cmd = [
"ailly",
"--max-depth",
"10",
"--root",
AILLY_DIR,
str(batch_dir),
]
logger.info(f"Running {cmd}")
run(cmd)

batch_end_time = time.time()
batch_duration = batch_end_time - batch_start_time
batch_num = batch_dir.replace("batch_", "")
logger.info(
f"[TIMECHECK] Batch {batch_num} took {format_duration(batch_duration)} to run"
)

total_end_time = time.time()
total_duration = total_end_time - total_start_time
num_batches = len(batch_dirs)
logger.info(
f"[TIMECHECK] {num_batches} batches took {format_duration(total_duration)} to run"
)

logger.info("Processing generated content")
process_ailly_files(
input_dir=str(AILLY_DIR_PATH), output_file=str(IAM_UPDATES_PATH)
)
Expand Down
71 changes: 48 additions & 23 deletions aws_doc_sdk_examples_tools/agent/make_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@

import logging
import os
import yaml
from pathlib import Path
from typing import List
import yaml

from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet

DEFAULT_METADATA_PREFIX = "[DEFAULT]"
from aws_doc_sdk_examples_tools.doc_gen import DocGen

DEFAULT_METADATA_PREFIX = "DEFAULT"

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand All @@ -26,6 +23,8 @@ def make_doc_gen(root: Path) -> DocGen:
def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
examples = doc_gen.examples
snippets = doc_gen.snippets

filtered_examples = []
for example_id, example in examples.items():
# TCXContentAnalyzer prefixes new metadata title/title_abbrev entries with
# the DEFAULT_METADATA_PREFIX. Checking this here to make sure we're only
Expand All @@ -35,30 +34,56 @@ def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
if title.startswith(DEFAULT_METADATA_PREFIX) and title_abbrev.startswith(
DEFAULT_METADATA_PREFIX
):
prompt_path = out_dir / f"{example_id}.md"
snippet_key = (
example.languages[language]
.versions[0]
.excerpts[0]
.snippet_files[0]
.replace("/", ".")
)
snippet = snippets[snippet_key]
prompt_path.write_text(snippet.code, encoding="utf-8")
filtered_examples.append((example_id, example))

batch_size = 150
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this a module-level CONSTANT at the top of the file.

total_examples = len(filtered_examples)
num_batches = (total_examples + batch_size - 1) // batch_size

logger.info(
f"Splitting {total_examples} examples into {num_batches} batches of {batch_size}"
)

for batch_num in range(num_batches):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://docs.python.org/3/library/itertools.html#itertools.batched

for batch_num, batch in enumerate(batched(filtered_examples, batch_size)):
  batch_dir...
  for example_id, example in batch:
    prompt_path...

Copy link
Contributor Author

@cpyle0819 cpyle0819 Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not added until 3.12, but I'll add the recipe from earlier versions.

batch_dir = out_dir / f"batch_{(batch_num + 1):03}"
batch_dir.mkdir(exist_ok=True)

start_idx = batch_num * batch_size
end_idx = min((batch_num + 1) * batch_size, total_examples)

for i in range(start_idx, end_idx):
example_id, example = filtered_examples[i]
prompt_path = batch_dir / f"{example_id}.md"

try:
snippet_key = (
example.languages[language]
.versions[0]
.excerpts[0]
.snippet_files[0]
.replace("/", ".")
)
snippet = snippets[snippet_key]
prompt_path.write_text(snippet.code, encoding="utf-8")
except (KeyError, IndexError, AttributeError) as e:
logger.warning(f"Error processing example {example_id}: {e}")


def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
"""Create the .aillyrc configuration file."""
fence = "---"
options = {
"isolated": "true",
"mcp": {
"awslabs.aws-documentation-mcp-server": {
"type": "stdio",
"command": "uvx",
"args": ["awslabs.aws-documentation-mcp-server@latest"],
}
},
"overwrite": "true",
# MCP assistance did not produce noticeably different results, but it was
# slowing things down by 10x. Disabled for now.
# "mcp": {
# "awslabs.aws-documentation-mcp-server": {
# "type": "stdio",
# "command": "uvx",
# "args": ["awslabs.aws-documentation-mcp-server@latest"],
# }
# },
}
options_block = yaml.dump(options).strip()
prompts_block = "\n".join(system_prompts)
Expand Down
3 changes: 1 addition & 2 deletions aws_doc_sdk_examples_tools/agent/process_ailly_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def parse_ailly_file(
if key in result:
result[key] = f"{prefix}{result[key]}"

result["title_abbrev"] = result["title"]
result["id"] = Path(file_path).name.split(".md.ailly.md")[0]
result["_source_file"] = file_path

Expand All @@ -106,7 +105,7 @@ def process_ailly_files(
input_path = Path(input_dir)

try:
for file_path in input_path.glob(file_pattern):
for file_path in input_path.rglob(file_pattern):
logger.info(f"Processing file: {file_path}")
parsed_data = parse_ailly_file(str(file_path))
if parsed_data:
Expand Down
6 changes: 5 additions & 1 deletion aws_doc_sdk_examples_tools/agent/update_doc_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

IAM_LANGUAGE = "IAMPolicyGrammar"


def examples_from_updates(updates_path: Path) -> Iterable[Example]:
"""
Expand Down Expand Up @@ -44,7 +46,9 @@ def update_examples(doc_gen: DocGen, examples: Iterable[Example]) -> None:
for example in examples:
if doc_gen_example := doc_gen.examples.get(example.id):
doc_gen_example.title = example.title
doc_gen_example.title_abbrev = example.title_abbrev
# This reduces the number of duplicate title_abbrev that occur due to similar policies
source = doc_gen_example.languages[IAM_LANGUAGE].versions[0].source.title
doc_gen_example.title_abbrev = f"{example.title_abbrev} (from {source})"
doc_gen_example.synopsis = example.synopsis
else:
logger.warning(f"Could not find example with id: {example.id}")
Expand Down
41 changes: 41 additions & 0 deletions aws_doc_sdk_examples_tools/agent/update_doc_gen_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import pytest
from pathlib import Path

from aws_doc_sdk_examples_tools.doc_gen import DocGen
from aws_doc_sdk_examples_tools.metadata import Example
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_examples


@pytest.fixture
def doc_gen_tributary():
    """
    Provide a DocGen loaded from the doc_gen_tributary_test resources, with
    its snippets already collected.
    """
    resources_dir = Path(__file__).parent.parent / "test_resources"
    loaded = DocGen.from_root(resources_dir / "doc_gen_tributary_test")
    loaded.collect_snippets()
    return loaded


def smoke_test_doc_gen(doc_gen_tributary: DocGen):
    """Check that the doc_gen_tributary fixture yields a DocGen instance."""
    # NOTE(review): this name does not start with "test", so pytest's default
    # collection pattern will likely skip it -- confirm whether that is
    # intended.
    fixture_value = doc_gen_tributary
    assert isinstance(fixture_value, DocGen)


def test_update_examples_title_abbrev(doc_gen_tributary: DocGen):
    """Test that update_examples appends "(from <source title>)" to title_abbrev."""
    # Build an update that targets the example defined in the test metadata
    # and carries a new title_abbrev.
    update_example = Example(
        id="iam_policies_example",
        file=None,
        languages={},
        title_abbrev="Updated Title Abbrev",
    )

    # Apply the update to the loaded DocGen.
    update_examples(doc_gen_tributary, [update_example])

    # "AWS Account Management" is the source title of iam_policies_example in
    # the test metadata, so it is the expected suffix.
    updated_example = doc_gen_tributary.examples["iam_policies_example"]
    assert (
        updated_example.title_abbrev
        == "Updated Title Abbrev (from AWS Account Management)"
    )
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,16 @@ IAMPolicy:
name: "&SAZR;"
link_template: "SomeTemplate"
guide: "&guide-iam-user;"
IAMPolicyGrammar:
property: json
syntax: json
sdk:
1:
long: "IAM policy"
short: "IAM policy"
guide: "IAM/latest/UserGuide/introduction.html"
api_ref:
uid: "IAMPolicy"
name: "&SAZR;"
link_template: "SomeTemplate"
guide: "&guide-iam-user;"
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
iam_policies_example:
category: IAMPolicy
languages:
IAMPolicyGrammar:
versions:
- authors:
- alias: amazonian@amazon.com
name: Sir Peccy
excerpts:
- description: test
owner: AWS/Documentation/Accounts Management Docs
sdk_version: 1
source:
title: AWS Account Management
url: https://code.amazon.com/packages/AccountControlApiDoc
services:
iam: {}
synopsis: This identity-based policy allows the attached identity to retrieve the
billing alternate contact information for a specific account within an organization.
title: Allow retrieval of a specific alternate contact type for an account
title_abbrev: Allow retrieval of a specific alternate contact type for an account
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@
"pathspec==0.11.2",
"PyYAML==6.0.1",
"yamale==4.0.4",
"typer==0.16.0",
],
)
Loading