Skip to content

Commit 23837eb

Browse files
Replace Dagger with uvx in metadata generation script
- Removed Dagger dependency from new script
- Use uvx to run datamodel-codegen directly
- Simplified script significantly (no async, no container orchestration)
- Script is now ~280 lines vs ~290 lines with Dagger
- Addresses PR feedback to avoid Dagger in new code

Co-Authored-By: AJ Steers <aj@airbyte.io>
1 parent 66d4eeb commit 23837eb

File tree

1 file changed

+83
-93
lines changed

1 file changed

+83
-93
lines changed

bin/generate_connector_metadata_files.py

Lines changed: 83 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -17,18 +17,16 @@
1717
import tempfile
1818
from pathlib import Path
1919

20-
import anyio
21-
import dagger
22-
import yaml
20+
try:
21+
import yaml
22+
except ImportError:
23+
print("Error: pyyaml is required. Install with: pip install pyyaml", file=sys.stderr)
24+
sys.exit(1)
2325

24-
PYTHON_IMAGE = "python:3.10"
2526
OUTPUT_DIR_PATH = "airbyte_cdk/test/models/connector_metadata/generated"
2627
AIRBYTE_REPO_URL = "https://github.com/airbytehq/airbyte.git"
2728
SCHEMA_PATH = "airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src"
28-
29-
PIP_DEPENDENCIES = [
30-
"datamodel_code_generator==0.26.3",
31-
]
29+
DATAMODEL_CODEGEN_VERSION = "0.26.3"
3230

3331

3432
def clone_schemas_from_github(temp_dir: Path) -> Path:
@@ -64,29 +62,27 @@ def clone_schemas_from_github(temp_dir: Path) -> Path:
6462
return schemas_dir
6563

6664

67-
async def generate_models_single_file(
68-
dagger_client: dagger.Client,
69-
yaml_dir_path: str,
70-
output_file_path: str,
65+
def generate_models_single_file(
66+
yaml_dir_path: Path,
67+
output_file_path: Path,
68+
temp_dir: Path,
7169
) -> None:
7270
"""Generate all metadata models into a single Python file using datamodel-codegen."""
73-
codegen_container = (
74-
dagger_client.container()
75-
.from_(PYTHON_IMAGE)
76-
.with_exec(["mkdir", "-p", "/generated_temp"], use_entrypoint=True)
77-
.with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True)
78-
.with_mounted_directory(
79-
"/yaml", dagger_client.host().directory(yaml_dir_path, include=["*.yaml"])
80-
)
81-
)
71+
generated_temp = temp_dir / "generated_temp"
72+
generated_temp.mkdir(parents=True, exist_ok=True)
73+
74+
print("Running datamodel-codegen via uvx...", file=sys.stderr)
8275

83-
codegen_container = codegen_container.with_exec(
76+
subprocess.run(
8477
[
78+
"uvx",
79+
"--from",
80+
f"datamodel-code-generator=={DATAMODEL_CODEGEN_VERSION}",
8581
"datamodel-codegen",
8682
"--input",
87-
"/yaml",
83+
str(yaml_dir_path),
8884
"--output",
89-
"/generated_temp",
85+
str(generated_temp),
9086
"--disable-timestamp",
9187
"--enum-field-as-literal",
9288
"one",
@@ -97,53 +93,52 @@ async def generate_models_single_file(
9793
"deprecated",
9894
"deprecation_message",
9995
],
100-
use_entrypoint=True,
96+
check=True,
10197
)
10298

103-
generated_files = await codegen_container.directory("/generated_temp").entries()
104-
10599
future_imports = set()
106100
stdlib_imports = set()
107101
third_party_imports = set()
108102
classes_and_updates = []
109103

110-
for file_name in sorted(generated_files):
111-
if file_name.endswith(".py") and file_name != "__init__.py":
112-
content = await codegen_container.file(f"/generated_temp/{file_name}").contents()
113-
114-
lines = content.split("\n")
115-
in_imports = True
116-
in_relative_import_block = False
117-
class_content = []
118-
119-
for line in lines:
120-
if in_imports:
121-
if line.startswith("from __future__"):
122-
future_imports.add(line)
123-
elif (
124-
line.startswith("from datetime")
125-
or line.startswith("from enum")
126-
or line.startswith("from typing")
127-
or line.startswith("from uuid")
128-
):
129-
stdlib_imports.add(line)
130-
elif line.startswith("from pydantic") or line.startswith("import "):
131-
third_party_imports.add(line)
132-
elif line.startswith("from ."):
133-
in_relative_import_block = True
134-
if not line.rstrip().endswith(",") and not line.rstrip().endswith("("):
135-
in_relative_import_block = False
136-
elif in_relative_import_block:
137-
if line.strip().endswith(")"):
138-
in_relative_import_block = False
139-
elif line.strip() and not line.startswith("#"):
140-
in_imports = False
141-
class_content.append(line)
142-
else:
104+
for py_file in sorted(generated_temp.glob("*.py")):
105+
if py_file.name == "__init__.py":
106+
continue
107+
108+
content = py_file.read_text()
109+
lines = content.split("\n")
110+
in_imports = True
111+
in_relative_import_block = False
112+
class_content = []
113+
114+
for line in lines:
115+
if in_imports:
116+
if line.startswith("from __future__"):
117+
future_imports.add(line)
118+
elif (
119+
line.startswith("from datetime")
120+
or line.startswith("from enum")
121+
or line.startswith("from typing")
122+
or line.startswith("from uuid")
123+
):
124+
stdlib_imports.add(line)
125+
elif line.startswith("from pydantic") or line.startswith("import "):
126+
third_party_imports.add(line)
127+
elif line.startswith("from ."):
128+
in_relative_import_block = True
129+
if not line.rstrip().endswith(",") and not line.rstrip().endswith("("):
130+
in_relative_import_block = False
131+
elif in_relative_import_block:
132+
if line.strip().endswith(")"):
133+
in_relative_import_block = False
134+
elif line.strip() and not line.startswith("#"):
135+
in_imports = False
143136
class_content.append(line)
137+
else:
138+
class_content.append(line)
144139

145-
if class_content:
146-
classes_and_updates.append("\n".join(class_content))
140+
if class_content:
141+
classes_and_updates.append("\n".join(class_content))
147142

148143
import_sections = []
149144
if future_imports:
@@ -177,22 +172,18 @@ async def generate_models_single_file(
177172

178173
post_processed_content = "\n".join(filtered_lines)
179174

180-
codegen_container = codegen_container.with_new_file(
181-
"/generated/models.py", contents=post_processed_content
182-
)
183-
184-
await codegen_container.file("/generated/models.py").export(output_file_path)
175+
output_file_path.write_text(post_processed_content)
176+
print(f"Generated models: {output_file_path}", file=sys.stderr)
185177

186178

187-
def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: str) -> None:
179+
def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: Path) -> None:
188180
"""Consolidate all YAML schemas into a single JSON schema file."""
189181
schemas = {}
190182

191183
for yaml_file in yaml_dir_path.glob("*.yaml"):
192184
schema_name = yaml_file.stem
193-
with yaml_file.open("r") as f:
194-
schema_content = yaml.safe_load(f)
195-
schemas[schema_name] = schema_content
185+
schema_content = yaml.safe_load(yaml_file.read_text())
186+
schemas[schema_name] = schema_content
196187

197188
all_schema_names = set(schemas.keys())
198189

@@ -251,41 +242,40 @@ def fix_refs(obj, in_definition=False):
251242

252243
consolidated = fix_refs(consolidated, in_definition=False)
253244

254-
Path(output_json_path).write_text(json.dumps(consolidated, indent=2))
245+
output_json_path.write_text(json.dumps(consolidated, indent=2))
255246
print(f"Generated consolidated JSON schema: {output_json_path}", file=sys.stderr)
256247
else:
257248
print(
258249
"Warning: ConnectorMetadataDefinitionV0 not found, generating simple consolidation",
259250
file=sys.stderr,
260251
)
261-
Path(output_json_path).write_text(json.dumps(schemas, indent=2))
252+
output_json_path.write_text(json.dumps(schemas, indent=2))
262253

263254

264-
async def main():
265-
async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client:
266-
print("Generating connector metadata models...", file=sys.stderr)
255+
def main():
256+
print("Generating connector metadata models...", file=sys.stderr)
267257

268-
with tempfile.TemporaryDirectory() as temp_dir:
269-
temp_path = Path(temp_dir)
270-
schemas_dir = clone_schemas_from_github(temp_path)
258+
with tempfile.TemporaryDirectory() as temp_dir:
259+
temp_path = Path(temp_dir)
260+
schemas_dir = clone_schemas_from_github(temp_path)
271261

272-
output_dir = Path(OUTPUT_DIR_PATH)
273-
output_dir.mkdir(parents=True, exist_ok=True)
262+
output_dir = Path(OUTPUT_DIR_PATH)
263+
output_dir.mkdir(parents=True, exist_ok=True)
274264

275-
print("Generating single Python file with all models...", file=sys.stderr)
276-
output_file = str(output_dir / "models.py")
277-
await generate_models_single_file(
278-
dagger_client=dagger_client,
279-
yaml_dir_path=str(schemas_dir),
280-
output_file_path=output_file,
281-
)
265+
print("Generating single Python file with all models...", file=sys.stderr)
266+
output_file = output_dir / "models.py"
267+
generate_models_single_file(
268+
yaml_dir_path=schemas_dir,
269+
output_file_path=output_file,
270+
temp_dir=temp_path,
271+
)
282272

283-
print("Generating consolidated JSON schema...", file=sys.stderr)
284-
json_schema_file = str(output_dir / "metadata_schema.json")
285-
consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file)
273+
print("Generating consolidated JSON schema...", file=sys.stderr)
274+
json_schema_file = output_dir / "metadata_schema.json"
275+
consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file)
286276

287-
print("Connector metadata model generation complete!", file=sys.stderr)
277+
print("Connector metadata model generation complete!", file=sys.stderr)
288278

289279

290280
if __name__ == "__main__":
291-
anyio.run(main)
281+
main()

0 commit comments

Comments
 (0)