import tempfile
from pathlib import Path

-import anyio
-import dagger
-import yaml
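+# Guard the PyYAML import so a missing dependency fails with a clear message.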
+try:
+    import yaml
+except ImportError:
+    print("Error: pyyaml is required. Install with: pip install pyyaml", file=sys.stderr)
+    sys.exit(1)

-PYTHON_IMAGE = "python:3.10"
OUTPUT_DIR_PATH = "airbyte_cdk/test/models/connector_metadata/generated"
AIRBYTE_REPO_URL = "https://github.com/airbytehq/airbyte.git"
SCHEMA_PATH = "airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src"
-
-PIP_DEPENDENCIES = [
-    "datamodel_code_generator==0.26.3",
-]
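+# Single source of truth for the generator version passed to uvx below.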
+DATAMODEL_CODEGEN_VERSION = "0.26.3"


def clone_schemas_from_github(temp_dir: Path) -> Path:
@@ -64,29 +62,27 @@ def clone_schemas_from_github(temp_dir: Path) -> Path:
    return schemas_dir


-async def generate_models_single_file(
-    dagger_client: dagger.Client,
-    yaml_dir_path: str,
-    output_file_path: str,
+def generate_models_single_file(
+    yaml_dir_path: Path,
+    output_file_path: Path,
+    temp_dir: Path,
) -> None:
    """Generate all metadata models into a single Python file using datamodel-codegen."""
-    codegen_container = (
-        dagger_client.container()
-        .from_(PYTHON_IMAGE)
-        .with_exec(["mkdir", "-p", "/generated_temp"], use_entrypoint=True)
-        .with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True)
-        .with_mounted_directory(
-            "/yaml", dagger_client.host().directory(yaml_dir_path, include=["*.yaml"])
-        )
-    )
+    generated_temp = temp_dir / "generated_temp"
+    generated_temp.mkdir(parents=True, exist_ok=True)
+
+    print("Running datamodel-codegen via uvx...", file=sys.stderr)
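+    # uvx runs the pinned datamodel-code-generator in an ephemeral environment,
+    # so the generator never has to be installed as a project dependency.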

-    codegen_container = codegen_container.with_exec(
+    subprocess.run(
        [
+            "uvx",
+            "--from",
+            f"datamodel-code-generator=={DATAMODEL_CODEGEN_VERSION}",
            "datamodel-codegen",
            "--input",
-            "/yaml",
+            str(yaml_dir_path),
            "--output",
-            "/generated_temp",
+            str(generated_temp),
            "--disable-timestamp",
            "--enum-field-as-literal",
            "one",
@@ -97,53 +93,52 @@ async def generate_models_single_file(
            "deprecated",
            "deprecation_message",
        ],
-        use_entrypoint=True,
+        check=True,
    )

-    generated_files = await codegen_container.directory("/generated_temp").entries()
-
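+    # datamodel-codegen writes one module per YAML schema; merge them into a
+    # single file, deduplicating imports and dropping inter-module relative imports.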
    future_imports = set()
    stdlib_imports = set()
    third_party_imports = set()
    classes_and_updates = []

-    for file_name in sorted(generated_files):
-        if file_name.endswith(".py") and file_name != "__init__.py":
-            content = await codegen_container.file(f"/generated_temp/{file_name}").contents()
-
-            lines = content.split("\n")
-            in_imports = True
-            in_relative_import_block = False
-            class_content = []
-
-            for line in lines:
-                if in_imports:
-                    if line.startswith("from __future__"):
-                        future_imports.add(line)
-                    elif (
-                        line.startswith("from datetime")
-                        or line.startswith("from enum")
-                        or line.startswith("from typing")
-                        or line.startswith("from uuid")
-                    ):
-                        stdlib_imports.add(line)
-                    elif line.startswith("from pydantic") or line.startswith("import "):
-                        third_party_imports.add(line)
-                    elif line.startswith("from ."):
-                        in_relative_import_block = True
-                        if not line.rstrip().endswith(",") and not line.rstrip().endswith("("):
-                            in_relative_import_block = False
-                    elif in_relative_import_block:
-                        if line.strip().endswith(")"):
-                            in_relative_import_block = False
-                    elif line.strip() and not line.startswith("#"):
-                        in_imports = False
-                        class_content.append(line)
-                else:
+    for py_file in sorted(generated_temp.glob("*.py")):
+        if py_file.name == "__init__.py":
+            continue
+
+        content = py_file.read_text()
+        lines = content.split("\n")
+        in_imports = True
+        in_relative_import_block = False
+        class_content = []
+
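+        # Classify each line: collect __future__/stdlib/third-party imports into
+        # sets, skip relative-import blocks, and treat the rest as class content.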
+        for line in lines:
+            if in_imports:
+                if line.startswith("from __future__"):
+                    future_imports.add(line)
+                elif (
+                    line.startswith("from datetime")
+                    or line.startswith("from enum")
+                    or line.startswith("from typing")
+                    or line.startswith("from uuid")
+                ):
+                    stdlib_imports.add(line)
+                elif line.startswith("from pydantic") or line.startswith("import "):
+                    third_party_imports.add(line)
+                elif line.startswith("from ."):
+                    in_relative_import_block = True
+                    if not line.rstrip().endswith(",") and not line.rstrip().endswith("("):
+                        in_relative_import_block = False
+                elif in_relative_import_block:
+                    if line.strip().endswith(")"):
+                        in_relative_import_block = False
+                elif line.strip() and not line.startswith("#"):
+                    in_imports = False
                    class_content.append(line)
+            else:
+                class_content.append(line)

-            if class_content:
-                classes_and_updates.append("\n".join(class_content))
+        if class_content:
+            classes_and_updates.append("\n".join(class_content))

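+    # Rebuild the import block grouped by origin: __future__, stdlib, third-party.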
    import_sections = []
    if future_imports:
@@ -177,22 +172,18 @@ async def generate_models_single_file(

    post_processed_content = "\n".join(filtered_lines)

-    codegen_container = codegen_container.with_new_file(
-        "/generated/models.py", contents=post_processed_content
-    )
-
-    await codegen_container.file("/generated/models.py").export(output_file_path)
+    output_file_path.write_text(post_processed_content)
+    print(f"Generated models: {output_file_path}", file=sys.stderr)


-def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: str) -> None:
+def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: Path) -> None:
    """Consolidate all YAML schemas into a single JSON schema file."""
    schemas = {}

    for yaml_file in yaml_dir_path.glob("*.yaml"):
        schema_name = yaml_file.stem
-        with yaml_file.open("r") as f:
-            schema_content = yaml.safe_load(f)
-            schemas[schema_name] = schema_content
+        schema_content = yaml.safe_load(yaml_file.read_text())
+        schemas[schema_name] = schema_content

    all_schema_names = set(schemas.keys())

@@ -251,41 +242,40 @@ def fix_refs(obj, in_definition=False):

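+        # fix_refs (defined above) rewrites $ref targets so they resolve within
+        # the single consolidated document.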
        consolidated = fix_refs(consolidated, in_definition=False)

-        Path(output_json_path).write_text(json.dumps(consolidated, indent=2))
+        output_json_path.write_text(json.dumps(consolidated, indent=2))
        print(f"Generated consolidated JSON schema: {output_json_path}", file=sys.stderr)
    else:
        print(
            "Warning: ConnectorMetadataDefinitionV0 not found, generating simple consolidation",
            file=sys.stderr,
        )
-        Path(output_json_path).write_text(json.dumps(schemas, indent=2))
+        output_json_path.write_text(json.dumps(schemas, indent=2))


-async def main():
-    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client:
-        print("Generating connector metadata models...", file=sys.stderr)
+def main():
+    print("Generating connector metadata models...", file=sys.stderr)

-        with tempfile.TemporaryDirectory() as temp_dir:
-            temp_path = Path(temp_dir)
-            schemas_dir = clone_schemas_from_github(temp_path)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        schemas_dir = clone_schemas_from_github(temp_path)

-            output_dir = Path(OUTPUT_DIR_PATH)
-            output_dir.mkdir(parents=True, exist_ok=True)
+        output_dir = Path(OUTPUT_DIR_PATH)
+        output_dir.mkdir(parents=True, exist_ok=True)

-            print("Generating single Python file with all models...", file=sys.stderr)
-            output_file = str(output_dir / "models.py")
-            await generate_models_single_file(
-                dagger_client=dagger_client,
-                yaml_dir_path=str(schemas_dir),
-                output_file_path=output_file,
-            )
+        print("Generating single Python file with all models...", file=sys.stderr)
+        output_file = output_dir / "models.py"
+        generate_models_single_file(
+            yaml_dir_path=schemas_dir,
+            output_file_path=output_file,
+            temp_dir=temp_path,
+        )

-            print("Generating consolidated JSON schema...", file=sys.stderr)
-            json_schema_file = str(output_dir / "metadata_schema.json")
-            consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file)
+        print("Generating consolidated JSON schema...", file=sys.stderr)
+        json_schema_file = output_dir / "metadata_schema.json"
+        consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file)

-        print("Connector metadata model generation complete!", file=sys.stderr)
+    print("Connector metadata model generation complete!", file=sys.stderr)


if __name__ == "__main__":
-    anyio.run(main)
+    main()