Passing arbitrary configuration

ypapadop-amd · ypapadop-amd · commit f6b8e2ad568b · 2025-04-21T12:06:47.000-04:00
diff --git a/programming_examples/basic/vector_vector_add/vector_vector_add.py b/programming_examples/basic/vector_vector_add/vector_vector_add.py
@@ -18,7 +18,7 @@
 
 
 @iron.jit(is_placed=False)
-def vector_vector_add(device, input0, input1, output):
+def vector_vector_add(config, input0, input1, output):
     if input0.shape != input1.shape:
         raise ValueError(
             f"Input shapes are not the equal ({input0.shape} != {input1.shape})."
@@ -81,7 +81,7 @@ def core_body(of_in1, of_in2, of_out):
         rt.drain(of_out.cons(), C, wait=True)
 
     # Place program components (assign them resources on the device) and generate an MLIR module
-    return Program(device, rt).resolve_program(SequentialPlacer())
+    return Program(config['device'], rt).resolve_program(SequentialPlacer())
 
 
 def main():
@@ -118,7 +118,7 @@ def main():
 
     # JIT-compile the kernel then launches the kernel with the given arguments. Future calls
     # to the kernel will use the same compiled kernel and loaded code objects
-    vector_vector_add(device_map[args.device], input0, input1, output)
+    vector_vector_add({'device': device_map[args.device]}, input0, input1, output)
 
     # Check the correctness of the result
     e = np.equal(input0.numpy() + input1.numpy(), output.numpy())
diff --git a/programming_examples/basic/vector_vector_add/vector_vector_add_placed.py b/programming_examples/basic/vector_vector_add/vector_vector_add_placed.py
@@ -17,7 +17,7 @@
 
 
 @iron.jit
-def vector_vector_add(dev, column_id, input0, input1, output):
+def vector_vector_add(config, input0, input1, output):
     if input0.shape != input1.shape:
         raise ValueError(
             f"Input shapes are not the equal ({input0.shape} != {input1.shape})."
@@ -48,16 +48,16 @@ def vector_vector_add(dev, column_id, input0, input1, output):
 
     buffer_depth = 2
 
-    @device(dev)
+    @device(config['device'])
     def device_body():
         tensor_ty = np.ndarray[(num_elements,), np.dtype[dtype]]
         tile_ty = np.ndarray[(n,), np.dtype[dtype]]
 
         # AIE Core Function declarations
 
         # Tile declarations
-        ShimTile = tile(column_id, 0)
-        ComputeTile2 = tile(column_id, 2)
+        ShimTile = tile(config['column_id'], 0)
+        ComputeTile2 = tile(config['column_id'], 2)
 
         # AIE-array data movement with object fifos
         of_in1 = object_fifo("in1", ShimTile, ComputeTile2, buffer_depth, tile_ty)
@@ -128,13 +128,22 @@ def main():
 
     # Construct two input random tensors and an output zeroed tensor
     # The three tensor are in memory accessible to the NPU
-    input0 = iron.randint(0, 100, (args.num_elements,), dtype=np.int32, device=args.device)
-    input1 = iron.randint(0, 100, (args.num_elements,), dtype=np.int32, device=args.device)
+    input0 = iron.randint(
+        0, 100, (args.num_elements,), dtype=np.int32, device=args.device
+    )
+    input1 = iron.randint(
+        0, 100, (args.num_elements,), dtype=np.int32, device=args.device
+    )
     output = iron.zeros_like(input0)
 
     # JIT-compile the kernel then launches the kernel with the given arguments. Future calls
     # to the kernel will use the same compiled kernel and loaded code objects
-    vector_vector_add(device_map[args.device], args.column, input0, input1, output)
+    vector_vector_add(
+        {"device": device_map[args.device], "column_id": args.column},
+        input0,
+        input1,
+        output,
+    )
 
     # Check the correctness of the result
     e = np.equal(input0.numpy() + input1.numpy(), output.numpy())
diff --git a/python/iron/jit.py b/python/iron/jit.py
@@ -81,7 +81,7 @@ def __init__(
         self.__insts_buffer_bo.sync(xrt.xclBOSyncDirection.XCL_BO_SYNC_BO_TO_DEVICE)
 
     # Blocking call.
-    def __call__(self, *args):
+    def __call__(self, config, *args):
         """
         Allows the kernel to be called as a function with the provided arguments.
 
@@ -101,7 +101,7 @@ def __call__(self, *args):
         h = self.__kernel(opcode, self.__insts_buffer_bo, self.__n_insts, *kernel_args)
         r = h.wait()
         if r != xrt.ert_cmd_state.ERT_CMD_STATE_COMPLETED:
-            raise Exception(f"Kernel returned {r}")
+            raise NPUKernel_Error(f"Kernel returned {r}")
 
     def __del__(self):
         """
@@ -169,7 +169,7 @@ def wrapped_function(*args, **kwargs):
                 )
 
             kernel_name = "MLIR_AIE"
-            return NPUKernel(xclbin_path, inst_path, kernel_name=kernel_name)
+            return NPUKernel(xclbin_path, inst_path, kernel_name=kernel_name)(*args, **kwargs)
 
         return wrapped_function