Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,21 @@ This file contains the changelog for the Deeploy project. The changelog is divid


### List of Pull Requests
- Improve Docstring and Debugging [#160](https://github.com/pulp-platform/Deeploy/pull/160)
- Use Pre-Commit in CI [#159](https://github.com/pulp-platform/Deeploy/pull/159)
- Deeploy-GAP9 Platform [#143](https://github.com/pulp-platform/Deeploy/pull/143)
- Update CLI interface Across Project, Fix Tutorial, and Remove Legacy Test [#157](https://github.com/pulp-platform/Deeploy/pull/157)

### Added
- Add many missing docstrings
- Add `__repr__()` function for `_ReferenceBuffer` class
- Add GAP9 platform support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows

### Changed
- Move `MemoryAwareClosureGeneration` pass to `MemoryLevelExtension`
- Move `MemoryAwarePrint*` passes to `MemoryLevelExtension`
- Make `sizeInBytes` a class property instead of a function
- Move `AnnotateNeurekaWeightMemoryLevel` to `Neureka` specific folder
- Switch CI to use pre-commit for linting
- Update `pulp-nnx` and `pulp-nn-mixed` submodules to their latest versions
- Move PULP-NN to the `TargetLibraries` third-party folder
Expand Down
235 changes: 188 additions & 47 deletions Deeploy/CommonExtensions/CodeTransformationPasses/Closure.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,76 @@


class ClosureExecutionBlock(ExecutionBlock):
    """
    ExecutionBlock that stands in for another block wrapped in a closure.

    In addition to whatever ExecutionBlock itself stores, an instance keeps
    a reference to the block it wraps in ``closureBlock``. Wrappers may be
    stacked on top of each other; ``baseBlock`` walks such a stack down to
    the first block that is not a ClosureExecutionBlock.

    Attributes
    ----------
    closureBlock : ExecutionBlock or None
        The directly wrapped execution block, or None if none was given.
    """

    def __init__(self, nodeTemplate = None, closureBlock: Optional[ExecutionBlock] = None):
        """
        Create the wrapper and remember the wrapped block.

        Parameters
        ----------
        nodeTemplate : NodeTemplate, optional
            Forwarded unchanged to ``ExecutionBlock.__init__``.
            Default is None.
        closureBlock : ExecutionBlock, optional
            The execution block wrapped by this closure block.
            Default is None.
        """
        super().__init__(nodeTemplate)
        self.closureBlock = closureBlock

    @property
    def baseBlock(self):
        """
        Return the innermost wrapped block that is not itself a closure block.

        Returns
        -------
        ExecutionBlock or None
            The first object reached by following ``closureBlock`` links
            that is not a ClosureExecutionBlock. This is None when the
            innermost wrapper was created without a wrapped block.
        """
        # Follow the chain of wrappers iteratively until something that is
        # not a ClosureExecutionBlock (possibly None) is reached.
        innermost = self.closureBlock
        while isinstance(innermost, ClosureExecutionBlock):
            innermost = innermost.closureBlock
        return innermost


class ClosureGeneration(CodeTransformationPass, IntrospectiveCodeTransformationMixIn):
"""
Code transformation pass for generating function closures.

This class transforms execution blocks into closure-based code patterns
where functions are wrapped with argument structures. It generates the
necessary struct definitions, closure functions, and call sites to
enable closure-based execution patterns in generated code.


Notes
-----
The closure generation process involves:
1. Analyzing the execution block to identify dynamic references
2. Creating a struct type to hold closure arguments
3. Generating the closure function definition
4. Replacing the original call with a closure call
5. Optionally generating argument writeback code
"""

closureStructArgType: Dict[str, Type[Union[Pointer, Immediate, Struct]]]
closureStructArgs: Dict[str, Union[Pointer, Immediate, Struct]]
Expand All @@ -75,6 +132,22 @@ def __init__(self,
closureSuffix = "_closure",
writeback: bool = True,
generateStruct: bool = True):
"""
Initialize the ClosureGeneration transformation pass.

Parameters
----------
closureCallTemplate : NodeTemplate, optional
Template for generating closure function calls. Default is the
global _closureCallTemplate.
closureSuffix : str, optional
Suffix to append to closure function names. Default is "_closure".
writeback : bool, optional
Whether to generate writeback code for closure arguments.
Default is True.
generateStruct : bool, optional
Whether to generate argument structure definitions. Default is True.
"""
super().__init__()
self.closureSuffix = closureSuffix
self.closureTemplate = _closureTemplate
Expand All @@ -86,6 +159,31 @@ def __init__(self,

# Don't override this
def _generateClosureStruct(self, ctxt: NetworkContext, executionBlock: ExecutionBlock):
"""
Generate the closure argument structure.

Analyzes the execution block to identify dynamic references and creates
a struct type to hold all closure arguments. This struct will be used
to pass arguments to the closure function.

Parameters
----------
ctxt : NetworkContext
The network context containing buffer information.
executionBlock : ExecutionBlock
The execution block to analyze for dynamic references.

Notes
-----
This method populates the following instance attributes:
- closureStructArgType: The struct class type for closure arguments
- closureStructArgs: The struct instance with argument mappings

The method handles different buffer types:
- TransientBuffer: Mapped to void pointers
- StructBuffer: Excluded from closure arguments
- Other buffers: Use their native types
"""

# Add closure struct info to operatorRepresentation
closureStructArgsType: Dict[str, Type[Union[Pointer, Immediate, Struct]]] = {}
Expand All @@ -108,6 +206,31 @@ def _generateClosureStruct(self, ctxt: NetworkContext, executionBlock: Execution

# Don't override this
def _generateClosureCtxt(self, ctxt: NetworkContext, nodeName: str) -> NetworkContext:
"""
Generate closure context and global definitions.

Creates the closure function definition and struct type definition,
then hoists them to the global scope. This includes generating
the actual closure function code and the argument struct typedef.

Parameters
----------
ctxt : NetworkContext
The network context to modify with global definitions.
nodeName : str
The name of the node for tracking dependencies.

Returns
-------
NetworkContext
The modified network context with closure definitions added.

Notes
-----
This method generates and hoists the following global definitions:
- Closure argument struct typedef
- Closure function definition with argument casting and optional writeback
"""

ret = ctxt.hoistStruct(self.closureStructArgs, self.closureName + "_args", self.closureStructArgType)
ctxt.lookup(ret)._users.append(nodeName)
Expand All @@ -133,6 +256,36 @@ def _generateClosureCtxt(self, ctxt: NetworkContext, nodeName: str) -> NetworkCo
# Don't override this
def _generateClosureCall(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
nodeName: str) -> Tuple[NetworkContext, ExecutionBlock]:
"""
Generate the closure call and replace the original execution block.

Creates a new ClosureExecutionBlock that wraps the original execution
with closure call code. This includes the closure function call and
optional argument writeback code.

Parameters
----------
ctxt : NetworkContext
The network context for code generation.
executionBlock : ExecutionBlock
The original execution block to wrap with closure calls.
nodeName : str
The name of the node for struct generation.

Returns
-------
Tuple[NetworkContext, ExecutionBlock]
A tuple containing:
- The modified network context
- The new ClosureExecutionBlock with closure calls

Notes
-----
This method replaces the original function call with:
1. A closure function call (added to the left)
2. Optional argument writeback code (added to the right if enabled)
3. Optional argument struct generation
"""

allArgs = {
"closureName": self.closureName,
Expand All @@ -158,57 +311,45 @@ def apply(self,
executionBlock: ExecutionBlock,
name: str,
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
"""
Apply the closure generation transformation.

Transforms the given execution block into a closure-based pattern
by generating the necessary struct, closure function, and call site.
This is the main entry point for the closure transformation.

Parameters
----------
ctxt : NetworkContext
The network context containing buffer and type information.
executionBlock : ExecutionBlock
The execution block to transform into a closure pattern.
name : str
The base name for generating closure-related identifiers.
verbose : CodeGenVerbosity, optional
The verbosity level for code generation. Default is _NoVerbosity.

Returns
-------
Tuple[NetworkContext, ExecutionBlock]
A tuple containing:
- The modified network context with closure definitions
- The new ClosureExecutionBlock with closure call patterns

Notes
-----
The transformation process includes:
1. Generating a unique closure name with the specified suffix
2. Capturing the original function call code
3. Creating the closure argument struct
4. Generating the closure function definition in global scope
5. Replacing the original call with a closure call pattern
"""

# Prepend underscore to avoid name issues when beginning with problematic characters (like numbers)
self.closureName = "_" + name + self.closureSuffix
self.functionCall = executionBlock.generate(ctxt)
self._generateClosureStruct(ctxt, executionBlock)
ctxt = self._generateClosureCtxt(ctxt, name)
ctxt, executionBlock = self._generateClosureCall(ctxt, executionBlock, name)
return ctxt, executionBlock


class MemoryAwareClosureGeneration(ClosureGeneration):
    """
    ClosureGeneration variant that filters closure arguments by memory level.

    The closure argument struct is built only from the dynamic references
    whose buffers pass a check on their ``_memoryLevel`` attribute against
    ``startRegion`` and ``endRegion`` (see ``_generateClosureStruct``).

    Attributes
    ----------
    startRegion : str
        Memory level name compared for equality in the filter.
    endRegion : str
        Memory level name compared for inequality in the filter.
    """

    def __init__(self,
                 closureCallTemplate: NodeTemplate = _closureCallTemplate,
                 closureSuffix = "_closure",
                 writeback: bool = True,
                 generateStruct: bool = True,
                 startRegion: str = "L2",
                 endRegion: str = "L1"):
        """
        Initialize the pass and store the two memory regions.

        Parameters
        ----------
        closureCallTemplate : NodeTemplate, optional
            Forwarded to ``ClosureGeneration.__init__``. Default is the
            module-level ``_closureCallTemplate``.
        closureSuffix : str, optional
            Forwarded to ``ClosureGeneration.__init__``. Default is
            "_closure".
        writeback : bool, optional
            Forwarded to ``ClosureGeneration.__init__``. Default is True.
        generateStruct : bool, optional
            Forwarded to ``ClosureGeneration.__init__``. Default is True.
        startRegion : str, optional
            Stored as ``self.startRegion``. Default is "L2".
        endRegion : str, optional
            Stored as ``self.endRegion``. Default is "L1".
        """
        super().__init__(closureCallTemplate, closureSuffix, writeback, generateStruct)
        self.startRegion = startRegion
        self.endRegion = endRegion

    # Replaces the struct generation of ClosureGeneration; don't override this
    def _generateClosureStruct(self, ctxt: NetworkContext, executionBlock: ExecutionBlock):
        """
        Build the closure argument struct from memory-level-filtered references.

        Parameters
        ----------
        ctxt : NetworkContext
            Context used to look up buffers and to mangle argument names.
        executionBlock : ExecutionBlock
            Block whose dynamic references are extracted (with structs
            unrolled).

        Notes
        -----
        A reference is kept when its buffer has no ``_memoryLevel``
        attribute, when that attribute is None, when it equals
        ``startRegion``, or when it differs from ``endRegion``.
        NOTE(review): whenever ``startRegion != endRegion`` the last two
        tests together reduce to "differs from ``endRegion``" - confirm
        that this is the intended filter.

        Kept references are de-duplicated in first-seen order. For each one:
        - TransientBuffer: the struct field type is a pointer to VoidType
        - StructBuffer: no field value is recorded
        - any other buffer: the field type is the buffer's ``_type``

        The results are stored in ``self.closureStructArgType`` and
        ``self.closureStructArgs``.
        """

        def _passesMemoryFilter(name) -> bool:
            level = getattr(ctxt.lookup(name), "_memoryLevel", None)
            if level is None:
                # Buffers without memory-level annotation are always kept
                return True
            return level == self.startRegion or level != self.endRegion

        dynamicReferences = self.extractDynamicReferences(ctxt, executionBlock, unrollStructs = True)

        # dict.fromkeys removes duplicates while keeping first-seen order
        uniqueReferences = dict.fromkeys(name for name in dynamicReferences if _passesMemoryFilter(name))

        # Add closure struct info to operatorRepresentation
        fieldTypes = {}
        fieldValues = {}
        for name in uniqueReferences:
            buf = ctxt.lookup(name)
            mangledName = ctxt._mangle(name)
            isStruct = isinstance(buf, StructBuffer)

            if isinstance(buf, TransientBuffer):
                fieldTypes[mangledName] = PointerClass(VoidType)
            elif not isStruct:
                fieldTypes[mangledName] = buf._type

            if not isStruct:
                fieldValues[mangledName] = name

        self.closureStructArgType = StructClass(self.closureName + "_args_t", fieldTypes)
        self.closureStructArgs = self.closureStructArgType(fieldValues, ctxt)
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,67 @@


class ProfilingCodeGeneration(CodeTransformationPass):
"""
Code transformation pass for inserting cycle measurement profiling code.

This class extends CodeTransformationPass to automatically insert profiling
code around execution blocks. It adds cycle counting instrumentation before
and after the target code, enabling performance measurement and analysis
of individual operations during runtime.

The generated profiling code uses a `getCycles()` function to measure
execution time and prints the results to stdout. This is useful for
performance analysis, optimization, and debugging of neural network
operations.

Notes
-----
This transformation requires that the target platform provides a
`getCycles()` function that returns the current cycle count as a uint32_t.
The transformation also assumes printf functionality is available for
output formatting.

The profiling code is non-intrusive and can be easily enabled or disabled
by including or excluding this transformation pass from the compilation
pipeline.
"""

def apply(self,
ctxt: NetworkContext,
executionBlock: ExecutionBlock,
name: str,
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
"""
Apply cycle measurement profiling to an execution block.

Wraps the given execution block with cycle counting code that measures
and reports the execution time. The profiling code is added before
(left) and after (right) the original execution block.

Parameters
----------
ctxt : NetworkContext
The network context for code generation. This parameter is passed
through unchanged as cycle measurement doesn't modify the context.
executionBlock : ExecutionBlock
The execution block to instrument with cycle measurement code.
The original block remains unchanged, with profiling code added
around it.
name : str
The name of the operation being profiled. This name is used to
generate unique variable names and is included in the output
message for identification.
verbose : CodeGenVerbosity, optional
The verbosity level for code generation. Default is _NoVerbosity.
This parameter is not used by the cycle measurement transformation.

Returns
-------
Tuple[NetworkContext, ExecutionBlock]
A tuple containing:
- The unchanged network context
- The modified execution block with profiling code added
"""
executionBlock.addLeft(NodeTemplate("""
uint32_t ${op}_cycles = getCycles();
"""), {"op": name})
Expand Down
Loading
Loading