Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ jobs:
testFloatRelu
testFloatMaxPool
testFloatMatmul
testFloatReshapeWithSkipConnection
testFloatSoftmax
testFloatTranspose
testFloatMul
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
This release contains major architectural changes, new platform support, enhanced simulation workflows, floating-point kernel support, training infrastructure for CCT models, memory allocation strategies, and documentation improvements.

### List of Pull Requests
- Reshape Memory Freeing and Generic Float GEMM Fixes [#91](https://github.com/pulp-platform/Deeploy/pull/91)
- Prepare for Release and Separate Dependencies [#90](https://github.com/pulp-platform/Deeploy/pull/90)
- Move PULP SDK to main branch/fork [#88](https://github.com/pulp-platform/Deeploy/pull/88)
- Finite Lifetime for IO Tensors [#51](https://github.com/pulp-platform/Deeploy/pull/51)
Expand Down Expand Up @@ -63,6 +64,9 @@ This release containing major architectural changes, new platform support, enhan


### Added
- New alias list parameter for buffer objects
- New test, also included in the CI pipeline, for the reshape and skip connection situation
- 'shape' parameter handling similar to the 'indices' parameter in the generic reshape template
- Test the correctness of the memory map generated by the tiler
- Add attribute to `VariableBuffer` to distinguish I/Os
- Add proper static memory allocation with finite lifetime for I/Os
Expand Down Expand Up @@ -229,6 +233,8 @@ This release containing major architectural changes, new platform support, enhan
- Packages listed in `dev-requirements.txt` are installed in the final stage of the Deeploy container.

### Fixed
- Buffer deallocation to only happen when all its aliases are not live anymore (the data stored there is not needed anymore, not even by other nodes)
- GEMM Generic float template to iterate through terms only when they actually contain multiple matrices
- Fix the PULP Deployer where outputs were unnecessarily loaded in L3
- Fix the lifetime computation of aliased buffers
- Removed unsupported `-MMD` compiler flag in LLVM-based toolchains.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,32 @@ def apply(self,
# We have to allocate the output buffers, unless they are global

for buffer in list(reversed(outputNames)) + transientBuffers:
# Extract buffer info from context
nb = ctxt.lookup(buffer)

# Check that it was not already allocated
assert ctxt.localObjects[nb.name]._live == False, f"Tried to allocate already live buffer {nb.name}"

# Mark it as live
ctxt.localObjects[nb.name]._live = True

# Add the allocation code to the execution block
executionBlock.addLeft(nb.allocTemplate, nb._bufferRepresentation())

for buffer in inputNames + transientBuffers:
# Extract buffer info from context
nb = ctxt.lookup(buffer)

# Check that it was not already deallocated
assert ctxt.localObjects[nb.name]._live == True, f"Tried to deallocate already dead buffer {nb.name}"

# Mark it as dead (not useful anymore)
ctxt.localObjects[nb.name]._live = False
executionBlock.addRight(nb.deallocTemplate, nb._bufferRepresentation())

# Check for live ancestors (buffers that this is an alias of, that are still live),
# and add the deallocation code to the execution block if none found
if not nb.has_live_ancestors(ctxt = ctxt):
executionBlock.addRight(nb.deallocTemplate, nb._bufferRepresentation())

return ctxt, executionBlock

Expand Down
61 changes: 60 additions & 1 deletion Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ class VariableBuffer():
allocTemplate: NodeTemplate #: NodeTemplate: Holds the buffer's allocation code
deallocTemplate: NodeTemplate #: NodeTemplate: Holds the buffer's deallocation code

def __init__(self, name: str = '', shape = [1]):
def __init__(self, name: str = '', shape = [1], alias_of: Optional[List[str]] = []):
self.name: str = name #: str: Canonical name that this buffer is registered as in the NetworkContext
self.shape: Sequence[
int] = shape #: Sequence[int]: Represents the dimensions of the underlying tensor as a sequence of dimension sizes
Expand All @@ -274,6 +274,8 @@ def __init__(self, name: str = '', shape = [1]):
self.is_input: bool = False
self.is_output: bool = False

self.alias_of: List[str] = alias_of if alias_of is not None else []

def _bufferRepresentation(self) -> Dict:
return {"type": self._instance, "name": self.name, "size": int(np.prod(self.shape))}

Expand Down Expand Up @@ -339,6 +341,61 @@ def __getstate__(self):
def fromNode(cls, node: gs.Node):
return (cls(name = node.name, shape = node.shape if not isinstance(node, gs.Constant) else node.values.shape))

def add_aliases(self, aliases_to_add: List[str]):
    """Extend the ``alias_of`` attribute with the given alias names.

    Parameters
    ----------
    aliases_to_add : List[str]
        Names of aliases to record on this buffer. Names already
        present are skipped, so the list stays duplicate-free and
        keeps its insertion order.

    Returns
    -------
    None
    """

    # Guard for instances that lack the attribute; leave them untouched.
    if not hasattr(self, "alias_of"):
        return None

    known = self.alias_of
    for name in aliases_to_add:
        if name not in known:
            known.append(name)

    return None

def get_aliases_of(self):
    """Getter for the ``alias_of`` attribute.

    Returns
    -------
    List[str]
        The ``alias_of`` list itself when the attribute exists (callers
        receive the live list object), otherwise a fresh empty list.
    """

    # getattr returns the attribute's list object when present and a
    # new empty list when the attribute is missing.
    return getattr(self, "alias_of", list())

def has_live_ancestors(self, ctxt: NetworkContext) -> bool:
    """Report whether any buffer aliased by this one is still live.

    Parameters
    ----------
    ctxt : NetworkContext
        Current NetworkContext, used to resolve alias names to buffers.

    Returns
    -------
    bool
        True if at least one aliased buffer is still marked live,
        False otherwise (including when no alias tracking exists).
    """

    # Instances without the attribute have nothing to keep alive.
    if not hasattr(self, "alias_of"):
        return False

    # any() short-circuits exactly like the original early-return loop.
    return any(ctxt.lookup(name)._live for name in self.alias_of)


class TransientBuffer(VariableBuffer):
"""Class to represent memory space required by kernels that is not covered by input and output tensors, e.g. im2col buffers in convolutions
Expand All @@ -365,6 +422,8 @@ def __init__(self, name: str = '', size = 0):
self.is_input: bool = False
self.is_output: bool = False

self.alias_of: List[str] = []

def __eq__(self, other):

ret = all([self.name == other.name, self.size == other.size])
Expand Down
52 changes: 48 additions & 4 deletions Deeploy/Targets/Generic/Parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,14 +1001,28 @@ def parseNodeCtxt(self,
node: gs.Node,
channels_first: bool = True) -> Tuple[NetworkContext, bool]:

# Define names of node inputs and outputs, according to the ONNX standard
inputs = ['data_in', 'shape']
outputs = ['data_out']

# Map inputs and outputs to their corresponding names in the operator representation
for idx, inputNode in enumerate(node.inputs):
self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name
for idx, outputNode in enumerate(node.outputs):
self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name

# Update alias_of parameter for the output node
output_node = ctxt.lookup(node.outputs[outputs.index("data_out")].name)
input_node = ctxt.lookup(node.inputs[inputs.index("data_in")].name)

# Prepare new aliases
new_output_node_aliases = input_node.get_aliases_of()
new_output_node_aliases.append(input_node.name)

# Add new aliases
output_node.add_aliases(aliases_to_add = new_output_node_aliases)

# Compute data size
self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape)

return ctxt, True
Expand Down Expand Up @@ -1623,9 +1637,13 @@ def parseNodeCtxt(self,
node.inputs.append(zeroTensor)
self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'

# Store the input and output shapes in the operator representation
self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape)
self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape
self.operatorRepresentation['B_shape'] = ctxt.lookup(node.inputs[1].name).shape
self.operatorRepresentation['data_out_shape'] = ctxt.lookup(node.outputs[0].name).shape

# Store the matrix dimensions in the operator representation
self.operatorRepresentation['M'] = ctxt.lookup(
node.inputs[0].name).shape[(-2 + self.operatorRepresentation['transA'])]
self.operatorRepresentation['N'] = ctxt.lookup(
Expand All @@ -1637,11 +1655,24 @@ def parseNodeCtxt(self,
ret = ret and (self.operatorRepresentation['N'] == ctxt.lookup(
node.inputs[1].name).shape[-2 + self.operatorRepresentation['transB']])

self.operatorRepresentation['batch'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2])
# Check if the batch dimensions are compatible
self.operatorRepresentation['batch_A'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2])
self.operatorRepresentation['batch_B'] = np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2])

self.operatorRepresentation['batch'] = max(self.operatorRepresentation['batch_A'],
self.operatorRepresentation['batch_B'])

# SCHEREMO: Assert that batch is the same on both matrices
W_batched = (self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]))
self.operatorRepresentation['W_batched'] = W_batched
assert (self.operatorRepresentation["batch_A"] == self.operatorRepresentation["batch_B"]) or (
self.operatorRepresentation["batch_A"] == 1
) or (
self.operatorRepresentation["batch_B"] == 1
), "Incompatible dimensions for input matrices. Broadcasting not yet supported for dimensions larger than 1 on one of the inputs, or equal dimensions between the 2."

# Create flags for same dimension between each input matrix and the final batch dimension
self.operatorRepresentation['A_batched'] = (self.operatorRepresentation['batch'] == np.prod(
ctxt.lookup(node.inputs[0].name).shape[:-2]))
self.operatorRepresentation['W_batched'] = self.operatorRepresentation['B_batched'] = (
self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]))

return ctxt, ret

Expand Down Expand Up @@ -1745,12 +1776,25 @@ def parseNodeCtxt(self,
self.operatorRepresentation[outputs[idx]] = newCtxt.lookup(outputNode.name).name

if len(node.inputs) == 3:
# Compute bias name and shape if present in the inputs
self.operatorRepresentation['C'] = newCtxt.lookup(node.inputs[2].name).name
self.operatorRepresentation['C_shape'] = newCtxt.lookup(node.inputs[2].name).shape

# Create flag for same dimension between bias matrix and the final batch dimension
self.operatorRepresentation['C_batched'] = (self.operatorRepresentation['batch'] == np.prod(
newCtxt.lookup(node.inputs[2].name).shape[:-2]))
elif not self.noBiasHoisting:
# Create mock bias matrix if not present in the inputs
values = np.zeros((1))
zeroTensor = gs.Constant(f'{node.name}_C_Tensor', values = values)
newCtxt.hoistConstant(zeroTensor)

# Store it in the operator representation
self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
self.operatorRepresentation['C_shape'] = (0,)

# Create flag for same dimension between bias matrix and the final batch dimension
self.operatorRepresentation['C_batched'] = False

self.operatorRepresentation['size'] = np.prod(newCtxt.lookup(node.inputs[0].name).shape)

Expand Down
9 changes: 9 additions & 0 deletions Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,18 @@
${transB}
);

% if A_batched:
ref_${data_out}_${A} += ${M} * ${N};
% endif

% if B_batched:
ref_${data_out}_${B} += ${N} * ${O};
% endif

% if C_batched:
ref_${data_out}_${C} += ${M} * ${O};
% endif

ref_${data_out}_${data_out} += ${M} * ${O};
}
END_SINGLE_CORE
Expand Down
5 changes: 5 additions & 0 deletions Deeploy/Targets/Generic/Templates/ReshapeTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def alignToContext(self, ctxt: NetworkContext,
ctxt.globalObjects[operatorRepresentation['indices']]._deploy = False
ctxt.globalObjects[operatorRepresentation['indices']]._live = False

# Same for "shape"
if "shape" in operatorRepresentation.keys():
ctxt.globalObjects[operatorRepresentation["shape"]]._deploy = False
ctxt.globalObjects[operatorRepresentation["shape"]]._live = False

inBuffer = ctxt.lookup(operatorRepresentation['data_in'])
outBuffer = ctxt.lookup(operatorRepresentation['data_out'])
outBuffer._alias = inBuffer.name
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading