Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ Describe the intent of your PR here.
2. [ ] Your PR reviewed and approved.
3. [ ] All checks are passing.
4. [ ] The `CHANGELOG.md` file has been updated.
5. [ ] If the Docker container was modified, change its link back after review.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,17 @@ Change main.c to use OUTPUTTYPE instead of float

### Added
- CODEOWNERS file to control who is responsible for reviewing future PRs.

## Memory Allocation Strategies and Visualization

### Added
- A visualization of the memory allocation solution generated by Deeploy at each level of memory. Plotly is used to generate a static `html` file, which is saved to the `DeeployState` directory.
- An initialization strategy for the tiling variables that randomizes the variables related to the permutation matrix.
- New interface to `testRunner_tiled_siracusa` to control the generation of the memory allocation visualization, the memory allocation strategy, and the search strategy.
- Export a new Docker container with `plotly` as a dependency.

### Changed
- Removed unused `TilerAwareDeployer` class.

### Fixed
- Fixed a bug in the MemoryScheduler where the CP problem was solved more times than needed.
28 changes: 11 additions & 17 deletions Deeploy/TilingExtension/MemoryScheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import random
from collections import OrderedDict
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Literal, Optional, Tuple, Union

import numpy as np
from ortools.constraint_solver.pywrapcp import IntVar
Expand Down Expand Up @@ -433,8 +433,8 @@ def _scheduleMemoryConstraints(self,
ctxt: NetworkContext,
allMemoryConstraints: List[PatternMemoryConstraints],
memoryHierarchy: MemoryHierarchy,
memoryLevel: str = "L1",
optimizeSchedule: bool = False):
memoryAllocStrategy: Literal["TetrisRandom", "TetrisCo-Opt"],
memoryLevel: str = "L1"):

if memoryLevel not in self.memoryMap:
self.memoryMap[memoryLevel] = []
Expand Down Expand Up @@ -469,7 +469,7 @@ def _scheduleMemoryConstraints(self,
self.memoryMap[memoryLevel].append(blockList)

# SCHEREMO: Build permutation matrix
if optimizeSchedule:
if memoryAllocStrategy == 'TetrisCo-Opt':
if numVars > 1:

permutationMatrix = self._addPermutationMatrix(tilerModel, numVars, patternIdx)
Expand All @@ -480,10 +480,12 @@ def _scheduleMemoryConstraints(self,
permutationMatrix = np.ones((1,))
permAdj, permCost = adjacencyMatrix, costVector

else:
elif memoryAllocStrategy == 'TetrisRandom':
permutationList = self.heuristicPermutation(adjacencyMatrix, costVector)
permAdj, permCost, permutationMatrix = self._stablePermutation(adjacencyMatrix, costVector,
permutationList)
else:
raise ("Unrecognized memory allocation strategy!")

self._permutationState[memoryLevel + f"_{patternIdx}"] = permutationMatrix

Expand All @@ -498,12 +500,12 @@ def scheduleMemoryConstraints(self,
ctxt: NetworkContext,
allMemoryConstraints: List[PatternMemoryConstraints],
memoryHierarchy: MemoryHierarchy,
memoryLevel: str = "L1",
optimizeSchedule: bool = False):
memoryAllocStrategy: Literal["TetrisRandom", "TetrisCo-Opt"],
memoryLevel: str = "L1"):

self.stringSuffix = self._stringSuffix + f"_{memoryLevel}"
return self._scheduleMemoryConstraints(tilerModel, ctxt, allMemoryConstraints, memoryHierarchy, memoryLevel,
optimizeSchedule)
return self._scheduleMemoryConstraints(tilerModel, ctxt, allMemoryConstraints, memoryHierarchy,
memoryAllocStrategy, memoryLevel)

def getSymbolicCostName(self, patternIdx: int, memoryLevel: str) -> str:
stringSuffix = self._stringSuffix + f"_{memoryLevel}"
Expand All @@ -515,9 +517,6 @@ def getCost(self, tilerModel, patternIdx: int, memoryLevel: str) -> int:

stringSuffix = self._stringSuffix + f"_{memoryLevel}"

collector = tilerModel._solveModel("max")
numVars = len(self.memoryMap[memoryLevel][patternIdx])

name = f"cost{stringSuffix}_copyIdx_{patternIdx}"
symVar = tilerModel._variables[name]
var = tilerModel._resolveVariable(symVar)
Expand All @@ -528,8 +527,6 @@ def getCost(self, tilerModel, patternIdx: int, memoryLevel: str) -> int:
def getHVector(self, tilerModel, patternIdx: int, memoryLevel: str) -> np.ndarray:

stringSuffix = self._stringSuffix + f"_{memoryLevel}"

collector = tilerModel._solveModel("max")
numVars = len(self.memoryMap[memoryLevel][patternIdx])

hVec = np.zeros((numVars))
Expand All @@ -549,10 +546,7 @@ def getBlockVector(self, patternIdx: int, memoryLevel: str) -> List[MemoryBlock]
def getPMatrix(self, tilerModel, patternIdx: int, memoryLevel: str) -> np.ndarray:

stringSuffix = self._stringSuffix + f"_{memoryLevel}"

collector = tilerModel._solveModel("max")
numVars = len(self.memoryMap[memoryLevel][patternIdx])

permMat = np.zeros((numVars, numVars))

for i in range(numVars):
Expand Down
151 changes: 89 additions & 62 deletions Deeploy/TilingExtension/TilerExtension.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,21 @@
# Like Template-T-Obj mapping, propagate cst, graph edition, etc

import copy
from typing import Callable, Dict, List, Optional, Tuple, Type, Union
import os
from typing import Dict, List, Literal, Optional, Tuple, Type, Union

import numpy as np
import onnx_graphsurgeon as gs
import plotly.graph_objects as go
import plotly.io as pio
from ortools.constraint_solver.pywrapcp import IntVar, SolutionCollector

import Deeploy.CommonExtensions.DataTypes as BasicDataTypes
from Deeploy.AbstractDataTypes import Pointer, PointerClass
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper
from Deeploy.DeeployTypes import ConstantBuffer, GlobalDefinition, NetworkContext, NetworkOptimizationPass, \
NodeBinding, NodeTemplate, ONNXLayer, Schedule, SubGraph, TopologyOptimizer, TransientBuffer
from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy
from Deeploy.DeeployTypes import ConstantBuffer, GlobalDefinition, NetworkContext, NodeBinding, NodeTemplate, \
ONNXLayer, Schedule, SubGraph, TransientBuffer
from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel
from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \
MemoryLevelAwareDeployer, MemoryPlatform, MemoryPlatformWrapper, TargetMemoryLevelMapping
from Deeploy.TilingExtension.GenericFlow import GenericFlowState
Expand Down Expand Up @@ -73,10 +76,81 @@ def __init__(self, memoryHierarchy: MemoryHierarchy):

self._worstCaseBufferSize: Dict[str, int] = {}

self.visualizeMemoryAlloc: bool = False
self.memoryAllocStrategy: Literal["TetrisRandom", "TetrisCo-Opt"] = "TetrisRandom"
self.searchStrategy: Literal["min", "max", "random-max"] = "random-max"

@property
def worstCaseBufferSize(self):
return self._worstCaseBufferSize

@staticmethod
def plotMemoryAlloc(memoryMap: Dict[str, List[List[MemoryBlock]]],
                    deeployStateDir: str,
                    defaultMemoryLevel: MemoryLevel,
                    targetMemLevelName: str = 'L1'):
    """Write static HTML plots of the memory allocation stored in ``memoryMap``.

    Two kinds of files are produced with Plotly:

    * ``<deeployStateDir>/memory_alloc_<defaultMemoryLevel.name>.html``, drawn
      from the *last* entry of ``memoryMap[defaultMemoryLevel.name]`` together
      with a dashed red line at ``defaultMemoryLevel.size``.
    * ``<deeployStateDir>/MemoryAlloc<targetMemLevelName>/memory_alloc_<targetMemLevelName>_step<i>.html``,
      one file per entry ``i`` of ``memoryMap[targetMemLevelName]``.

    Every memory block is drawn as a filled rectangle whose x-range is the
    block's ``_lifetime`` (widened by 0.5 on both sides) and whose y-range is
    the block's ``_addrSpace``.

    Parameters
    ----------
    memoryMap
        Mapping from memory level name to a list of block lists.
    deeployStateDir
        Directory that receives the plots; created if it does not exist.
    defaultMemoryLevel
        Memory level whose ``name`` and ``size`` are used for the first plot.
    targetMemLevelName
        Name of the memory level that gets one plot per entry.

    Raises
    ------
    KeyError
        If ``defaultMemoryLevel.name`` or ``targetMemLevelName`` is not a key
        of ``memoryMap``.
    IndexError
        If ``memoryMap[defaultMemoryLevel.name]`` is empty.
    """

    innerMemoryAllocDir = os.path.join(deeployStateDir, f"MemoryAlloc{targetMemLevelName}")
    # makedirs creates missing parents, so this also creates deeployStateDir itself.
    os.makedirs(os.path.abspath(innerMemoryAllocDir), exist_ok = True)
    defaultMemLevelPlotPath = os.path.abspath(
        os.path.join(deeployStateDir, f"memory_alloc_{defaultMemoryLevel.name}.html"))

    addTraceConfig = {"fill": "toself", "hoverinfo": "text", "mode": "lines", "line": dict(width = 2)}

    updateLayoutConfig = {
        "xaxis_title": "Lifetime",
        "yaxis_title": "Address Space",
        "xaxis": dict(tickformat = "d", showgrid = True),
        "yaxis": dict(tickformat = "d", showgrid = True),
        "hovermode": "closest",
        "showlegend": False,
    }

    def _blockTrace(block):
        # One rectangle per block: lifetime on x (padded by half a step), address range on y.
        xStart = block._lifetime[0] - 0.5
        xEnd = block._lifetime[1] + 0.5
        yLow, yHigh = block._addrSpace[0], block._addrSpace[1]
        return go.Scatter(x = [xStart, xStart, xEnd, xEnd],
                          y = [yLow, yHigh, yHigh, yLow],
                          name = block.name,
                          text = block.name,
                          **addTraceConfig)

    # Plot for the default memory level: only the last block list is drawn.
    defaultLevelMap = memoryMap[defaultMemoryLevel.name]
    fig = go.Figure()
    for buffer in defaultLevelMap[-1]:
        fig.add_trace(_blockTrace(buffer))

    # Horizontal marker at the capacity of the default memory level.
    # NOTE(review): the x-extent is derived from the number of entries in the
    # default level's map; confirm this matches the intended lifetime axis.
    fig.add_trace(
        go.Scatter(
            x = [-0.5, len(defaultLevelMap) - 1.5],
            y = [defaultMemoryLevel.size, defaultMemoryLevel.size],
            name = f"{defaultMemoryLevel.name} Memory Size",
            text = f"{defaultMemoryLevel.name} Memory Size",
            line = dict(color = "red", width = 2, dash = "dash"),
            fill = "toself",
            hoverinfo = "text",
            mode = "lines",
        ))
    fig.update_layout(title = f"Deeploy Memory Allocation {defaultMemoryLevel.name}", **updateLayoutConfig)
    pio.write_html(fig, defaultMemLevelPlotPath)

    # One plot per entry of the target memory level.
    for step_idx, innerMemoryAlloc in enumerate(memoryMap[targetMemLevelName]):
        targetMemLevelPlotPath = os.path.abspath(
            os.path.join(innerMemoryAllocDir, f"memory_alloc_{targetMemLevelName}_step{step_idx}.html"))
        fig = go.Figure()
        for buffer in innerMemoryAlloc:
            fig.add_trace(_blockTrace(buffer))
        fig.update_layout(title = f"Deeploy Memory Allocation {targetMemLevelName} Step {step_idx}",
                          **updateLayoutConfig)
        pio.write_html(fig, targetMemLevelPlotPath)

def _convertCtxtToStaticSchedule(self, ctxt: NetworkContext,
memoryMap: Dict[str, List[List[MemoryBlock]]]) -> NetworkContext:

Expand Down Expand Up @@ -152,7 +226,7 @@ def _convertCtxtToStaticSchedule(self, ctxt: NetworkContext,

return ctxt

def computeTilingSchedule(self, ctxt: NetworkContext) -> TilingSolution:
def computeTilingSchedule(self, ctxt: NetworkContext) -> Tuple[TilingSolution, Dict[str, List[List[MemoryBlock]]]]:

assert self.tilerModel is not None and self.symbolicMemoryConstraints is not None, "Set up the model before trying to compute a schedule!"

Expand Down Expand Up @@ -191,7 +265,7 @@ def computeTilingSchedule(self, ctxt: NetworkContext) -> TilingSolution:

self._convertCtxtToStaticSchedule(ctxt, memoryMap)

return tilingSchedule
return tilingSchedule, memoryMap

def setupModel(self, ctxt: NetworkContext, schedule: Schedule, layerBinding: 'OrderedDict[str, ONNXLayer]',
targetMemoryLevelMapping: TargetMemoryLevelMapping) -> NetworkContext:
Expand All @@ -203,7 +277,7 @@ def setupModel(self, ctxt: NetworkContext, schedule: Schedule, layerBinding: 'Or
else:
wrapSchedule.append(entry)

tilerModel = TilerModel()
tilerModel = TilerModel(searchStrategy = self.searchStrategy)
tilerModel = self._setupGeometricConstraints(tilerModel, ctxt, wrapSchedule, layerBinding)
tilerModel = self._setupTensorDimensionProducts(tilerModel, ctxt, wrapSchedule)
tilerModel = self._setupHeuristics(tilerModel, ctxt, wrapSchedule)
Expand Down Expand Up @@ -413,7 +487,7 @@ def _setupMemoryConstraints(

for level in self.memoryHierarchy.memoryLevels.keys():
self.outerMemoryScheduler.scheduleMemoryConstraints(tilerModel, ctxt, [outerMemoryConstraints],
self.memoryHierarchy, level)
self.memoryHierarchy, self.memoryAllocStrategy, level)

# Update inner memoryHierarchy with outer constraints
innerMemoryHierarchy = MemoryHierarchy([])
Expand All @@ -426,7 +500,7 @@ def _setupMemoryConstraints(

for level in innerMemoryHierarchy.memoryLevels.keys():
self.innerMemoryScheduler.scheduleMemoryConstraints(tilerModel, ctxt, allMemoryConstraints,
innerMemoryHierarchy, level)
innerMemoryHierarchy, self.memoryAllocStrategy, level)

return tilerModel, allMemoryConstraints

Expand Down Expand Up @@ -721,57 +795,6 @@ def _generatePatternStepTransientBufferConstraints(
return patternStepTransientBufferSizes


class TilerAwareDeployer(MemoryLevelAwareDeployer):

def __init__(self,
graph: gs.Graph,
deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper],
inputTypes: Dict[str, Type[Pointer]],
loweringOptimizer: TopologyOptimizer,
scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes),
name: str = 'DeeployNetwork',
default_channels_first: bool = True,
deeployStateDir: str = "DeeployState",
memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = [],
tilerCls: Type[Tiler] = Tiler):
super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name,
default_channels_first, deeployStateDir, memoryLevelAnnotationPasses)
self.tiler = tilerCls(deploymentPlatform.memoryHierarchy)

@property
def worstCaseBufferSize(self):
maxAddr: Dict[str, int] = self.tiler.worstCaseBufferSize

# WIESEP: Memory map form tiler does not include inputs and outputs
for node in (self.inputs() + self.outputs()):
maxAddr[node._memoryLevel] += np.prod(node.shape) * node._type.referencedType.typeWidth // 8

return maxAddr

def tile(self, tilingSolution: Optional[TilingSolution] = None):
if tilingSolution is None:
schedule = self.scheduler(self.graph)

self.tiler.setupModel(ctxt = self.ctxt,
schedule = schedule,
layerBinding = self.layerBinding,
targetMemoryLevelMapping = self.getTargetMemoryLevelMapping())
tilingSolution = self.tiler.computeTilingSchedule(self.ctxt)

# SCHEREMO: Annotate execution block with solution
for layer, pattern in zip(self.layerBinding.values(), tilingSolution):
layer.mapper.binder.executionBlock.patternMemoryConstraint = pattern

# SCHEREMO: Code generation STUB

def bind(self):
if not super().bind():
return False

self.tile()
return True


class TilerDeployerWrapper(NetworkDeployerWrapper):

def __init__(self, deployer: Union[MemoryLevelAwareDeployer, MemoryDeployerWrapper], tilerCls: Type[Tiler] = Tiler):
Expand All @@ -798,7 +821,11 @@ def tile(self, tilingSolution: Optional[TilingSolution] = None):
schedule = schedule,
layerBinding = self.layerBinding,
targetMemoryLevelMapping = self.getTargetMemoryLevelMapping())
tilingSolution = self.tiler.computeTilingSchedule(self.ctxt)
tilingSolution, memoryMap = self.tiler.computeTilingSchedule(self.ctxt)

if self.tiler.visualizeMemoryAlloc:
self.tiler.plotMemoryAlloc(memoryMap, self.deeployStateDir,
self.Platform.memoryHierarchy._defaultMemoryLevel)

# SCHEREMO: Annotate execution block with solution
for layer, pattern in zip(self.layerBinding.values(), tilingSolution):
Expand Down
Loading
Loading