Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci-platform-siracusa-tiled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ jobs:
{"name":"testFloatSoftmax","L1":[4000]},
{"name":"testFloatTranspose","L1":[2000]},
{"name":"testFloatMul","L1":[2000]},
{"name":"largeFloatAdd","L1":[220000]}
{"name":"largeFloatAdd","L1":[220000]},
{"name":"testRQGEMMwBatch","L1":[20000]},
{"name":"testMatMulBatch","L1":[20000]}
]
num-cores: 8

Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
## Unreleased (Planned Release Target: v0.2.1)

### List of Pull Requests
- Fix PULP GEMM `batch` serialization [#109](https://github.com/pulp-platform/Deeploy/pull/109)
- Split CI Workflows by Platform and Task, Improve Formatting and Linting Reliability [#108](https://github.com/pulp-platform/Deeploy/pull/108)
- Refactor tiling code generation [#105](https://github.com/pulp-platform/Deeploy/pull/105)
- Change order of typeMatching entries [#68](https://github.com/pulp-platform/Deeploy/pull/68)
Expand Down Expand Up @@ -61,6 +62,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Prevent node duplication for graphs generated via GraphSurgeon
- Resolved issue with missing `id` in the `Build Cache for Docker` step, used in the `Inject build-cache` step.
- Fix license CI check and prevent potential issues with `jq` installation
- Fix PULP GEMM `batch` variable serialization

### Removed
- Delete outdated and unused `.gitlab-ci.yml` file
Expand Down
193 changes: 58 additions & 135 deletions Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from typing import Dict, List, Tuple

from Deeploy.AbstractDataTypes import PointerClass
Expand Down Expand Up @@ -135,25 +136,22 @@ def serializeTilingSolution(

# Every output is constructed by a pair of inputs. Reconstruct this pair.
for cube in outputCubes:
MOffset, OOffset = cube.offset[-2:]
MSize, OSize = cube.dims[-2:]

BSize = 1
BOffset = 0
BatchSize = 1
BatchOffset = 0

if len(cube.offset) == 2:
(MOffset, OOffset) = cube.offset
(MSize, OSize) = cube.dims
elif len(cube.offset) == 3:
(BatchOffset, MOffset, OOffset) = cube.offset
(BatchSize, MSize, OSize) = cube.dims
if len(cube.offset) > 2:
BatchSize = math.prod(cube.dims[:-2])

if len(cube.offset) > 3:
assert all(off == 0 for off in cube.offset[:-3]), (
f"Unsupported tiling across leading batch dims: offsets={cube.offset}. "
"Only the last batch dim (besides M/O) may be tiled.")
else:
(BatchOffset, BOffset, MOffset, OOffset) = cube.offset
(BatchSize, BSize, MSize, OSize) = cube.dims
BatchSize = 1

replacements["M"].append(MSize)
replacements["O"].append(OSize)
replacements["batch"].append(BSize)
replacements["batch"].append(BatchSize)

if transA == 0:
AMatrixOffsets = (MOffset, NOffset)
Expand All @@ -162,49 +160,30 @@ def serializeTilingSolution(
AMatrixOffsets = (NOffset, MOffset)
AMatrixShape = (NSize, MSize)

if len(buffA.shape) > 2:
batchDimCount = len(buffA.shape) - 2
AMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + AMatrixOffsets
AMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + AMatrixShape

ACube = HyperRectangle(AMatrixOffsets, AMatrixShape)
inputACubes.append(ACube)

if transB == 0:
BMatrixOffsets = (NOffset, OOffset)
BMatrixShape = (NSize, OSize)
else:
BMatrixOffsets = (OOffset, NOffset)
BMatrixShape = (OSize, NSize)

if len(buffA.shape) == 2:
ACube = HyperRectangle(AMatrixOffsets, AMatrixShape)
elif len(buffA.shape) == 3:
ACube = HyperRectangle((BatchOffset,) + AMatrixOffsets, (BatchSize,) + AMatrixShape)
else:
ACube = HyperRectangle(
(
BatchOffset,
BOffset,
) + AMatrixOffsets,
(
BatchSize,
BSize,
) + AMatrixShape,
)

if len(buffB.shape) == 2:
BCube = HyperRectangle(BMatrixOffsets, BMatrixShape)
elif len(buffB.shape) == 3:
BCube = HyperRectangle((BatchOffset,) + BMatrixOffsets, (BatchSize,) + BMatrixShape)
else:
BCube = HyperRectangle(
(
BatchOffset,
BOffset,
) + BMatrixOffsets,
(
BatchSize,
BSize,
) + BMatrixShape,
)

RequantCube = HyperRectangle((OOffset,), (OSize,))
if len(buffB.shape) > 2:
batchDimCount = len(buffB.shape) - 2
BMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + BMatrixOffsets
BMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + BMatrixShape

inputACubes.append(ACube)
BCube = HyperRectangle(BMatrixOffsets, BMatrixShape)
inputBCubes.append(BCube)

RequantCube = HyperRectangle((OOffset,), (OSize,))
inputMulCubes.append(RequantCube)
inputAddCubes.append(RequantCube)

Expand All @@ -231,40 +210,6 @@ def serializeTilingSolution(
return VariableReplacementScheme(replacements, replacementTypes), schedule


class MatrixVecTileConstraint(GEMMTileConstraint):
    """Tile-constraint variant whose constraint hooks forward to ``GEMMTileConstraint``.

    Both hooks call the corresponding ``GEMMTileConstraint`` static method with
    the arguments they receive and return its result unchanged; no additional
    constraints are registered here.
    """

    @staticmethod
    def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
        """Return the tiler model produced by ``GEMMTileConstraint.addGeometricalConstraint``."""
        return GEMMTileConstraint.addGeometricalConstraint(tilerModel, parseDict, ctxt)

    @staticmethod
    def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
        """Return the tiler model produced by ``GEMMTileConstraint.addPolicyConstraint``."""
        return GEMMTileConstraint.addPolicyConstraint(tilerModel, parseDict, ctxt)


class TallGEMMTileConstraint(GEMMTileConstraint):
    """Tile-constraint variant whose constraint hooks forward to ``GEMMTileConstraint``.

    Neither hook adds constraints of its own: each passes its arguments straight
    to the matching ``GEMMTileConstraint`` static method and hands back the
    returned tiler model.
    """

    @staticmethod
    def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
        """Return the tiler model produced by ``GEMMTileConstraint.addGeometricalConstraint``."""
        return GEMMTileConstraint.addGeometricalConstraint(tilerModel, parseDict, ctxt)

    @staticmethod
    def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
        """Return the tiler model produced by ``GEMMTileConstraint.addPolicyConstraint``."""
        return GEMMTileConstraint.addPolicyConstraint(tilerModel, parseDict, ctxt)


class FloatGEMMTileConstraint(TileConstraint):

@staticmethod
Expand Down Expand Up @@ -367,25 +312,22 @@ def serializeTilingSolution(

# Every output is constructed by a pair of inputs. Reconstruct this pair.
for cube in outputCubes:
MOffset, OOffset = cube.offset[-2:]
MSize, OSize = cube.dims[-2:]

BSize = 1
BOffset = 0
BatchSize = 1
BatchOffset = 0

if len(cube.offset) == 2:
(MOffset, OOffset) = cube.offset
(MSize, OSize) = cube.dims
elif len(cube.offset) == 3:
(BatchOffset, MOffset, OOffset) = cube.offset
(BatchSize, MSize, OSize) = cube.dims
if len(cube.offset) > 2:
BatchSize = math.prod(cube.dims[:-2])

if len(cube.offset) > 3:
assert all(off == 0 for off in cube.offset[:-3]), (
f"Unsupported tiling across leading batch dims: offsets={cube.offset}. "
"Only the last batch dim (besides M/O) may be tiled.")
else:
(BatchOffset, BOffset, MOffset, OOffset) = cube.offset
(BatchSize, BSize, MSize, OSize) = cube.dims
BatchSize = 1

replacements["M"].append(MSize)
replacements["O"].append(OSize)
replacements["batch"].append(BSize)
replacements["batch"].append(BatchSize)

if transA == 0:
AMatrixOffsets = (MOffset, NOffset)
Expand All @@ -394,57 +336,38 @@ def serializeTilingSolution(
AMatrixOffsets = (NOffset, MOffset)
AMatrixShape = (NSize, MSize)

if len(buffA.shape) > 2:
batchDimCount = len(buffA.shape) - 2
AMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + AMatrixOffsets
AMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + AMatrixShape

ACube = HyperRectangle(AMatrixOffsets, AMatrixShape)
inputACubes.append(ACube)

if transB == 0:
BMatrixOffsets = (NOffset, OOffset)
BMatrixShape = (NSize, OSize)
else:
BMatrixOffsets = (OOffset, NOffset)
BMatrixShape = (OSize, NSize)

if len(buffA.shape) == 2:
ACube = HyperRectangle(AMatrixOffsets, AMatrixShape)
elif len(buffA.shape) == 3:
ACube = HyperRectangle((BatchOffset,) + AMatrixOffsets, (BatchSize,) + AMatrixShape)
else:
ACube = HyperRectangle(
(
BatchOffset,
BOffset,
) + AMatrixOffsets,
(
BatchSize,
BSize,
) + AMatrixShape,
)

if len(buffB.shape) == 2:
BCube = HyperRectangle(BMatrixOffsets, BMatrixShape)
elif len(buffB.shape) == 3:
BCube = HyperRectangle((BatchOffset,) + BMatrixOffsets, (BatchSize,) + BMatrixShape)
else:
BCube = HyperRectangle(
(
BatchOffset,
BOffset,
) + BMatrixOffsets,
(
BatchSize,
BSize,
) + BMatrixShape,
)
if len(buffB.shape) > 2:
batchDimCount = len(buffB.shape) - 2
BMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + BMatrixOffsets
BMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + BMatrixShape

BCube = HyperRectangle(BMatrixOffsets, BMatrixShape)
inputBCubes.append(BCube)

CMatrixOffsets = (MOffset, OOffset)
CMatrixShape = (MSize, OSize)

if len(buffC.shape) == 2:
CCube = HyperRectangle(CMatrixOffsets, CMatrixShape)
elif len(buffC.shape) == 3:
CCube = HyperRectangle((BatchOffset,) + CMatrixOffsets, (BatchSize,) + CMatrixShape)
else:
CCube = HyperRectangle((BatchOffset, BOffset) + CMatrixOffsets, (BatchSize, BSize) + CMatrixShape)
if len(buffC.shape) > 2:
batchDimCount = len(buffC.shape) - 2
CMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + CMatrixOffsets
CMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + CMatrixShape

inputACubes.append(ACube)
inputBCubes.append(BCube)
CCube = HyperRectangle(CMatrixOffsets, CMatrixShape)
inputAddCubes.append(CCube)

inputLoadSchedule = []
Expand Down
71 changes: 25 additions & 46 deletions Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from typing import Dict, List, Tuple

from Deeploy.AbstractDataTypes import PointerClass
Expand Down Expand Up @@ -125,65 +126,43 @@ def serializeTilingSolution(

# Every output is constructed by a pair of inputs. Reconstruct this pair.
for cube in outputCubes:
MOffset, OOffset = cube.offset[-2:]
MSize, OSize = cube.dims[-2:]

BSize = 1
BOffset = 0
BatchSize = 1
BatchOffset = 0

if len(cube.offset) == 2:
(MOffset, OOffset) = cube.offset
(MSize, OSize) = cube.dims
elif len(cube.offset) == 3:
(BatchOffset, MOffset, OOffset) = cube.offset
(BatchSize, MSize, OSize) = cube.dims
if len(cube.offset) > 2:
BatchSize = math.prod(cube.dims[:-2])

if len(cube.offset) > 3:
assert all(off == 0 for off in cube.offset[:-3]), (
f"Unsupported tiling across leading batch dims: offsets={cube.offset}. "
"Only the last batch dim (besides M/O) may be tiled.")
else:
(BatchOffset, BOffset, MOffset, OOffset) = cube.offset
(BatchSize, BSize, MSize, OSize) = cube.dims
BatchSize = 1

replacements["M"].append(MSize)
replacements["O"].append(OSize)
replacements["batch"].append(BSize)
replacements["batch"].append(BatchSize)

AMatrixOffsets = (MOffset, NOffset)
AMatrixShape = (MSize, NSize)

if len(buffA.shape) > 2:
batchDimCount = len(buffA.shape) - 2
AMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + AMatrixOffsets
AMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + AMatrixShape

ACube = HyperRectangle(AMatrixOffsets, AMatrixShape)
inputACubes.append(ACube)

BMatrixOffsets = (NOffset, OOffset)
BMatrixShape = (NSize, OSize)

if len(buffA.shape) == 2:
ACube = HyperRectangle(AMatrixOffsets, AMatrixShape)
elif len(buffA.shape) == 3:
ACube = HyperRectangle((BatchOffset,) + AMatrixOffsets, (BatchSize,) + AMatrixShape)
else:
ACube = HyperRectangle(
(
BatchOffset,
BOffset,
) + AMatrixOffsets,
(
BatchSize,
BSize,
) + AMatrixShape,
)

if len(buffB.shape) == 2:
BCube = HyperRectangle(BMatrixOffsets, BMatrixShape)
elif len(buffB.shape) == 3:
BCube = HyperRectangle((BatchOffset,) + BMatrixOffsets, (BatchSize,) + BMatrixShape)
else:
BCube = HyperRectangle(
(
BatchOffset,
BOffset,
) + BMatrixOffsets,
(
BatchSize,
BSize,
) + BMatrixShape,
)
if len(buffB.shape) > 2:
batchDimCount = len(buffB.shape) - 2
BMatrixOffsets = tuple(cube.offset[:-2][-batchDimCount:]) + BMatrixOffsets
BMatrixShape = tuple(cube.dims[:-2][-batchDimCount:]) + BMatrixShape

inputACubes.append(ACube)
BCube = HyperRectangle(BMatrixOffsets, BMatrixShape)
inputBCubes.append(BCube)

inputLoadSchedule = []
Expand Down
Loading
Loading