Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/nncf/common/graph/patterns/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,3 +408,4 @@ class IgnoredPatternNames(Enum):
FC_BN_HSWISH_ACTIVATION = PatternDesc("fc_bn_hswish_activation")
EQUAL_LOGICALNOT = PatternDesc("equal_logicalnot")
ROPE = PatternDesc("rope", model_types=[ModelType.TRANSFORMER])
SAM_PE = PatternDesc("sam_pe", model_types=[ModelType.TRANSFORMER])
26 changes: 26 additions & 0 deletions src/nncf/onnx/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,29 @@ def create_rope() -> GraphPattern:
pattern.add_edge(concat_node, cos_node)
pattern.add_edge(concat_node, sin_node)
return pattern


@ONNX_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
def create_sam_pe() -> GraphPattern:
    """
    Positional Embedding from Segment Anything Model (SAM):
    MatMul -> Mul -> {Cos, Sin} -> Concat.

    :return: Graph pattern matching the SAM positional-embedding subgraph.
    """
    pattern = GraphPattern()

    def _add(label, metatype):
        # Shorthand: register one labeled pattern node with its metatype.
        return pattern.add_node(**{GraphPattern.LABEL_ATTR: label, GraphPattern.METATYPE_ATTR: metatype})

    matmul = _add("MATMUL", om.ONNXMatMulMetatype)
    multiply = _add("MULTIPLY", om.ONNXMulLayerMetatype)
    cosine = _add("COS", om.ONNXCosMetatype)
    sine = _add("SIN", om.ONNXSinMetatype)
    concat = _add("CONCAT", om.ONNXConcatMetatype)

    # Both trigonometric branches consume the scaled MatMul output and are
    # joined back by the final Concat.
    for src, dst in ((matmul, multiply), (multiply, cosine), (multiply, sine), (cosine, concat), (sine, concat)):
        pattern.add_edge(src, dst)

    return pattern
26 changes: 26 additions & 0 deletions src/nncf/openvino/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,29 @@ def create_rope() -> GraphPattern:
pattern.add_edge(concat_node, cos_node)
pattern.add_edge(concat_node, sin_node)
return pattern


@OPENVINO_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
def create_sam_pe() -> GraphPattern:
    """
    Positional Embedding from Segment Anything Model (SAM):
    MatMul -> Multiply -> {Cos, Sin} -> Concat.

    :return: Graph pattern matching the SAM positional-embedding subgraph.
    """
    pattern = GraphPattern()

    def _add(label, metatype):
        # Shorthand: register one labeled pattern node with its metatype.
        return pattern.add_node(**{GraphPattern.LABEL_ATTR: label, GraphPattern.METATYPE_ATTR: metatype})

    matmul = _add("MATMUL", om.OVMatMulMetatype)
    multiply = _add("MULTIPLY", om.OVMultiplyMetatype)
    cosine = _add("COS", om.OVCosMetatype)
    sine = _add("SIN", om.OVSinMetatype)
    concat = _add("CONCAT", om.OVConcatMetatype)

    # Both trigonometric branches consume the scaled MatMul output and are
    # joined back by the final Concat.
    for src, dst in ((matmul, multiply), (multiply, cosine), (multiply, sine), (cosine, concat), (sine, concat)):
        pattern.add_edge(src, dst)

    return pattern
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from nncf.onnx.graph.transformations.command_creation import ONNXCommandCreator
from nncf.onnx.graph.transformations.commands import ONNXTargetPoint
from nncf.onnx.quantization.ignored_patterns import create_rope
from nncf.onnx.quantization.ignored_patterns import create_sam_pe
from nncf.parameters import CompressionFormat
from nncf.parameters import CompressWeightsMode
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
Expand Down Expand Up @@ -491,7 +492,9 @@ def _replace_matmul_with_matmulnbits(

@staticmethod
def get_ignored_patterns() -> GraphPattern:
    """Combine the RoPE and SAM-PE subgraphs as alternative ignored patterns."""
    ignored = create_rope()
    ignored.add_pattern_alternative(create_sam_pe())
    return ignored


class ONNXAWQAlgoAlgoBackend(AWQAlgoBackend, ONNXWeightCompressionAlgoBackend):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from nncf.openvino.optimized_functions import clear_ov_model_cache
from nncf.openvino.optimized_functions.models import OV_MODEL_CACHE
from nncf.openvino.quantization.ignored_patterns import create_rope
from nncf.openvino.quantization.ignored_patterns import create_sam_pe
from nncf.openvino.rt_info import dump_parameters
from nncf.openvino.statistics.collectors import OVMaxVarianceReducer
from nncf.openvino.statistics.collectors import OVMeanAbsMaxReducer
Expand Down Expand Up @@ -394,7 +395,9 @@ def filter_func(point: StatisticPoint) -> bool:

@staticmethod
def get_ignored_patterns() -> GraphPattern:
    """Combine the RoPE and SAM-PE subgraphs as alternative ignored patterns."""
    ignored = create_rope()
    ignored.add_pattern_alternative(create_sam_pe())
    return ignored


class OVTensorWeightCompressionAlgoBackend(OVWeightCompressionAlgoBackend):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from nncf.torch.model_transformer import PTModelTransformer
from nncf.torch.nncf_network import NNCFNetwork
from nncf.torch.quantization.ignored_patterns import create_rope
from nncf.torch.quantization.ignored_patterns import create_sam_pe
from nncf.torch.quantization.layers import QUANTIZATION_MODULES
from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
Expand Down Expand Up @@ -481,7 +482,9 @@ def transform_model(

@staticmethod
def get_ignored_patterns() -> GraphPattern:
    """Combine the RoPE and SAM-PE subgraphs as alternative ignored patterns."""
    ignored = create_rope()
    ignored.add_pattern_alternative(create_sam_pe())
    return ignored


class PTAWQAlgoAlgoBackend(AWQAlgoBackend, PTWeightCompressionAlgoBackend):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from nncf.torch.model_graph_manager import get_weight_compression_reduction_axes
from nncf.torch.model_graph_manager import get_weight_tensor_port_ids
from nncf.torch.quantization.ignored_patterns import create_rope
from nncf.torch.quantization.ignored_patterns import create_sam_pe
from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT8AsymmetricWeightsDecompressor
Expand Down Expand Up @@ -257,7 +258,9 @@ def transform_model(

@staticmethod
def get_ignored_patterns() -> GraphPattern:
    """Combine the RoPE and SAM-PE subgraphs as alternative ignored patterns."""
    ignored = create_rope()
    ignored.add_pattern_alternative(create_sam_pe())
    return ignored


class FXMixedPrecisionAlgoBackend(MixedPrecisionAlgoBackend, FXWeightCompressionAlgoBackend):
Expand Down
24 changes: 24 additions & 0 deletions src/nncf/torch/quantization/ignored_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,27 @@ def create_rope() -> GraphPattern:
pattern.add_edge(concat_node, cos_node)
pattern.add_edge(concat_node, sin_node)
return pattern


@PT_IGNORED_PATTERNS.register(IgnoredPatternNames.SAM_PE)
def create_sam_pe() -> GraphPattern:
    """
    Positional Embedding from Segment Anything Model (SAM):
    matmul -> mul -> {cos, sin} -> cat.

    :return: Graph pattern matching the SAM positional-embedding subgraph.
    """
    pattern = GraphPattern()

    def _add(label, metatype):
        # Shorthand: register one labeled pattern node with its metatype.
        return pattern.add_node(**{GraphPattern.LABEL_ATTR: label, GraphPattern.METATYPE_ATTR: metatype})

    matmul = _add("MATMUL", om.PTMatMulMetatype)
    multiply = _add("MULTIPLY", om.PTMulMetatype)
    cosine = _add("COS", om.PTCosMetatype)
    sine = _add("SIN", om.PTSinMetatype)
    concat = _add("CONCAT", om.PTCatMetatype)

    # Both trigonometric branches consume the scaled matmul output and are
    # joined back by the final concatenation.
    for src, dst in ((matmul, multiply), (multiply, cosine), (multiply, sine), (cosine, concat), (sine, concat)):
        pattern.add_edge(src, dst)

    return pattern
21 changes: 21 additions & 0 deletions tests/cross_fw/test_templates/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,3 +499,24 @@ def forward(self, x):
x1 = x.sin()
x2 = x.cos()
return x1, x2


class SAMPEModel(nn.Module):
    """
    Positional Embedding from Segment Anything Model (SAM):
    matmul -> scale by 2*pi -> sin/cos -> concat along the last axis.
    """

    # Example input shape used by callers to build calibration/export data.
    INPUT_SIZE = [1, 2, 3, 2]

    def __init__(self):
        super().__init__()
        with set_torch_seed():
            # BUG FIX: torch.empty() returns uninitialized memory (possibly
            # NaN/inf) and does not consume the RNG, making the seed context
            # pointless and the weights nondeterministic. torch.rand() draws
            # from the seeded generator, so the weights are reproducible.
            self.weight = nn.Parameter(torch.rand((2, 128)))

    def forward(self, x):
        # (..., 2) @ (2, 128) -> (..., 128)
        x = torch.matmul(x, self.weight)
        x = x * (2 * torch.pi)
        x1 = x.sin()
        x2 = x.cos()
        # Concatenate both branches: (..., 128) -> (..., 256).
        x = torch.cat([x1, x2], dim=-1)
        return x
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def get_matmul_model() -> TModel:
def get_RoPE_model() -> TModel:
"""Returns a backend model for test_rope_weight_compression."""

@staticmethod
@abstractmethod
def get_SAM_PE_model() -> TModel:
    """Returns a backend-specific SAM positional-embedding model for test_sam_pe_weight_compression."""

@pytest.mark.parametrize(
("mode", "ref_act_score", "ref_score"),
(
Expand Down Expand Up @@ -400,6 +405,26 @@ def test_rope_weight_compression(self):
int4_num_nodes = self.get_num_int4_nodes(compressed_model)
assert int4_num_nodes == int4_ref_num_compressed

def test_sam_pe_weight_compression(self):
    """The SAM positional-embedding subgraph must be ignored: no INT4 nodes expected."""
    model = self.get_SAM_PE_model()

    calibration_sample = self.to_tensor(np.ones([1, 2, 3, 2], dtype=np.float32))
    dataset = Dataset([calibration_sample], self.get_transform_func())

    compressed_model = compress_weights(
        model,
        mode=CompressWeightsMode.INT4_SYM,
        ratio=1.0,
        group_size=-1,
        dataset=dataset,
        all_layers=True,
    )

    # Even with ratio=1.0 and all_layers=True, the MatMul inside the SAM_PE
    # ignored pattern must not be compressed, so zero INT4 nodes are expected.
    assert self.get_num_int4_nodes(compressed_model) == 0

@staticmethod
@abstractmethod
def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]:
Expand Down
21 changes: 21 additions & 0 deletions tests/onnx/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,27 @@ def add_add(self, input_a: str, input_b: str, output: Optional[str] = None) -> s
)
return output

def add_mul_const(
    self, input: str, shape: tuple[int, ...], output: Optional[str] = None, data: Optional[np.ndarray] = None
) -> str:
    """
    Append a Mul node that multiplies *input* by a constant initializer.

    :param input: Name of the tensor to scale. NOTE: shadows the `input`
        builtin; kept for interface consistency with the other `add_*` helpers.
    :param shape: Shape of the constant operand. (Fixed annotation: `tuple[int]`
        would mean a 1-tuple; `tuple[int, ...]` accepts any rank.)
    :param output: Explicit output tensor name; autogenerated when None.
    :param data: Constant values; random values of *shape* are used when None.
    :return: Name of the Mul node's output tensor.
    """
    i = len(self._nodes)

    w_name = f"W_{i}"
    # Use caller-provided constant data or fall back to random initialization.
    w_values = np.random.rand(*shape).astype(np.float32) if data is None else data
    w_initializer = onnx.helper.make_tensor(
        name=w_name, data_type=onnx.TensorProto.FLOAT, dims=shape, vals=w_values.tobytes(), raw=True
    )
    self._initializers.append(w_initializer)

    output = f"Mul_{i}_output" if output is None else output
    self._nodes.append(
        onnx.helper.make_node(op_type="Mul", inputs=[input, w_name], outputs=[output], name=f"Mul_{i}")
    )
    return output

def add_relu(self, input: str, output: Optional[str] = None) -> str:
i = len(self._nodes)

Expand Down
18 changes: 18 additions & 0 deletions tests/onnx/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,24 @@ def get_RoPE_model() -> onnx.ModelProto:

return mb.build()

@staticmethod
def get_SAM_PE_model() -> onnx.ModelProto:
    """
    Build the SAM positional-embedding ONNX model for
    TemplateWeightCompression.test_sam_pe_weight_compression().
    """
    builder = ModelBuilder()

    # MatMul -> Mul(2*pi) -> Sin/Cos -> Concat, mirroring the SAM_PE ignored pattern.
    tensor = builder.add_input("input", (-1, -1, -1, 2))
    tensor = builder.add_matmul(tensor, shape=(2, 128))
    tensor = builder.add_mul_const(tensor, shape=(1,), data=np.array([2 * np.pi], np.float32))
    sin_branch = builder.add_sin(tensor)
    cos_branch = builder.add_cos(tensor)
    joined = builder.add_concat([sin_branch, cos_branch], axis=-1)

    builder.add_output(joined, (-1, -1, -1, 256))

    return builder.build()

@staticmethod
def get_sequential_matmul_model() -> onnx.ModelProto:
"""
Expand Down
20 changes: 20 additions & 0 deletions tests/openvino/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1252,6 +1252,26 @@ def _create_ov_model(self):
return model


class SAMPEModel(OVReferenceModel):
    """
    Positional Embedding from Segment Anything Model (SAM):
    MatMul -> Multiply(2*pi) -> Sin/Cos -> Concat.
    """

    def _create_ov_model(self):
        # Dynamic batch/spatial dims; the last dim feeds the 2x128 projection.
        param = opset.parameter([-1, -1, -1, 2], name="inp")
        weights = self._rng.random((128, 2)).astype(np.float32)

        projected = opset.matmul(param, weights, transpose_a=False, transpose_b=True, name="MatMul")
        scaled = opset.multiply(projected, opset.constant(2 * np.pi, dtype=np.float32), name="Scaled_MatMul")
        sin_branch = opset.sin(scaled, name="sin")
        cos_branch = opset.cos(scaled, name="cos")
        joined = opset.concat([sin_branch, cos_branch], axis=-1, name="concat")
        result = opset.result(joined, name="concat_result")

        return ov.Model([result], [param])


class MatMul(OVReferenceModel):
def _create_ov_model(self):
input_node = opset.parameter([1, 4, 8], name="Input")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
from tests.openvino.native.models import ModelNamedConsts
from tests.openvino.native.models import OVReferenceModel
from tests.openvino.native.models import RoPEModel
from tests.openvino.native.models import SAMPEModel
from tests.openvino.native.models import SequentialMatmulModel
from tests.openvino.native.models import WeightsModel
from tests.openvino.native.quantization.test_fq_params_calculation import REFERENCE_SCALES_DIR
Expand Down Expand Up @@ -1929,6 +1930,10 @@ def get_matmul_model() -> ov.Model:
def get_RoPE_model() -> ov.Model:
return RoPEModel().ov_model

@staticmethod
def get_SAM_PE_model() -> ov.Model:
    """Return the OpenVINO SAM positional-embedding reference model."""
    reference = SAMPEModel()
    return reference.ov_model

@staticmethod
def get_sequential_matmul_model() -> ov.Model:
return SequentialMatmulModel().ov_model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from nncf.torch.quantization.quantize_functions import unpack_int4
from nncf.torch.quantization.quantize_functions import unpack_uint4
from tests.cross_fw.test_templates.helpers import RoPEModel
from tests.cross_fw.test_templates.helpers import SAMPEModel
from tests.cross_fw.test_templates.template_test_weights_compression import TemplateWeightCompression
from tests.torch.test_models.synthetic import ShortTransformer
from tests.torch.test_tensor import cast_to
Expand Down Expand Up @@ -480,6 +481,10 @@ def get_matmul_model() -> torch.nn.Module:
def get_RoPE_model() -> torch.nn.Module:
return RoPEModel()

@staticmethod
def get_SAM_PE_model() -> torch.nn.Module:
    """Return an eager torch SAM positional-embedding helper model."""
    model = SAMPEModel()
    return model

@staticmethod
def get_sequential_matmul_model() -> torch.nn.Module:
return SequentialMatmulModel()
Expand Down
8 changes: 8 additions & 0 deletions tests/torch2/fx/test_compress_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
from nncf.torch.quantization.layers import INT4SymmetricWeightsDecompressor
from tests.cross_fw.test_templates.helpers import RoPEModel
from tests.cross_fw.test_templates.helpers import SAMPEModel
from tests.cross_fw.test_templates.template_test_weights_compression import TemplateWeightCompression
from tests.torch.test_models.synthetic import ShortTransformer
from tests.torch.test_tensor import cast_to
Expand Down Expand Up @@ -329,6 +330,13 @@ def get_RoPE_model() -> torch.fx.GraphModule:
exported_model = get_torch_fx_model(model, ex_input)
return exported_model

@staticmethod
def get_SAM_PE_model() -> torch.fx.GraphModule:
    """Export the SAM positional-embedding helper model to torch.fx."""
    eager_model = SAMPEModel()
    example_input = torch.ones(SAMPEModel.INPUT_SIZE, dtype=torch.float32)
    return get_torch_fx_model(eager_model, example_input)

@staticmethod
def get_sequential_matmul_model() -> torch.fx.GraphModule:
model = SequentialMatmulModel()
Expand Down