Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Oct 29, 2025

📄 11% (0.11x) speedup for get_intersection_mask in inference/core/workflows/core_steps/fusion/detections_consensus/v1.py

⏱️ Runtime: 1.08 milliseconds → 968 microseconds (best of 368 runs)

📝 Explanation and details

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 73 Passed
🌀 Generated Regression Tests 36 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
⚙️ Existing Unit Tests and Runtime
Test File::Test Function Original ⏱️ Optimized ⏱️ Speedup
workflows/unit_tests/core_steps/fusion/test_detections_consensus.py::test_get_intersection_mask 51.6μs 48.5μs 6.52%✅
workflows/unit_tests/core_steps/fusion/test_detections_consensus.py::test_get_intersection_mask_when_single_element_provided 30.6μs 27.9μs 9.54%✅
🌀 Generated Regression Tests and Runtime
import numpy as np
# imports
import pytest  # used for our unit tests
from inference.core.workflows.core_steps.fusion.detections_consensus.v1 import \
    get_intersection_mask


class DummyDetections:
    """Lightweight stand-in for ``sv.Detections`` that exposes only ``mask``."""

    def __init__(self, mask):
        # Store the caller's object as-is; no copy or validation is performed.
        self.mask = mask
from inference.core.workflows.core_steps.fusion.detections_consensus.v1 import \
    get_intersection_mask

# unit tests

# ----------- BASIC TEST CASES -----------

def test_single_mask_all_true():
    """Run the intersection on a lone 3x3 mask whose pixels are all True."""
    detections = DummyDetections(np.ones((1, 3, 3), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 9.17μs -> 6.92μs (32.5% faster)
    result = codeflash_output

def test_single_mask_all_false():
    """Run the intersection on a lone 3x3 mask whose pixels are all False."""
    detections = DummyDetections(np.zeros((1, 3, 3), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 8.70μs -> 5.79μs (50.2% faster)
    result = codeflash_output

def test_two_masks_full_overlap():
    """Run the intersection on two 2x2 masks that are both entirely True."""
    detections = DummyDetections(np.ones((2, 2, 2), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 8.54μs -> 5.91μs (44.4% faster)
    result = codeflash_output

def test_two_masks_partial_overlap():
    """Intersect two 2x2 masks that agree only on pixels [0, 0] and [1, 1]."""
    mask1 = np.array([[True, False], [True, True]])
    mask2 = np.array([[True, True], [False, True]])
    masks = np.stack([mask1, mask2])
    detections = DummyDetections(masks)
    codeflash_output = get_intersection_mask(detections)  # 8.19μs -> 5.51μs (48.6% faster)
    result = codeflash_output
    # Only [0, 0] and [1, 1] are True in both masks.
    expected = np.array([[True, False], [False, True]])
    # Compare against the expected value so the test verifies the output
    # instead of merely exercising the call.
    assert np.array_equal(result, expected)

def test_three_masks_partial_overlap():
    """Intersect three 2x2 masks whose only common True pixel is [0, 0]."""
    mask1 = np.array([[True, True], [False, True]])
    mask2 = np.array([[True, False], [True, True]])
    mask3 = np.array([[True, True], [True, False]])
    masks = np.stack([mask1, mask2, mask3])
    detections = DummyDetections(masks)
    codeflash_output = get_intersection_mask(detections)  # 8.20μs -> 5.66μs (44.7% faster)
    result = codeflash_output
    # Only [0, 0] is True in all three masks.
    expected = np.array([[True, False], [False, False]])
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

# ----------- EDGE TEST CASES -----------

def test_empty_masks():
    """Run the intersection on a stack holding zero masks (shape (0, 3, 3)).

    NOTE(review): the outcome is not asserted here; the original comment
    expects either an error or an all-True result (as ``np.all`` would give).
    """
    no_masks = np.empty((0, 3, 3), dtype=bool)
    detections = DummyDetections(no_masks)
    codeflash_output = get_intersection_mask(detections)  # 8.39μs -> 5.83μs (43.9% faster)
    result = codeflash_output

def test_masks_with_different_shapes_raises():
    """Stacking masks of unequal shape must fail before any intersection runs."""
    mismatched = [np.ones((2, 3), dtype=bool), np.ones((3, 3), dtype=bool)]
    # The ValueError comes from np.stack itself, not from the code under test.
    with pytest.raises(ValueError):
        np.stack(mismatched)

def test_mask_with_one_pixel():
    """Run the intersection on three single-pixel masks (shape (3, 1, 1))."""
    single_pixel_masks = np.array([[[True]], [[False]], [[True]]])
    detections = DummyDetections(single_pixel_masks)
    codeflash_output = get_intersection_mask(detections)  # 10.3μs -> 7.33μs (41.1% faster)
    result = codeflash_output

def test_mask_with_one_mask_one_pixel():
    """Run the intersection on one single-pixel True mask (shape (1, 1, 1))."""
    detections = DummyDetections(np.array([[[True]]]))
    codeflash_output = get_intersection_mask(detections)  # 9.70μs -> 6.53μs (48.5% faster)
    result = codeflash_output

def test_mask_with_one_mask_one_pixel_false():
    """Run the intersection on one single-pixel False mask (shape (1, 1, 1))."""
    detections = DummyDetections(np.array([[[False]]]))
    codeflash_output = get_intersection_mask(detections)  # 9.09μs -> 6.19μs (46.9% faster)
    result = codeflash_output

def test_mask_dtype_non_bool():
    """Intersect masks that start as 0/1 integers and are cast to bool.

    The integer array is converted with ``astype(bool)`` before it reaches
    ``get_intersection_mask``, so the function itself receives boolean masks.
    """
    mask = np.array([[[1, 0], [1, 1]], [[1, 1], [0, 1]]], dtype=int)
    detections = DummyDetections(mask.astype(bool))
    codeflash_output = get_intersection_mask(detections)  # 9.20μs -> 6.14μs (49.9% faster)
    result = codeflash_output
    # Pixel-wise AND of the two masks:
    # [[1 & 1, 0 & 1], [1 & 0, 1 & 1]] -> [[True, False], [False, True]]
    expected = np.array([[True, False], [False, True]])
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

def test_mask_with_all_false_except_one():
    """Run the intersection on five 4x4 masks where only the first is True."""
    stack = np.zeros((5, 4, 4), dtype=bool)
    stack[0] = True  # every other mask stays entirely False
    detections = DummyDetections(stack)
    codeflash_output = get_intersection_mask(detections)  # 8.31μs -> 5.60μs (48.4% faster)
    result = codeflash_output

def test_mask_with_all_true_except_one_pixel():
    """Intersect three all-True 3x3 masks where one mask has one False pixel."""
    mask = np.ones((3, 3, 3), dtype=bool)
    mask[1, 2, 1] = False
    detections = DummyDetections(mask)
    codeflash_output = get_intersection_mask(detections)  # 7.93μs -> 5.28μs (50.2% faster)
    result = codeflash_output
    # The single False pixel at (row 2, col 1) must knock out that pixel only.
    expected = np.ones((3, 3), dtype=bool)
    expected[2, 1] = False
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

def test_mask_with_no_overlap():
    """Run the intersection on three 2x2 masks with mutually exclusive pixels."""
    stack = np.zeros((3, 2, 2), dtype=bool)
    # Each mask lights up a different pixel, so no pixel is shared.
    for mask_idx, row, col in ((0, 0, 0), (1, 0, 1), (2, 1, 0)):
        stack[mask_idx, row, col] = True
    detections = DummyDetections(stack)
    codeflash_output = get_intersection_mask(detections)  # 8.69μs -> 5.89μs (47.6% faster)
    result = codeflash_output

# ----------- LARGE SCALE TEST CASES -----------

def test_large_number_of_masks_and_pixels_all_true():
    """Run the intersection on 100 all-True masks of size 20x20."""
    N, H, W = 100, 20, 20
    detections = DummyDetections(np.ones((N, H, W), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 11.0μs -> 8.33μs (31.9% faster)
    result = codeflash_output

def test_large_number_of_masks_and_pixels_all_false():
    """Run the intersection on 100 all-False masks of size 20x20."""
    N, H, W = 100, 20, 20
    detections = DummyDetections(np.zeros((N, H, W), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 11.2μs -> 8.42μs (33.5% faster)
    result = codeflash_output

def test_large_number_of_masks_and_pixels_partial_overlap():
    """Run the intersection on 50 30x30 masks with one False pixel in one mask."""
    N, H, W = 50, 30, 30
    stack = np.ones((N, H, W), dtype=bool)
    hole = (10, 5, 5)  # (mask index, row, col) of the single False pixel
    stack[hole] = False
    detections = DummyDetections(stack)
    codeflash_output = get_intersection_mask(detections)  # 10.6μs -> 7.69μs (38.2% faster)
    result = codeflash_output
    # Every other pixel should be True; put the input back afterwards.
    stack[hole] = True

def test_large_masks_sparse_overlap():
    """Intersect ten 100x100 masks that share exactly one True pixel."""
    N, H, W = 10, 100, 100
    mask = np.zeros((N, H, W), dtype=bool)
    # Pixel (50, 50) is the only one set, and it is set in every mask.
    mask[:, 50, 50] = True
    detections = DummyDetections(mask)
    codeflash_output = get_intersection_mask(detections)  # 12.7μs -> 9.80μs (29.7% faster)
    result = codeflash_output
    expected = np.zeros((H, W), dtype=bool)
    expected[50, 50] = True
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

def test_large_masks_random_overlap():
    """Check a seeded random (20, 40, 40) mask stack against ``np.all``."""
    N, H, W = 20, 40, 40
    rng = np.random.default_rng(42)  # fixed seed keeps the test deterministic
    mask = rng.integers(0, 2, size=(N, H, W), dtype=bool)
    detections = DummyDetections(mask)
    codeflash_output = get_intersection_mask(detections)  # 8.00μs -> 5.85μs (36.8% faster)
    result = codeflash_output
    # A pixel is True only if every mask is True at that pixel.
    expected = np.all(mask, axis=0)
    # Compare against the reference so the test verifies the output.
    assert np.array_equal(result, expected)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import numpy as np
# imports
import pytest  # used for our unit tests
from inference.core.workflows.core_steps.fusion.detections_consensus.v1 import \
    get_intersection_mask


# Helper class to mimic sv.Detections for testing
class DummyDetections:
    """Test double mimicking ``sv.Detections``; carries only a ``mask``."""

    def __init__(self, mask):
        # The mask is stored untouched so tests control its exact dtype/shape.
        self.mask = mask

# ------------------------------
# Basic Test Cases
# ------------------------------

def test_single_mask_all_true():
    """Run the intersection on a lone 4x4 mask whose pixels are all True."""
    detections = DummyDetections(np.ones((1, 4, 4), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 10.4μs -> 7.46μs (39.2% faster)
    result = codeflash_output

def test_single_mask_mixed():
    """Run the intersection on a lone 2x2 mask with mixed True/False pixels."""
    lone_mask = np.array([[[True, False], [False, True]]])
    detections = DummyDetections(lone_mask)
    codeflash_output = get_intersection_mask(detections)  # 9.30μs -> 6.53μs (42.3% faster)
    result = codeflash_output

def test_two_masks_partial_overlap():
    """Intersect two 2x2 masks that agree only on pixels [0, 0] and [1, 1]."""
    mask = np.array([
        [[True, False], [True, True]],
        [[True, True], [False, True]],
    ])
    detections = DummyDetections(mask)
    codeflash_output = get_intersection_mask(detections)  # 9.64μs -> 6.65μs (45.0% faster)
    result = codeflash_output
    # Intersection: [[True & True, False & True], [True & False, True & True]]
    expected = np.array([[True, False], [False, True]])
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

def test_three_masks_no_overlap():
    """Run the intersection on three 2x2 masks with no pixel True in all."""
    first = [[True, False], [False, False]]
    second = [[False, True], [False, False]]
    third = [[False, False], [True, False]]
    detections = DummyDetections(np.array([first, second, third]))
    codeflash_output = get_intersection_mask(detections)  # 9.25μs -> 6.11μs (51.4% faster)
    result = codeflash_output

def test_three_masks_full_overlap():
    """Run the intersection on three 5x5 masks that are True everywhere."""
    detections = DummyDetections(np.ones((3, 5, 5), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 7.95μs -> 5.49μs (44.7% faster)
    result = codeflash_output

def test_three_masks_partial_overlap():
    """Intersect three 2x2 masks that share True pixels at [0, 0] and [1, 1]."""
    mask = np.array([
        [[True, True], [False, True]],
        [[True, False], [False, True]],
        [[True, True], [True, True]],
    ])
    detections = DummyDetections(mask)
    # Pixel-wise AND across the three masks, spelled out per pixel.
    expected = np.array([[True & True & True, True & False & True],
                         [False & False & True, True & True & True]])
    codeflash_output = get_intersection_mask(detections)  # 8.91μs -> 5.88μs (51.5% faster)
    result = codeflash_output
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

# ------------------------------
# Edge Test Cases
# ------------------------------

def test_empty_mask_list():
    """Run the intersection on a stack that holds zero masks (shape (0, 3, 3))."""
    no_masks = np.empty((0, 3, 3), dtype=bool)
    detections = DummyDetections(no_masks)
    codeflash_output = get_intersection_mask(detections)  # 8.70μs -> 5.42μs (60.4% faster)
    result = codeflash_output

def test_empty_height_width():
    """Run the intersection on two masks with zero height and zero width."""
    degenerate = np.ones((2, 0, 0), dtype=bool)
    detections = DummyDetections(degenerate)
    codeflash_output = get_intersection_mask(detections)  # 7.58μs -> 4.91μs (54.4% faster)
    result = codeflash_output

def test_mixed_true_false_masks():
    """Run the intersection on one all-True and one all-False 2x2 mask."""
    all_true = [[True, True], [True, True]]
    all_false = [[False, False], [False, False]]
    detections = DummyDetections(np.array([all_true, all_false]))
    codeflash_output = get_intersection_mask(detections)  # 9.26μs -> 6.30μs (46.9% faster)
    result = codeflash_output

def test_masks_with_different_shapes_raises():
    """Combining masks of unequal shape must raise ``ValueError``.

    The error comes from building the mask stack, not from
    ``get_intersection_mask`` itself.
    """
    mask1 = np.ones((2, 2), dtype=bool)
    mask2 = np.ones((3, 2), dtype=bool)
    with pytest.raises(ValueError):
        # np.stack rejects mismatched shapes on every NumPy version, whereas
        # np.array([mask1, mask2]) only raises on NumPy >= 1.24 (older
        # releases built a ragged object array with a deprecation warning).
        _ = np.stack([mask1, mask2])

def test_non_boolean_masks():
    """Intersect integer 0/1 masks; nonzero values are expected to act as True."""
    mask = np.array([
        [[1, 0], [0, 1]],
        [[1, 1], [0, 1]],
    ], dtype=int)
    detections = DummyDetections(mask)
    codeflash_output = get_intersection_mask(detections)  # 11.5μs -> 8.56μs (34.4% faster)
    result = codeflash_output
    # Should treat nonzero as True.
    expected = np.array([[True, False], [False, True]])
    # Compare against the expected value so the test verifies the output.
    assert np.array_equal(result, expected)

def test_mask_with_nan_values():
    """Run the intersection on object-dtype masks that contain ``np.nan``.

    NOTE(review): the result is not asserted; the original comment expects
    NaN to behave as truthy (NaN is True in a boolean context).
    """
    first = [[True, np.nan], [False, True]]
    second = [[True, True], [np.nan, True]]
    nan_masks = np.array([first, second], dtype=object)
    detections = DummyDetections(nan_masks)
    codeflash_output = get_intersection_mask(detections)  # 10.6μs -> 7.93μs (33.6% faster)
    result = codeflash_output

# ------------------------------
# Large Scale Test Cases
# ------------------------------

def test_large_masks_all_true():
    """Run the intersection on ten all-True masks of size 100x100."""
    detections = DummyDetections(np.ones((10, 100, 100), dtype=bool))
    codeflash_output = get_intersection_mask(detections)  # 12.2μs -> 9.71μs (25.6% faster)
    result = codeflash_output

def test_large_masks_partial_overlap():
    """Run the intersection on five 500x500 masks with two False blocks cut out."""
    stack = np.ones((5, 500, 500), dtype=bool)
    # Carve a 100x100 hole out of the first mask and another out of the second.
    stack[0, 100:200, 100:200] = False
    stack[1, 300:400, 300:400] = False
    detections = DummyDetections(stack)
    codeflash_output = get_intersection_mask(detections)  # 54.4μs -> 50.4μs (8.03% faster)
    result = codeflash_output

def test_large_masks_no_overlap():
    """Run the intersection on three 1000x1000 masks with disjoint True pixels."""
    stack = np.zeros((3, 1000, 1000), dtype=bool)
    # Mask i gets exactly one True pixel, at (i, i), so no pixel is shared.
    for i in range(3):
        stack[i, i, i] = True
    detections = DummyDetections(stack)
    codeflash_output = get_intersection_mask(detections)  # 171μs -> 165μs (3.59% faster)
    result = codeflash_output

def test_large_masks_sparse_overlap():
    """Run the intersection on ten 1000x1000 masks sharing one True pixel."""
    stack = np.zeros((10, 1000, 1000), dtype=bool)
    # Pixel (123, 456) is set in every one of the ten masks.
    stack[:, 123, 456] = True
    detections = DummyDetections(stack)
    codeflash_output = get_intersection_mask(detections)  # 469μs -> 465μs (0.762% faster)
    result = codeflash_output

def test_large_masks_random():
    """Check a seeded random (5, 100, 100) mask stack against ``np.all``."""
    np.random.seed(42)  # fixed seed keeps the test deterministic
    mask = np.random.rand(5, 100, 100) > 0.5
    detections = DummyDetections(mask)
    codeflash_output = get_intersection_mask(detections)  # 14.9μs -> 10.7μs (39.4% faster)
    result = codeflash_output
    # The intersection is True only where all five masks are True.
    expected = np.all(mask, axis=0)
    # Compare against the reference so the test verifies the output.
    assert np.array_equal(result, expected)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run `git checkout codeflash/optimize-get_intersection_mask-mhbvl2g3`, then push.

Codeflash

@codeflash-ai codeflash-ai bot requested a review from mashraf-222 October 29, 2025 10:51
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Oct 29, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants