Skip to content

Assigning an array element to another is sometimes abysmally slow #67655

Open
@za-creature

Description

@za-creature

Description
In certain, tricky to reproduce, circumstances,

data[2] = data[next]

is (at least) an order of magnitude slower than

let wtf = data[next]
data[2] = wtf

Steps to reproduce
Apologies for the long report, I did my best to replicate this using a simpler example but was unable to.

I've implemented a bounded priority queue using an interval heap (tldr: an inline min-max heap a.k.a. the unholy mixture of a min heap with a max heap) and this code is derived from the fuzzer I used to test it (please note that this is work in progress and not available for redistribution)

The actual issue is around line 77 in the following code:

class BoundedPriorityQueue<Item: Comparable> {
    // a bounded priority queue backed by an interval heap (inline min-max heap)
    // reserves two nil sentries as the parent of root to avoid negative range checks
    private var data: [Item?]
    public init(capacity: Int) {
        assert(capacity % 4 == 2, "capacity must be an odd multiple of 2")
        data = [Item?](repeating: nil, count: capacity + 2)
        limit = data.count>>1
    }

    @inline(__always) private func swap(_ i: Int, _ j: Int) {
        guard let a = data[i], let b = data[j], a > b else { return }
        data[j] = a
        data[i] = b
    }

    @inline(__always) private func compare(_ i: Int, _ j: Int) -> Bool {
        guard let a = data[i], let b = data[j] else { return false }
        return a > b
    }

    @inline(__always) private func compareAndSwap(_ i: Int, _ j: Int) -> Bool {
        guard let a = data[i], let b = data[j], a > b else { return false }
        data[j] = a
        data[i] = b
        return true
    }

    // heap up: used when inserting new elements
    private func upmin(_ p: Int) {
        var i: Int, p = p; repeat {
            i = p
            p = ((i>>2)<<1)|1
        } while compareAndSwap(p, i)
    }

    private func upmax(_ p: Int) {
        var i: Int, p = p; repeat {
            i = p
            p = (i>>2)<<1
        } while compareAndSwap(i, p)
    }

    private let limit: Int
    private func downmin() {
        // used when evicting the lowest priority message in favor of a new one
        // the queue is always full when this is called
        var l = 3, i, r: Int; repeat {
            swap(l, l-1)
            guard l < limit else { break }
            i = l
            r = (l<<1)|1
            l = r-2
            if compare(l, r) { l = r }
        } while compareAndSwap(i, l)
    }

    private func downmax() {
        // used when consuming the highest priority message
        var l = 2, i, r: Int; repeat {
            swap(l+1, l)
            guard l < limit else { break }
            i = l
            l = l<<1
            r = l+2
            if compare(r, l) { l = r }
        } while compareAndSwap(l, i)
    }

    private var next = 2 // index of next inserted element
    public func pop() -> Item? {
        guard next > 2 else { return nil }
        let max = data[2]
        next -= 1

        // MARK: START OF BUG REPORT
        //data[2] = data[next]
        let wtf = data[next]
        data[2] = wtf
        // MARK: END OF BUG REPORT

        data[next] = nil
        downmax()
        return max
    }

    public func append<T>(_ entry: Item, _ evict: ((Item) throws -> T?)? = nil) rethrows -> T? {
        // adds `entry` to the queue if possible
        // if this requires another item to be evicted, returns the result
        // (or exception) of calling `evict(item)` where `item` has the
        // lowest priority of all known items and _MAY_ be `entry` itself
        // if `evict` is not called, the return value is `nil`
        if next < data.count {
            data[next] = entry
            if next & 1 != 0 {
                // next is min, max exists at next-1
                let max = next - 1
                if compareAndSwap(next, max) {
                    upmax(max)
                } else {
                    upmin(next)
                }
            } else {
                // next is max, closest min is in the parent
                let min = ((next>>2)<<1)|1
                if compareAndSwap(min, next) {
                    upmin(min)
                } else {
                    upmax(next)
                }
            }
            next += 1
            return nil
        } else {
            // replace lowest prio; next is at least 4 and remains unchanged
            let min = data[3]!
            guard min < entry else {
                return try evict?(entry)
            }

            data[3] = entry
            downmin()
            return try evict?(min)
        }
    }
}


// MARK: test harness
import Foundation
import SwiftUI


func measure<Result>(block: () throws -> Result) rethrows -> Result {
    func now() -> UInt64 { clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW) }
    let start = now()
    defer { print("\((now() - start) / 1_000_000)ms") }
    return try block()
}


func benchmark() { measure {
    var evicted = [Int]()
    @inline(__always) func evict(_ val: Int) { evicted.append(val) }

    let samples = 100_000 // number of samples in test
    let capacity = 50_002  // size of queue (odd multiple of 2)
    let range = 10_000_000 // max start range
    var output = Array(0..<capacity)

    let queue = BoundedPriorityQueue<Int>(capacity: capacity)

    let lower = Int.random(in: 0..<range)
    var input = Array(lower..<lower + samples)

    input.shuffle()
    evicted.removeAll(keepingCapacity: true)

    //startMeasuring()
    let logStep = samples / 10
    var count = 0
    input.forEach {
        queue.append($0, evict)
        count += 1
        if count % logStep == 0 { print("step") }
    }
    for i in 0..<capacity {
        output[i] = queue.pop()!
        count += 1
        if count % logStep == 0 { print("pop step") }
    }
    //stopMeasuring()

    evicted.sort()
    //let first = lower + samples - capacity
    //XCTAssertEqual(output.reversed(), Array((first..<lower + samples)))
    //XCTAssertEqual(evicted, Array(lower..<first))
}}


@main
struct BenchmarkApp: App {
    var body: some Scene {
        WindowGroup {
            Text("Hello, world!").onAppear(perform: benchmark)
        }
    }
}

Expected behavior
These are the timings I measured on my setup (macx86 compiler, iphone 13 mini target):

For samples = 100k, capacity = 50_002 (default included in report, sorry for asking you to (un)comment some code to compare)

without local: 3.918s
with    local: 0.678s

For samples = 1M, capacity = 500_002 (heads up, this takes about 7 minutes)

without local: 372.236s
with    local:   8.457s

Heaps are nominally n log n (assuming my implementation is at least somewhat correct), so the with local timings check out.
Not sure where the slowdown comes from, but it's a pretty big one:

  • To my knowledge, there are no (non-stack) allocations involved in the provided sample.
  • I came about this while pausing the debugger at random and it always stopped at the same line.

Environment

  • 1.75.2 Apple Swift version 5.8.1 (swiftlang-5.8.0.124.5 clang-1403.0.22.11.100)
  • Xcode 14.3.1 (14E300c)
  • Deployment target: 14.7, 15.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    ArrayArea → standard library: The `Array` typebugA deviation from expected or documented behavior. Also: expected but undesirable behavior.performancerun-time performancestandard libraryArea: Standard library umbrellaswift 5.8

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions