Description
Description
In certain, tricky to reproduce, circumstances,
data[2] = data[next]
is (at least) an order of magnitude slower than
let wtf = data[next]
data[2] = wtf
Steps to reproduce
Apologies for the long report, I did my best to replicate this using a simpler example but was unable to.
I've implemented a bounded priority queue using an interval heap (tldr: an inline min-max heap a.k.a. the unholy mixture of a min heap with a max heap) and this code is derived from the fuzzer I used to test it (please note that this is work in progress and not available for redistribution)
The actual issue is around line 77 in the following code:
class BoundedPriorityQueue<Item: Comparable> {
// a bounded priority queue backed by an interval heap (inline min-max heap)
// reserves two nil sentries as the parent of root to avoid negative range checks
private var data: [Item?]
public init(capacity: Int) {
assert(capacity % 4 == 2, "capacity must be an odd multiple of 2")
data = [Item?](repeating: nil, count: capacity + 2)
limit = data.count>>1
}
@inline(__always) private func swap(_ i: Int, _ j: Int) {
guard let a = data[i], let b = data[j], a > b else { return }
data[j] = a
data[i] = b
}
@inline(__always) private func compare(_ i: Int, _ j: Int) -> Bool {
guard let a = data[i], let b = data[j] else { return false }
return a > b
}
@inline(__always) private func compareAndSwap(_ i: Int, _ j: Int) -> Bool {
guard let a = data[i], let b = data[j], a > b else { return false }
data[j] = a
data[i] = b
return true
}
// heap up: used when inserting new elements
private func upmin(_ p: Int) {
var i: Int, p = p; repeat {
i = p
p = ((i>>2)<<1)|1
} while compareAndSwap(p, i)
}
private func upmax(_ p: Int) {
var i: Int, p = p; repeat {
i = p
p = (i>>2)<<1
} while compareAndSwap(i, p)
}
private let limit: Int
private func downmin() {
// used when evicting the lowest priority message in favor of a new one
// the queue is always full when this is called
var l = 3, i, r: Int; repeat {
swap(l, l-1)
guard l < limit else { break }
i = l
r = (l<<1)|1
l = r-2
if compare(l, r) { l = r }
} while compareAndSwap(i, l)
}
private func downmax() {
// used when consuming the highest priority message
var l = 2, i, r: Int; repeat {
swap(l+1, l)
guard l < limit else { break }
i = l
l = l<<1
r = l+2
if compare(r, l) { l = r }
} while compareAndSwap(l, i)
}
private var next = 2 // index of next inserted element
public func pop() -> Item? {
guard next > 2 else { return nil }
let max = data[2]
next -= 1
// MARK: START OF BUG REPORT
//data[2] = data[next]
let wtf = data[next]
data[2] = wtf
// MARK: END OF BUG REPORT
data[next] = nil
downmax()
return max
}
public func append<T>(_ entry: Item, _ evict: ((Item) throws -> T?)? = nil) rethrows -> T? {
// adds `entry` to the queue if possible
// if this requires another item to be evicted, returns the result
// (or exception) of calling `evict(item)` where `item` has the
// lowest priority of all known items and _MAY_ be `entry` itself
// if `evict` is not called, the return value is `nil`
if next < data.count {
data[next] = entry
if next & 1 != 0 {
// next is min, max exists at next-1
let max = next - 1
if compareAndSwap(next, max) {
upmax(max)
} else {
upmin(next)
}
} else {
// next is max, closest min is in the parent
let min = ((next>>2)<<1)|1
if compareAndSwap(min, next) {
upmin(min)
} else {
upmax(next)
}
}
next += 1
return nil
} else {
// replace lowest prio; next is at least 4 and remains unchanged
let min = data[3]!
guard min < entry else {
return try evict?(entry)
}
data[3] = entry
downmin()
return try evict?(min)
}
}
}
// MARK: test harness
import Foundation
import SwiftUI
func measure<Result>(block: () throws -> Result) rethrows -> Result {
func now() -> UInt64 { clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW) }
let start = now()
defer { print("\((now() - start) / 1_000_000)ms") }
return try block()
}
func benchmark() { measure {
var evicted = [Int]()
@inline(__always) func evict(_ val: Int) { evicted.append(val) }
let samples = 100_000 // number of samples in test
let capacity = 50_002 // size of queue (odd multiple of 2)
let range = 10_000_000 // max start range
var output = Array(0..<capacity)
let queue = BoundedPriorityQueue<Int>(capacity: capacity)
let lower = Int.random(in: 0..<range)
var input = Array(lower..<lower + samples)
input.shuffle()
evicted.removeAll(keepingCapacity: true)
//startMeasuring()
let logStep = samples / 10
var count = 0
input.forEach {
queue.append($0, evict)
count += 1
if count % logStep == 0 { print("step") }
}
for i in 0..<capacity {
output[i] = queue.pop()!
count += 1
if count % logStep == 0 { print("pop step") }
}
//stopMeasuring()
evicted.sort()
//let first = lower + samples - capacity
//XCTAssertEqual(output.reversed(), Array((first..<lower + samples)))
//XCTAssertEqual(evicted, Array(lower..<first))
}}
@main
struct BenchmarkApp: App {
var body: some Scene {
WindowGroup {
Text("Hello, world!").onAppear(perform: benchmark)
}
}
}
Expected behavior
These are the timings I measured on my setup (macx86 compiler, iphone 13 mini target):
For samples = 100k, capacity = 50_002 (default included in report, sorry for asking you to (un)comment some code to compare)
without local: 3.918s
with local: 0.678s
For samples = 1M, capacity = 500_002 (heads up, this takes about 7 minutes)
without local: 372.236s
with local: 8.457s
Heaps are nominally n log n (assuming my implementation is at least somewhat correct), so the with local
timings check out.
Not sure where the slowdown comes from, but it's a pretty big one:
- To my knowledge, there are no (non-stack) allocations involved in the provided sample.
- I came about this while pausing the debugger at random and it always stopped at the same line.
Environment
- 1.75.2 Apple Swift version 5.8.1 (swiftlang-5.8.0.124.5 clang-1403.0.22.11.100)
- Xcode 14.3.1 (14E300c)
- Deployment target: 14.7, 15.0