-
Notifications
You must be signed in to change notification settings - Fork 10.5k
[String] Last-minute ABI adjustment: 4-bit discriminator #21310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,18 +72,9 @@ extension _SmallString { | |
} | ||
} | ||
|
||
@inlinable | ||
internal var discriminator: _StringObject.Discriminator { | ||
@inline(__always) get { | ||
let value = _storage.1 &>> _StringObject.Nibbles.discriminatorShift | ||
return _StringObject.Discriminator(UInt8(truncatingIfNeeded: value)) | ||
} | ||
@inline(__always) set { | ||
_storage.1 &= _StringObject.Nibbles.largeAddressMask | ||
_storage.1 |= ( | ||
UInt64(truncatingIfNeeded: newValue._value) | ||
&<< _StringObject.Nibbles.discriminatorShift) | ||
} | ||
@inlinable @inline(__always) | ||
internal var rawDiscriminatedObject: UInt64 { | ||
return _storage.1 | ||
} | ||
|
||
@inlinable | ||
|
@@ -96,7 +87,7 @@ extension _SmallString { | |
@inlinable | ||
internal var count: Int { | ||
@inline(__always) get { | ||
return discriminator.smallCount | ||
return _StringObject.getSmallCount(fromRaw: rawDiscriminatedObject) | ||
} | ||
} | ||
|
||
|
@@ -108,27 +99,22 @@ extension _SmallString { | |
@inlinable | ||
internal var isASCII: Bool { | ||
@inline(__always) get { | ||
return discriminator.smallIsASCII | ||
return _StringObject.getSmallIsASCII(fromRaw: rawDiscriminatedObject) | ||
} | ||
} | ||
|
||
// Give raw, nul-terminated code units. This is only for limited internal | ||
// usage: it always clears the discriminator and count (in case it's full) | ||
@inlinable | ||
@inlinable @inline(__always) | ||
internal var zeroTerminatedRawCodeUnits: RawBitPattern { | ||
@inline(__always) get { | ||
return ( | ||
self._storage.0, | ||
self._storage.1 & _StringObject.Nibbles.largeAddressMask) | ||
} | ||
let smallStringCodeUnitMask: UInt64 = 0x00FF_FFFF_FFFF_FFFF | ||
return (self._storage.0, self._storage.1 & smallStringCodeUnitMask) | ||
} | ||
|
||
@inlinable | ||
internal func computeIsASCII() -> Bool { | ||
// TODO(String micro-performance): Evaluate other expressions, e.g. | first | ||
let asciiMask: UInt64 = 0x8080_8080_8080_8080 | ||
let raw = zeroTerminatedRawCodeUnits | ||
return (raw.0 & asciiMask == 0) && (raw.1 & asciiMask == 0) | ||
return (raw.0 | raw.1) & asciiMask == 0 | ||
} | ||
} | ||
|
||
|
@@ -220,7 +206,7 @@ extension _SmallString { | |
|
||
// Overwrite stored code units, including uninitialized. `f` should return the | ||
// new count. | ||
@inlinable @inline(__always) | ||
@inline(__always) | ||
internal mutating func withMutableCapacity( | ||
_ f: (UnsafeMutableBufferPointer<UInt8>) throws -> Int | ||
) rethrows { | ||
|
@@ -231,14 +217,28 @@ extension _SmallString { | |
return try f(UnsafeMutableBufferPointer( | ||
start: ptr, count: _SmallString.capacity)) | ||
} | ||
|
||
_internalInvariant(len <= _SmallString.capacity) | ||
discriminator = .small(withCount: len, isASCII: self.computeIsASCII()) | ||
|
||
let (leading, trailing) = self.zeroTerminatedRawCodeUnits | ||
self = _SmallString(leading: leading, trailing: trailing, count: len) | ||
} | ||
} | ||
|
||
// Creation | ||
extension _SmallString { | ||
@inlinable @inline(__always) | ||
internal init(leading: UInt64, trailing: UInt64, count: Int) { | ||
_internalInvariant(count <= _SmallString.capacity) | ||
|
||
let isASCII = (leading | trailing) & 0x8080_8080_8080_8080 == 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ooh, nice. (I know it's not new; I just hadn't seen it before.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, given how fast this is, I wonder if the dedicated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not that fast; we have to mask off the discriminator too. While ARM can encode the constant as an immediate, but it's still more code. Specifically for small strings, ASCII-ness is by far the most common and important characteristic to know. The other contender for this bit would be remembering both isASCII and doesn't have CR+LF, i.e. single-scalar-grapheme. But, I think it's better to check that on Character access. public func _forCodeGen_1() { if isASCII { foo() } else { bar() } }
public func _forCodeGen_2() { if computeIsASCII() { foo() } else { bar() } }
Considering that this check should probably be at the top of every String API inlined into user code, it's probably worth it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory that could be
but I take your point. |
||
let countAndDiscriminator = UInt64(truncatingIfNeeded: count) &<< 56 | ||
| _StringObject.Nibbles.small(isASCII: isASCII) | ||
_internalInvariant(trailing & countAndDiscriminator == 0) | ||
|
||
self.init(raw: (leading, trailing | countAndDiscriminator)) | ||
_internalInvariant(self.count == count) | ||
} | ||
|
||
// Direct from UTF-8 | ||
@inlinable @inline(__always) | ||
internal init?(_ input: UnsafeBufferPointer<UInt8>) { | ||
|
@@ -251,11 +251,7 @@ extension _SmallString { | |
let leading = _bytesToUInt64(ptr, Swift.min(input.count, 8)) | ||
let trailing = count > 8 ? _bytesToUInt64(ptr + 8, count &- 8) : 0 | ||
|
||
let isASCII = (leading | trailing) & 0x8080_8080_8080_8080 == 0 | ||
let discriminator = _StringObject.Discriminator.small( | ||
withCount: count, | ||
isASCII: isASCII) | ||
self.init(raw: (leading, trailing | discriminator.rawBits)) | ||
self.init(leading: leading, trailing: trailing, count: count) | ||
} | ||
|
||
@usableFromInline // @testable | ||
|
@@ -273,13 +269,8 @@ extension _SmallString { | |
} | ||
_internalInvariant(writeIdx == totalCount) | ||
|
||
let isASCII = base.isASCII && other.isASCII | ||
let discriminator = _StringObject.Discriminator.small( | ||
withCount: totalCount, | ||
isASCII: isASCII) | ||
|
||
let (leading, trailing) = result.zeroTerminatedRawCodeUnits | ||
self.init(raw: (leading, trailing | discriminator.rawBits)) | ||
self.init(leading: leading, trailing: trailing, count: totalCount) | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -281,9 +281,13 @@ extension String { | |
} | ||
} | ||
if _guts._object.isImmortal { | ||
// TODO: We'd rather emit a valid ObjC object statically than create a | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a bug tracking this? I should either CC on it or steal it if so There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we do, then I assigned it to you ;-) |
||
// shared string class instance. | ||
let gutsCountAndFlags = _guts._object._countAndFlags | ||
return _SharedStringStorage( | ||
immortal: _guts._object.fastUTF8.baseAddress!, | ||
countAndFlags: _guts._object._countAndFlags) | ||
countAndFlags: _StringObject.CountAndFlags( | ||
sharedCount: _guts.count, isASCII: gutsCountAndFlags.isASCII)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the reasoning for making a new CountAndFlags here rather than using gutsCountAndFlags? Just Shared Strings needing some different flags? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shared strings do not have tail-allocated contents, which is one of the new perf flags, so we need to strip it. I very slightly prefer using initializers than mutation, because it makes coding style more uniform for things like |
||
} | ||
|
||
_internalInvariant(_guts._object.hasObjCBridgeableObject, | ||
|
Uh oh!
There was an error while loading. Please reload this page.