Skip to content

[String] ASCII fast-path for UTF16View #20848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 30, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 33 additions & 20 deletions stdlib/public/core/StringUTF16View.swift
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ extension String.UTF16View {
#else
@usableFromInline @inline(never) @_effects(releasenone)
internal func _invariantCheck() {
  // Neither bound of the view may be a sub-scalar (mid-scalar) transcoded
  // index: both must carry a transcoded offset of zero.
  let startIsScalarAligned = startIndex.transcodedOffset == 0
  let endIsScalarAligned = endIndex.transcodedOffset == 0
  _internalInvariant(startIsScalarAligned && endIsScalarAligned)
}
#endif // INTERNAL_CHECKS_ENABLED
}
Expand All @@ -143,9 +144,8 @@ extension String.UTF16View: BidirectionalCollection {

@inlinable @inline(__always)
public func index(after i: Index) -> Index {
// TODO(String performance) known-ASCII fast path

if _slowPath(_guts.isForeign) { return _foreignIndex(after: i) }
if _guts.isASCII { return i.nextEncoded }

// For a BMP scalar (1-3 UTF-8 code units), advance past it. For a non-BMP
// scalar, use a transcoded offset first.
Expand All @@ -159,9 +159,8 @@ extension String.UTF16View: BidirectionalCollection {
@inlinable @inline(__always)
public func index(before i: Index) -> Index {
precondition(!i.isZeroPosition)
// TODO(String performance) known-ASCII fast path

if _slowPath(_guts.isForeign) { return _foreignIndex(before: i) }
if _guts.isASCII { return i.priorEncoded }

if i.transcodedOffset != 0 {
_internalInvariant(i.transcodedOffset == 1)
Expand All @@ -181,8 +180,6 @@ extension String.UTF16View: BidirectionalCollection {
}

public func index(_ i: Index, offsetBy n: Int) -> Index {
// TODO(String performance) known-ASCII fast path

if _slowPath(_guts.isForeign) {
return _foreignIndex(i, offsetBy: n)
}
Expand All @@ -195,7 +192,6 @@ extension String.UTF16View: BidirectionalCollection {
public func index(
_ i: Index, offsetBy n: Int, limitedBy limit: Index
) -> Index? {
// TODO(String performance) known-ASCII fast path
if _slowPath(_guts.isForeign) {
return _foreignIndex(i, offsetBy: n, limitedBy: limit)
}
Expand All @@ -217,7 +213,6 @@ extension String.UTF16View: BidirectionalCollection {
}

public func distance(from start: Index, to end: Index) -> Int {
// TODO(String performance) known-ASCII fast path
if _slowPath(_guts.isForeign) {
return _foreignDistance(from: start, to: end)
}
Expand Down Expand Up @@ -250,7 +245,6 @@ extension String.UTF16View: BidirectionalCollection {
@inlinable
public subscript(i: Index) -> UTF16.CodeUnit {
@inline(__always) get {
// TODO(String performance) known-ASCII fast path
String(_guts)._boundsCheck(i)

if _fastPath(_guts.isFastUTF8) {
Expand All @@ -267,16 +261,16 @@ extension String.UTF16View: BidirectionalCollection {
}
}
extension String.UTF16View: CustomStringConvertible {
  /// A textual representation of this view: the `String` constructed from
  /// the same `_guts` storage that backs the view.
  @inlinable
  public var description: String {
    @inline(__always) get { return String(_guts) }
  }
}

extension String.UTF16View: CustomDebugStringConvertible {
  /// A debugging representation of this view: the debug description of
  /// `description`, wrapped as `StringUTF16(...)`.
  public var debugDescription: String {
    return "StringUTF16(\(self.description.debugDescription))"
  }
}

extension String {
Expand Down Expand Up @@ -462,8 +456,11 @@ extension String.UTF16View {
// Trivial and common: start
if idx == startIndex { return 0 }

if _guts.isASCII { return idx.encodedOffset }

if _guts.isASCII {
_internalInvariant(idx.transcodedOffset == 0)
return idx.encodedOffset
}

if idx.encodedOffset < _shortHeuristic || !_guts.hasBreadcrumbs {
return _distance(from: startIndex, to: idx)
}
Expand All @@ -483,7 +480,9 @@ extension String.UTF16View {
internal func _nativeGetIndex(for offset: Int) -> Index {
// Trivial and common: start
if offset == 0 { return startIndex }


if _guts.isASCII { return Index(encodedOffset: offset) }

if offset < _shortHeuristic || !_guts.hasBreadcrumbs {
return _index(startIndex, offsetBy: offset)
}
Expand Down Expand Up @@ -542,12 +541,26 @@ extension String {

if _slowPath(range.isEmpty) { return }

let isASCII = _guts.isASCII
return _guts.withFastUTF8 { utf8 in
var writeIdx = 0
let writeEnd = buffer.count
var readIdx = range.lowerBound.encodedOffset
let readEnd = range.upperBound.encodedOffset

if isASCII {
_internalInvariant(range.lowerBound.transcodedOffset == 0)
_internalInvariant(range.upperBound.transcodedOffset == 0)
while readIdx < readEnd {
_internalInvariant(utf8[readIdx] < 0x80)
buffer[_unchecked: writeIdx] = UInt16(
truncatingIfNeeded: utf8[_unchecked: readIdx])
readIdx &+= 1
writeIdx &+= 1
}
return
}

// Handle mid-transcoded-scalar initial index
if _slowPath(range.lowerBound.transcodedOffset != 0) {
_internalInvariant(range.lowerBound.transcodedOffset == 1)
Expand Down