-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #21 from cketti/charsequence_support
Change `StringExtensions` to `CharSequenceExtensions`
- Loading branch information
Showing
9 changed files
with
216 additions
and
240 deletions.
There are no files selected for viewing
53 changes: 53 additions & 0 deletions
53
kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
@file:Suppress( | ||
"INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers | ||
"INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers | ||
) | ||
package de.cketti.codepoints.deluxe | ||
|
||
import de.cketti.codepoints.codePointAt as intCodePointAt | ||
import de.cketti.codepoints.codePointBefore as intCodePointBefore | ||
|
||
/**
 * Looks up the Unicode code point located at [index] and returns it as a [CodePoint].
 *
 * `index` is an ordinary `CharSequence` index, counted in `Char`s from the beginning of the character sequence.
 *
 * An [IndexOutOfBoundsException] is thrown when `index` lies outside the bounds of this character sequence.
 *
 * This is a thin wrapper that converts the `Int` result of [codePointAt][intCodePointAt] via `toCodePoint()`.
 */
@kotlin.internal.HidesMembers
fun CharSequence.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()
|
||
/**
 * Looks up the Unicode code point that ends right before [index] and returns it as a [CodePoint].
 *
 * `index` is an ordinary `CharSequence` index, counted in `Char`s from the beginning of the character sequence.
 *
 * An [IndexOutOfBoundsException] is thrown when `index - 1` lies outside the bounds of this character sequence.
 *
 * This is a thin wrapper that converts the `Int` result of [codePointBefore][intCodePointBefore] via `toCodePoint()`.
 */
fun CharSequence.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()
|
||
/**
 * Wraps this character sequence in a [CodePointSequence], exposing its contents as [CodePoint]s.
 */
fun CharSequence.codePointSequence(): CodePointSequence = CodePointSequence(this)
|
||
/**
 * Creates a [CodePointIterator] over the [CodePoint]s of this character sequence.
 *
 * [startIndex] (default `0`) and [endIndex] (default [length][CharSequence.length]) are handed to the
 * [CodePointIterator] constructor unchanged.
 */
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator =
    CodePointIterator(this, startIndex, endIndex)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 0 additions & 50 deletions
50
kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 0 additions & 90 deletions
90
kotlin-codepoints/src/commonImplementation/kotlin/StringExtensions.kt
This file was deleted.
Oops, something went wrong.
155 changes: 155 additions & 0 deletions
155
kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
package de.cketti.codepoints | ||
|
||
/**
 * Returns the Unicode code point that starts at the given index.
 *
 * `index` is an ordinary `CharSequence` index, counted in `Char`s from the beginning of the character sequence.
 *
 * - If the `Char` at `index` is a high surrogate and is immediately followed by a low surrogate, the two `Char`s are
 *   decoded together and the resulting supplementary code point is returned.
 * - In every other case (a `Char` from the Basic Multilingual Plane, or a surrogate that is not part of a valid pair
 *   starting at `index`) the value of the single `Char` is returned, exactly as [CharSequence.get] would report it.
 *
 * An [IndexOutOfBoundsException] is thrown when `index` lies outside the bounds of this character sequence.
 *
 * When walking over all code points of a character sequence, advance the index by the number of `Char`s the returned
 * code point occupies. Use [CodePoints.charCount] for this.
 *
 * ```kotlin
 * // Text containing code points outside the BMP (encoded as surrogate pairs)
 * val text = "\uD83E\uDD95\uD83E\uDD96"
 *
 * var index = 0
 * while (index < text.length) {
 *     val codePoint = text.codePointAt(index)
 *     // Do something with codePoint
 *
 *     index += CodePoints.charCount(codePoint)
 * }
 * ```
 */
fun CharSequence.codePointAt(index: Int): Int {
    if (index < 0 || index >= length) throw IndexOutOfBoundsException()

    val high = this[index]
    val lowIndex = index + 1

    // Anything that cannot start a surrogate pair is returned as-is.
    if (!high.isHighSurrogate() || lowIndex >= length) return high.code

    val low = this[lowIndex]
    return if (low.isLowSurrogate()) CodePoints.toCodePoint(high, low) else high.code
}
|
||
/**
 * Returns the Unicode code point that ends right before the given index.
 *
 * `index` is an ordinary `CharSequence` index, counted in `Char`s from the beginning of the character sequence.
 *
 * When the `Char` at `index - 1` is a low surrogate and the `Char` at `index - 2` is a high surrogate, both are decoded
 * together and the resulting supplementary code point is returned. Otherwise the value of the single `Char` at
 * `index - 1` is returned, exactly as [CharSequence.get] would report it.
 *
 * An [IndexOutOfBoundsException] is thrown when `index - 1` lies outside the bounds of this character sequence.
 */
fun CharSequence.codePointBefore(index: Int): Int {
    val lowIndex = index - 1
    if (lowIndex < 0 || lowIndex >= length) throw IndexOutOfBoundsException()

    val low = this[lowIndex]

    // Anything that cannot end a surrogate pair is returned as-is.
    if (!low.isLowSurrogate() || lowIndex == 0) return low.code

    val high = this[lowIndex - 1]
    return if (high.isHighSurrogate()) CodePoints.toCodePoint(high, low) else low.code
}
|
||
/**
 * Returns the number of Unicode code points in the specified text range of this `CharSequence`.
 *
 * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
 * length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
 * one code point each. A high surrogate at `endIndex - 1` is never combined with a low surrogate at `endIndex`, because
 * that `Char` is outside the range.
 *
 * An empty range (`beginIndex == endIndex`) contains no code points, so `0` is returned.
 *
 * If `beginIndex` is negative, or `endIndex` is larger than the length of this character sequence, or `beginIndex` is
 * larger than `endIndex`, this method throws an [IndexOutOfBoundsException].
 */
fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) throw IndexOutOfBoundsException()

    var index = beginIndex
    var count = 0
    // The condition is checked up front so that an empty range is never read from.
    while (index < endIndex) {
        val firstChar = this[index]
        index++
        // Consume the second half of a surrogate pair only if it is still inside the range.
        if (firstChar.isHighSurrogate() && index < endIndex && this[index].isLowSurrogate()) {
            index++
        }

        count++
    }

    return count
}
|
||
/**
 * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
 *
 * Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each.
 *
 * If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and
 * the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is
 * negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this
 * method throws an [IndexOutOfBoundsException]. This includes a `codePointOffset` of [Int.MIN_VALUE].
 */
fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
    if (index !in 0..length) throw IndexOutOfBoundsException()

    // `currentIndex` is always a boundary between code points: the next code point to skip starts at it (forward)
    // or ends right before it (backward).
    var currentIndex = index
    if (codePointOffset >= 0) {
        repeat(codePointOffset) {
            if (currentIndex >= length) throw IndexOutOfBoundsException()
            val firstChar = this[currentIndex]
            currentIndex++
            if (firstChar.isHighSurrogate() && currentIndex < length && this[currentIndex].isLowSurrogate()) {
                currentIndex++
            }
        }
    } else {
        // Count up from the negative offset instead of negating it: `-Int.MIN_VALUE` overflows back to
        // `Int.MIN_VALUE`, which would make a `repeat(-codePointOffset)` loop silently do nothing.
        var remaining = codePointOffset
        while (remaining < 0) {
            if (currentIndex <= 0) throw IndexOutOfBoundsException()
            currentIndex--
            val lastChar = this[currentIndex]
            if (lastChar.isLowSurrogate() && currentIndex > 0 && this[currentIndex - 1].isHighSurrogate()) {
                currentIndex--
            }
            remaining++
        }
    }

    return currentIndex
}
Oops, something went wrong.