diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt new file mode 100644 index 0000000..c680122 --- /dev/null +++ b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt @@ -0,0 +1,53 @@ +@file:Suppress( + "INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers + "INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers +) +package de.cketti.codepoints.deluxe + +import de.cketti.codepoints.codePointAt as intCodePointAt +import de.cketti.codepoints.codePointBefore as intCodePointBefore + +/** + * Returns the Unicode code point at the specified index. + * + * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character + * sequence. + * + * If the `index` is out of bounds of this character sequence, this method throws an [IndexOutOfBoundsException]. + * + * See [codePointAt][intCodePointAt] for details on how surrogate pairs and unpaired surrogates are handled; this + * function returns the same code point converted to a [CodePoint]. + */ +@kotlin.internal.HidesMembers +fun CharSequence.codePointAt(index: Int): CodePoint { + return intCodePointAt(index).toCodePoint() +} + +/** + * Returns the Unicode code point before the specified index. + * + * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character + * sequence. + * + * If the value `index - 1` is out of bounds of this character sequence, this method throws an + * [IndexOutOfBoundsException]. + * + * See [codePointBefore][intCodePointBefore]. + */ +fun CharSequence.codePointBefore(index: Int): CodePoint { + return intCodePointBefore(index).toCodePoint() +} + +/** + * Sequence of [CodePoint]s in this character sequence. + */ +fun CharSequence.codePointSequence(): CodePointSequence { + return CodePointSequence(this) +} + +/** + * Iterator for [CodePoint]s in this character sequence. 
+ */ +fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator { + return CodePointIterator(this, startIndex, endIndex) +} diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt index 15d1fd7..dc51181 100644 --- a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt +++ b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt @@ -3,23 +3,23 @@ package de.cketti.codepoints.deluxe import kotlin.jvm.JvmInline /** - * Sequence of [CodePoint]s in the given [String]. + * Sequence of [CodePoint]s in the given [CharSequence]. */ @JvmInline -value class CodePointSequence(private val text: String) : Sequence<CodePoint> { +value class CodePointSequence(private val text: CharSequence) : Sequence<CodePoint> { override fun iterator(): CodePointIterator { return text.codePointIterator() } } /** - * Iterator for [CodePoint]s in the given [String]. + * Iterator for [CodePoint]s in the given [CharSequence]. * - * The `startIndex` and `endIndex` parameters are the regular `String` indices, i.e. the number of `Char`s from the - * start of the string. + * The `startIndex` and `endIndex` parameters are the regular `CharSequence` indices, i.e. the number of `Char`s from + * the start of the character sequence. 
*/ class CodePointIterator( - private val text: String, + private val text: CharSequence, startIndex: Int, private val endIndex: Int ) : Iterator { diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt deleted file mode 100644 index 7e8ef65..0000000 --- a/kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt +++ /dev/null @@ -1,50 +0,0 @@ -@file:Suppress( - "INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers - "INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers -) -package de.cketti.codepoints.deluxe - -import de.cketti.codepoints.codePointAt as intCodePointAt -import de.cketti.codepoints.codePointBefore as intCodePointBefore - -/** - * Returns the Unicode code point at the specified index. - * - * The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string. - * - * If the `index` is out of bounds of this string, this method throws an [IndexOutOfBoundsException]. - * - * See [codePointAt][intCodePointAt]. - * ``` - */ -@kotlin.internal.HidesMembers -fun String.codePointAt(index: Int): CodePoint { - return intCodePointAt(index).toCodePoint() -} - -/** - * Returns the Unicode code point before the specified index. - * - * The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string. - * - * If the value `index - 1` is out of bounds of this string, this method throws an [IndexOutOfBoundsException]. - * - * See [codePointBefore][intCodePointBefore]. - */ -fun String.codePointBefore(index: Int): CodePoint { - return intCodePointBefore(index).toCodePoint() -} - -/** - * Sequence of [CodePoint]s in this string. - */ -fun String.codePointSequence(): CodePointSequence { - return CodePointSequence(this) -} - -/** - * Iterator for [CodePoint]s in this string. 
- */ -fun String.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator { - return CodePointIterator(this, startIndex, endIndex) -} diff --git a/kotlin-codepoints-deluxe/src/commonTest/kotlin/StringExtensionsTest.kt b/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt similarity index 98% rename from kotlin-codepoints-deluxe/src/commonTest/kotlin/StringExtensionsTest.kt rename to kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt index 7ce2f9d..cae26e0 100644 --- a/kotlin-codepoints-deluxe/src/commonTest/kotlin/StringExtensionsTest.kt +++ b/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt @@ -3,7 +3,7 @@ package de.cketti.codepoints.deluxe import kotlin.test.assertEquals import kotlin.test.Test -class StringExtensionsTest { +class CharSequenceExtensionsTest { @Test fun codePointAt() { assertEquals('a'.toCodePoint(), "a".codePointAt(0)) diff --git a/kotlin-codepoints/src/commonImplementation/kotlin/StringExtensions.kt b/kotlin-codepoints/src/commonImplementation/kotlin/StringExtensions.kt deleted file mode 100644 index 08c55f9..0000000 --- a/kotlin-codepoints/src/commonImplementation/kotlin/StringExtensions.kt +++ /dev/null @@ -1,90 +0,0 @@ -package de.cketti.codepoints - -import de.cketti.codepoints.CodePoints.toCodePoint - -actual fun String.codePointAt(index: Int): Int { - if (index !in indices) throw IndexOutOfBoundsException() - - val firstChar = this[index] - if (firstChar.isHighSurrogate() && index + 1 < length) { - val nextChar = this[index + 1] - if (nextChar.isLowSurrogate()) { - return toCodePoint(firstChar, nextChar) - } - } - - return firstChar.code -} - -actual fun String.codePointBefore(index: Int): Int { - val startIndex = index - 1 - if (startIndex !in indices) throw IndexOutOfBoundsException() - - val firstChar = this[startIndex] - if (firstChar.isLowSurrogate() && startIndex - 1 >= 0) { - val previousChar = this[startIndex - 1] - if 
(previousChar.isHighSurrogate()) { - return toCodePoint(previousChar, firstChar) - } - } - - return firstChar.code -} - -actual fun String.codePointCount(beginIndex: Int, endIndex: Int): Int { - if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) throw IndexOutOfBoundsException() - - var index = beginIndex - var count = 0 - do { - val firstChar = this[index] - index++ - if (firstChar.isHighSurrogate() && index < endIndex) { - val nextChar = this[index] - if (nextChar.isLowSurrogate()) { - index++ - } - } - - count++ - } while (index < endIndex) - - return count -} - -actual fun String.offsetByCodePoints(index: Int, codePointOffset: Int): Int { - if (index !in 0..length) throw IndexOutOfBoundsException() - if (codePointOffset == 0) return index - - if (codePointOffset > 0) { - var currentIndex = index - repeat(codePointOffset) { - if (currentIndex > lastIndex) throw IndexOutOfBoundsException() - val firstChar = this[currentIndex] - currentIndex++ - if (firstChar.isHighSurrogate() && currentIndex <= lastIndex) { - val nextChar = this[currentIndex] - if (nextChar.isLowSurrogate()) { - currentIndex++ - } - } - } - - return currentIndex - } else { - var currentIndex = index - 1 - repeat(-codePointOffset) { - if (currentIndex < 0) throw IndexOutOfBoundsException() - val firstChar = this[currentIndex] - currentIndex-- - if (firstChar.isLowSurrogate() && currentIndex >= 0) { - val previousChar = this[currentIndex] - if (previousChar.isHighSurrogate()) { - currentIndex-- - } - } - } - - return currentIndex + 1 - } -} diff --git a/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt b/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt new file mode 100644 index 0000000..0239b85 --- /dev/null +++ b/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt @@ -0,0 +1,155 @@ +package de.cketti.codepoints + +/** + * Returns the Unicode code point at the specified index. 
+ * + * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character + * sequence. + * + * If the code point at the specified index is part of the Basic Multilingual Plane (BMP), its value can be represented + * using a single `Char` and this method will behave exactly like [CharSequence.get]. + * Code points outside the BMP are encoded using a surrogate pair – a `Char` containing a value in the high surrogate + * range followed by a `Char` containing a value in the low surrogate range. Together these two `Char`s encode a single + * code point in one of the supplementary planes. This method will do the necessary decoding and return the value of + * that single code point. + * + * In situations where surrogate characters are encountered that don't form a valid surrogate pair starting at `index`, + * this method will return the surrogate code point itself, behaving like [CharSequence.get]. + * + * If the `index` is out of bounds of this character sequence, this method throws an [IndexOutOfBoundsException]. + * + * To iterate over all code points in a character sequence the index has to be adjusted depending on the value of the + * returned code point. Use [CodePoints.charCount] for this. 
+ * + * ```kotlin + * // Text containing code points outside the BMP (encoded as surrogate pairs) + * val text = "\uD83E\uDD95\uD83E\uDD96" + * + * var index = 0 + * while (index < text.length) { + * val codePoint = text.codePointAt(index) + * // Do something with codePoint + * + * index += CodePoints.charCount(codePoint) + * } + * ``` + */ +fun CharSequence.codePointAt(index: Int): Int { + if (index !in indices) throw IndexOutOfBoundsException() + + val firstChar = this[index] + if (firstChar.isHighSurrogate() && index + 1 < length) { + val nextChar = this[index + 1] + if (nextChar.isLowSurrogate()) { + return CodePoints.toCodePoint(firstChar, nextChar) + } + } + + return firstChar.code +} + +/** + * Returns the Unicode code point before the specified index. + * + * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character + * sequence. + * + * If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high + * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is + * returned. In all other cases this method behaves as if [CharSequence.get] was called with an argument of `index - 1`. + * + * If the value `index - 1` is out of bounds of this character sequence, this method throws an + * [IndexOutOfBoundsException]. + */ +fun CharSequence.codePointBefore(index: Int): Int { + val startIndex = index - 1 + if (startIndex !in indices) throw IndexOutOfBoundsException() + + val firstChar = this[startIndex] + if (firstChar.isLowSurrogate() && startIndex - 1 >= 0) { + val previousChar = this[startIndex - 1] + if (previousChar.isHighSurrogate()) { + return CodePoints.toCodePoint(previousChar, firstChar) + } + } + + return firstChar.code +} + +/** + * Returns the number of Unicode code points in the specified text range of this `CharSequence`. 
+ * + * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the + * length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as + * one code point each. An empty text range (`beginIndex == endIndex`) contains zero code points. + * + * If `beginIndex` is negative, or `endIndex` is larger than the length of this character sequence, or `beginIndex` + * is larger than `endIndex`, this method throws an [IndexOutOfBoundsException]. + */ +fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int { + if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) throw IndexOutOfBoundsException() + + var index = beginIndex + var count = 0 + while (index < endIndex) { + val firstChar = this[index] + index++ + if (firstChar.isHighSurrogate() && index < endIndex) { + val nextChar = this[index] + if (nextChar.isLowSurrogate()) { + index++ + } + } + + count++ + } + + return count +} + +/** + * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points. + * + * Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each. + * + * If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and + * the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is + * negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this + * method throws an [IndexOutOfBoundsException]. 
+ */ +fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int { + if (index !in 0..length) throw IndexOutOfBoundsException() + if (codePointOffset == 0) return index + if (codePointOffset == Int.MIN_VALUE) throw IndexOutOfBoundsException() // negating it below would overflow + if (codePointOffset > 0) { + var currentIndex = index + repeat(codePointOffset) { + if (currentIndex > lastIndex) throw IndexOutOfBoundsException() + val firstChar = this[currentIndex] + currentIndex++ + if (firstChar.isHighSurrogate() && currentIndex <= lastIndex) { + val nextChar = this[currentIndex] + if (nextChar.isLowSurrogate()) { + currentIndex++ + } + } + } + + return currentIndex + } else { + var currentIndex = index - 1 + repeat(-codePointOffset) { + if (currentIndex < 0) throw IndexOutOfBoundsException() + val firstChar = this[currentIndex] + currentIndex-- + if (firstChar.isLowSurrogate() && currentIndex >= 0) { + val previousChar = this[currentIndex] + if (previousChar.isHighSurrogate()) { + currentIndex-- + } + } + } + + return currentIndex + 1 + } +} diff --git a/kotlin-codepoints/src/commonMain/kotlin/StringExtensions.kt b/kotlin-codepoints/src/commonMain/kotlin/StringExtensions.kt deleted file mode 100644 index b92d50c..0000000 --- a/kotlin-codepoints/src/commonMain/kotlin/StringExtensions.kt +++ /dev/null @@ -1,73 +0,0 @@ -package de.cketti.codepoints - -/** - * Returns the Unicode code point at the specified index. - * - * The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string. - * - * If the code point at the specified index is part of the Basic Multilingual Plane (BMP), its value can be represented - * using a single `Char` and this method will behave exactly like [String.get]. - * Code points outside the BMP are encoded using a surrogate pair – a `Char` containing a value in the high surrogate - * range followed by a `Char` containing a value in the low surrogate range. Together these two `Char`s encode a single - * code point in one of the supplementary planes. 
This method will do the necessary decoding and return the value of - * that single code point. - * - * In situations where surrogate characters are encountered that don't form a valid surrogate pair starting at `index`, - * this method will return the surrogate code point itself, behaving like [String.get]. - * - * If the `index` is out of bounds of this string, this method throws an [IndexOutOfBoundsException]. - * - * To iterate over all code points in a string the index has to be adjusted depending on the value of the returned code - * point. Use [CodePoints.charCount] for this. - * - * ```kotlin - * // Text containing code points outside the BMP (encoded as a surrogate pairs) - * val text = "\uD83E\uDD95\uD83E\uDD96" - * - * var index = 0 - * while (index < text.length) { - * val codePoint = text.codePointAt(index) - * // Do something with codePoint - * - * index += CodePoints.charCount(codePoint) - * } - * ``` - */ -expect fun String.codePointAt(index: Int): Int - -/** - * Returns the Unicode code point before the specified index. - * - * The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string. - * - * If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high - * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is - * returned. In all other cases this method behaves like [String.get] was called with an argument of `index - 1`. - * - * If the value `index - 1` is out of bounds of this string, this method throws an [IndexOutOfBoundsException]. - */ -expect fun String.codePointBefore(index: Int): Int - -/** - * Returns the number of Unicode code points in the specified text range of this `String`. - * - * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the - * length (in `Char`s) of the text range is `endIndex - beginIndex`. 
Unpaired surrogates within the text range count as - * one code point each. - * - * If `beginIndex` is negative, or `endIndex` is larger than the length of this string, or `beginIndex` is larger than - * `endIndex`, this method throws an [IndexOutOfBoundsException]. - */ -expect fun String.codePointCount(beginIndex: Int, endIndex: Int): Int - -/** - * Returns the index within this `String` that is offset from the given `index` by `codePointOffset` code points. - * - * Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each. - * - * If `index` is negative or larger than the length of this string, or if `codePointOffset` is positive and the - * substring starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is negative - * and the substring before index has fewer than the absolute value of `codePointOffset` code points, this method throws - * an [IndexOutOfBoundsException]. - */ -expect fun String.offsetByCodePoints(index: Int, codePointOffset: Int): Int diff --git a/kotlin-codepoints/src/commonTest/kotlin/StringExtensionsTest.kt b/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt similarity index 99% rename from kotlin-codepoints/src/commonTest/kotlin/StringExtensionsTest.kt rename to kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt index 8bee2ae..cb0f18a 100644 --- a/kotlin-codepoints/src/commonTest/kotlin/StringExtensionsTest.kt +++ b/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt @@ -4,7 +4,7 @@ import kotlin.test.assertEquals import kotlin.test.Test import kotlin.test.assertFailsWith -class StringExtensionsTest { +class CharSequenceExtensionsTest { @Test fun codePointAt() { assertEquals('a'.code, "a".codePointAt(0)) diff --git a/kotlin-codepoints/src/jvmMain/kotlin/StringExtensions.kt b/kotlin-codepoints/src/jvmMain/kotlin/StringExtensions.kt deleted file mode 100644 index fc28a38..0000000 --- 
a/kotlin-codepoints/src/jvmMain/kotlin/StringExtensions.kt +++ /dev/null @@ -1,19 +0,0 @@ -@file:Suppress("NOTHING_TO_INLINE", "PLATFORM_CLASS_MAPPED_TO_KOTLIN") - -package de.cketti.codepoints - -actual inline fun String.codePointAt(index: Int): Int { - return (this as java.lang.String).codePointAt(index) -} - -actual inline fun String.codePointBefore(index: Int): Int { - return (this as java.lang.String).codePointBefore(index) -} - -actual inline fun String.codePointCount(beginIndex: Int, endIndex: Int): Int { - return (this as java.lang.String).codePointCount(beginIndex, endIndex) -} - -actual inline fun String.offsetByCodePoints(index: Int, codePointOffset: Int): Int { - return (this as java.lang.String).offsetByCodePoints(index, codePointOffset) -}