Skip to content

fix camelCase #1072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -9938,8 +9938,6 @@ public final class org/jetbrains/kotlinx/dataframe/impl/UtilsKt {
public static final fun getColumnName (Lkotlin/reflect/KCallable;)Ljava/lang/String;
public static final fun getColumnName (Lkotlin/reflect/KFunction;)Ljava/lang/String;
public static final fun getColumnName (Lkotlin/reflect/KProperty;)Ljava/lang/String;
public static final fun getDELIMITED_STRING_REGEX ()Lkotlin/text/Regex;
public static final fun getDELIMITERS_REGEX ()Lkotlin/text/Regex;
public static final fun headPlusArray (B[B)[B
public static final fun headPlusArray (C[C)[C
public static final fun headPlusArray (D[D)[D
Expand All @@ -9948,7 +9946,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/UtilsKt {
public static final fun headPlusArray (J[J)[J
public static final fun headPlusArray (S[S)[S
public static final fun headPlusArray (Z[Z)[Z
public static final fun toCamelCaseByDelimiters (Ljava/lang/String;Lkotlin/text/Regex;)Ljava/lang/String;
public static final fun toCamelCaseByDelimiters (Ljava/lang/String;Lkotlin/text/Regex;Ljava/lang/String;)Ljava/lang/String;
public static synthetic fun toCamelCaseByDelimiters$default (Ljava/lang/String;Lkotlin/text/Regex;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
public static final fun zero (Lkotlin/reflect/KClass;)Ljava/lang/Number;
public static final fun zeroOrNull (Lkotlin/reflect/KClass;)Ljava/lang/Number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.renamedReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
Expand Down Expand Up @@ -46,23 +44,44 @@ public class RenameClause<T, C>(internal val df: DataFrame<T>, internal val colu
}

/**
* ## Rename to camelCase
* ## Rename to "camelCase"
*
* This function renames all columns in this [DataFrame] to the "camelCase" format.
*
* Removes all delimiters between words and capitalizes each word except the first one.
* Adds an underscore between consecutive numbers.
* If the string does not contain any letters or numbers, it remains unchanged.
*
* This function supports converting names from `snake_case`, `PascalCase`, and other delimited formats
* into a consistent "camelCase" representation.
*
* This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
* and converting the first char to lowercase.
* Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
* [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
*
* Returns a [DataFrame] with updated column names.
*
* ### Examples:
* ```
* "snake_case_name" -> "snakeCaseName"
* "PascalCaseName" -> "pascalCaseName"
* "doner-case-name" -> "donerCaseName"
* "UPPER_CASE_NAME" -> "upperCaseName"
* ```
*
* @return a [DataFrame] with column names converted to "camelCase" format.
*/
@Refine
@Interpretable("RenameToCamelCase")
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> =
    // Rename every column at any depth; names that are already camelCase come out unchanged,
    // so no pre-filtering by delimiter or leading capital is needed.
    rename {
        colsAtAnyDepth()
    }.toCamelCase()
        // Take all frame columns at any depth and call renameToCamelCase() on all dataframes inside.
        .update {
            colsAtAnyDepth().colsOf<AnyFrame>()
        }.with {
            it.renameToCamelCase()
        }

@AccessApiOverload
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> =
Expand All @@ -80,10 +99,27 @@ public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> Stri
renameImpl(transform)

/**
* ## Rename to camelCase
* ## Rename to "camelCase"
*
* Renames the selected columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
* and converting the first char to lowercase.
* Renames the columns, previously selected with [rename] to "camelCase" format.
* All delimiters between words are removed, words are capitalized except for the first one.
* Places underscore between numbers.
* If the string does not contain any letters or numbers, it remains unchanged.
*
* Returns a [DataFrame] with updated column names.
*
* This function supports converting names from `snake_case`, `PascalCase`, and other delimited formats
* into a consistent "camelCase" representation.
*
* ### Examples:
* ```
* "snake_case_name" -> "snakeCaseName"
* "PascalCaseName" -> "pascalCaseName"
* "doner-case-name" -> "donerCaseName"
* "UPPER_CASE_NAME" -> "upperCaseName"
* ```
*
* @return a [DataFrame] with column names converted to "camelCase" format.
*/
@Refine
@Interpretable("RenameToCamelCaseClause")
Expand All @@ -96,15 +132,29 @@ public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> = into { it.ren
/**
* ## Rename to camelCase
*
* Renames this column to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
* and converting the first char to lowercase.
* Renames this column to "camelCase" format.
* All delimiters between words are removed, words are capitalized except for the first one.
* Places underscore between numbers.
* If the string does not contain any letters or numbers, it remains unchanged.
*
* Returns a [ColumnReference] with updated name.
*
* This function supports converting names from `snake_case`, `PascalCase`, and other delimited formats
* into a consistent "camelCase" representation.
*
* ### Examples:
* ```
* "snake_case_name" -> "snakeCaseName"
* "PascalCaseName" -> "pascalCaseName"
* "doner-case-name" -> "donerCaseName"
* "UPPER_CASE_NAME" -> "upperCaseName"
* ```
* @return a [ColumnReference] with the name converted to "camelCase" format.
*/
@Suppress("UNCHECKED_CAST")
public fun <T, C : ColumnReference<T>> C.renameToCamelCase(): C =
    rename(
        // toCamelCaseByDelimiters() with its default delimiters already lowercases the first word,
        // so no additional replaceFirstChar { lowercaseChar() } step is required here.
        this.name().toCamelCaseByDelimiters(),
    ) as C

@Suppress("UNCHECKED_CAST")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,28 +337,118 @@ internal fun <T : Comparable<T>> T.between(left: T, right: T, includeBoundaries:
this > left && this < right
}

private const val DELIMITERS = "[_\\s]"
public val DELIMITERS_REGEX: Regex = DELIMITERS.toRegex()
public val DELIMITED_STRING_REGEX: Regex = ".+$DELIMITERS.+".toRegex()
// Single regex that marks word boundaries for camelCase conversion. It is the alternation of:
//  1. any run of characters that are neither letters nor the digits 0-9,
//  2. the empty position between a lowercase and an uppercase letter ("fooBar" -> foo|Bar),
//  3. the empty position between an uppercase letter and an uppercase letter that is followed
//     by a lowercase one ("HTMLParser" -> HTML|Parser),
//  4. the empty position between a digit and a letter ("2x" -> 2|x),
//  5. the empty position between a letter and a digit ("x2" -> x|2).
internal val CAMEL_DEFAULT_DELIMITERS_REGEX =
    listOf(
        "[^\\p{L}0-9]+",
        "(?<=[\\p{Ll}])(?=[\\p{Lu}])",
        "(?<=[\\p{Lu}])(?=[\\p{Lu}][\\p{Ll}])",
        "(?<=\\d)(?=[\\p{L}])",
        "(?<=[\\p{L}])(?=\\d)",
    )
        .joinToString(separator = "|")
        .toRegex()

internal val CAMEL_REGEX = "(?<=[a-zA-Z])[A-Z]".toRegex()
/**
 * Converts a string into lowerCamelCase using [delimiters].
 *
 * - Splits this string on every match of the [delimiters] regular expression
 *   (by default [CAMEL_DEFAULT_DELIMITERS_REGEX]: runs of characters that are neither letters nor
 *   the digits 0-9, plus the boundaries between camelCase words and between letters and digits).
 * - Blank fragments are dropped and every remaining word is lowercased.
 * - Consecutive numeric words (that were split before) are joined with [numberSeparator].
 * - The first word remains in lowercase, and subsequent words are capitalized.
 * - If the string does not contain any letters or digits, or if splitting leaves no words at all,
 *   it is returned unchanged.
 *
 * Default behavior (with [CAMEL_DEFAULT_DELIMITERS_REGEX]):
 *
 * ```
 * "hello_world" -> "helloWorld"
 * "HelloWorld" -> "helloWorld"
 * "json.parser.Config" -> "jsonParserConfig"
 * "my.var_name test" -> "myVarNameTest"
 * "thirdColumn" -> "thirdColumn"
 * "someHTMLParser" -> "someHtmlParser"
 * "RESTApi" -> "restApi"
 * "OAuth2Token" -> "oAuth2Token"
 * "GraphQLQuery" -> "graphQlQuery"
 * "TCP_3_PROTOCOL" -> "tcp3Protocol"
 * "123hello_world456" -> "123HelloWorld456"
 * "API_Response_2023" -> "apiResponse2023"
 * "UPPER_case-LOWER" -> "upperCaseLower"
 * "12parse34CamelCase" -> "12Parse34CamelCase"
 * "snake_case_example" -> "snakeCaseExample"
 * "dot.separated.words" -> "dotSeparatedWords"
 * "kebab-case-example" -> "kebabCaseExample"
 * "MIXED_Case_with_123Numbers" -> "mixedCaseWith123Numbers"
 * "___!!!___" -> "___!!!___"
 * "1000.2000.3000" -> "1000_2000_3000"
 * "UPPERCASE" -> "uppercase"
 * "alreadyCamelCased" -> "alreadyCamelCased"
 * "justNumbers123" -> "justNumbers123"
 * "Just_Special$Chars!!" -> "justSpecialChars"
 * "singleword" -> "singleword"
 * "word_with_underscores_and-dashes" -> "wordWithUnderscoresAndDashes"
 * "10-20-aa" -> "10_20Aa"
 * "ROOM_1.11" -> "room1_11"
 * ```
 *
 * @param delimiters regular expression whose matches separate the words of this string.
 * @param numberSeparator text placed between two adjacent numeric words, `"_"` by default.
 * @return the formatted string in lowerCamelCase.
 */
public fun String.toCamelCaseByDelimiters(
    delimiters: Regex = CAMEL_DEFAULT_DELIMITERS_REGEX,
    numberSeparator: String = "_",
): String {
    // Nothing to camel-case: keep strings without any letter or digit (e.g. "___!!!___") intact.
    if (none { it.isLetter() || it.isDigit() }) return this

    val words = split(delimiters)
        .filter { it.isNotBlank() }
        .map { it.lowercase() }

    // `Char.isDigit` accepts any Unicode decimal digit, while the default regex keeps only 0-9,
    // and a custom [delimiters] regex may match the whole input. In both cases splitting can
    // consume everything; return the original string instead of producing an empty name.
    if (words.isEmpty()) return this

    return words
        .joinNumbers(numberSeparator)
        .joinToCamelCaseString()
}

/**
 * Merges every run of adjacent all-digit elements into a single element,
 * placing [separator] between the members of the run. Other elements are kept as they are.
 * Assumes that all numbers and strings are separated (after splitting via [CAMEL_DEFAULT_DELIMITERS_REGEX]).
 */
private fun List<String>.joinNumbers(separator: CharSequence): List<String> {
    val output = mutableListOf<String>()
    val pendingNumbers = mutableListOf<String>()

    // Emits the numeric run collected so far (if any) as one joined element.
    fun flushNumbers() {
        if (pendingNumbers.isNotEmpty()) {
            output += pendingNumbers.joinToString(separator)
            pendingNumbers.clear()
        }
    }

    for (token in this) {
        if (token.all { it.isDigit() }) {
            // Part of a numeric run: hold it back until the run ends.
            pendingNumbers += token
        } else {
            flushNumbers()
            output += token
        }
    }
    flushNumbers()
    return output
}

public fun String.toCamelCaseByDelimiters(delimiters: Regex): String = split(delimiters).joinToCamelCaseString()
/**
 * Concatenates the words of this list into one lowerCamelCase string.
 * - The word at index 0 is lowercased as a whole.
 * - Every later word only has its first character uppercased; the rest is left as is.
 */
private fun List<String>.joinToCamelCaseString(): String =
    buildString {
        this@joinToCamelCaseString.forEachIndexed { position, token ->
            val piece = if (position == 0) token.lowercase() else token.replaceFirstChar { it.uppercaseChar() }
            append(piece)
        }
    }

/** Matches an ASCII uppercase letter that directly follows an ASCII letter, i.e. the start of a camelCase word. */
internal val CAMEL_LETTERS_REGEX = "(?<=[a-zA-Z])[A-Z]".toRegex()

/** Matches non-empty strings made only of ASCII uppercase letters and underscores, e.g. `ORDER_DATE`. */
private val UPPER_SNAKE_CASE_REGEX = "[A-Z_]+".toRegex()

/**
 * Converts this string to `snake_case`.
 *
 * - A string consisting only of ASCII uppercase letters and underscores is returned unchanged.
 * - Otherwise an underscore is inserted before every uppercase letter that follows a letter,
 *   spaces are replaced with underscores, and the result is lowercased.
 *
 * ```
 * "helloWorld" -> "hello_world"
 * "hello world" -> "hello_world"
 * "HELLO_WORLD" -> "HELLO_WORLD"
 * ```
 *
 * @return the converted string.
 */
internal fun String.toSnakeCase(): String =
    if (UPPER_SNAKE_CASE_REGEX.matches(this)) {
        this
    } else {
        CAMEL_LETTERS_REGEX
            .replace(this) { "_${it.value}" }
            .replace(" ", "_")
            .lowercase()
    }

internal fun List<String>.joinToCamelCaseString(): String =
joinToString(separator = "") { it.replaceFirstChar { it.uppercaseChar() } }
.replaceFirstChar { it.lowercaseChar() }

/** Returns `true` if this callable is a getter-like function.
*
* A callable is considered getter-like if it is either a property getter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ class RenameToCamelCaseTests {
df.getColumnGroup("testName").columnNames() shouldBe listOf("anotherName")
}

// All-uppercase and UPPER_SNAKE_CASE column names must come out as lowerCamelCase.
@Test
fun `uppercase names`() {
    val renamedNames = dataFrameOf("ID", "ITEM", "ORDER_DATE")(1, "TOY", "02.03.2009")
        .renameToCamelCase()
        .columnNames()
    renamedNames shouldBe listOf("id", "item", "orderDate")
}

@Test
fun `doubly nested row`() {
val doublyNestedColumnGroup = dataFrameOf("test_name")(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.jetbrains.kotlinx.dataframe.impl

import io.kotest.matchers.shouldBe
import org.junit.Test

/** Tests for `String.toCamelCaseByDelimiters` with its default delimiters. */
class ToCamelCase {
    @Test
    fun defaultDelimitersSimpleUseCases() {
        // Each input sits next to its expected output, so the two can never drift apart
        // (zipping two parallel lists silently drops unmatched trailing entries).
        val cases = listOf(
            "hello_world" to "helloWorld",
            "HelloWorld" to "helloWorld",
            "json.parser.Config" to "jsonParserConfig",
            "my.var_name test" to "myVarNameTest",
            "thirdColumn" to "thirdColumn",
            "someHTMLParser" to "someHtmlParser",
            "RESTApi" to "restApi",
            "OAuth2Token" to "oAuth2Token",
            "GraphQLQuery" to "graphQlQuery",
            "TCP_3_PROTOCOL" to "tcp3Protocol",
            "123hello_world456" to "123HelloWorld456",
            "API_Response_2023" to "apiResponse2023",
            "UPPER_case-LOWER" to "upperCaseLower",
            "12parse34CamelCase" to "12Parse34CamelCase",
            "snake_case_example" to "snakeCaseExample",
            "dot.separated.words" to "dotSeparatedWords",
            "kebab-case-example" to "kebabCaseExample",
            "MIXED_Case_with_123Numbers" to "mixedCaseWith123Numbers",
            "___!!!___" to "___!!!___",
            "1000.2000.3000" to "1000_2000_3000",
            "UPPERCASE" to "uppercase",
            "alreadyCamelCased" to "alreadyCamelCased",
            "justNumbers123" to "justNumbers123",
            "Just_Special\$Chars!!" to "justSpecialChars",
            "singleword" to "singleword",
            "word_with_underscores_and-dashes" to "wordWithUnderscoresAndDashes",
            "10-20-aa" to "10_20Aa",
            "ROOM_1.11" to "room1_11",
        )

        cases.forEach { (input, expectedName) ->
            input.toCamelCaseByDelimiters() shouldBe expectedName
        }
    }

    @Test
    fun specialCharacters() {
        // Non-ASCII letters count as letters: they are kept and case-converted like ASCII ones.
        "música_lírica".toCamelCaseByDelimiters() shouldBe "músicaLírica"
        "тут был Андрей".toCamelCaseByDelimiters() shouldBe "тутБылАндрей"
        "汉字_拼音".toCamelCaseByDelimiters() shouldBe "汉字拼音"
        "X Æ A-12 34".toCamelCaseByDelimiters() shouldBe "xÆA12_34"
        "kæt_wɪð_æk!t".toCamelCaseByDelimiters() shouldBe "kætWɪðÆkT"
        "Gëëxplodeerd,_of_geïntegreerd?".toCamelCaseByDelimiters() shouldBe "gëëxplodeerdOfGeïntegreerd"
        "Äüßergewöhnlich_könnte_flüssig_sein,_aber_wie_öfter?".toCamelCaseByDelimiters() shouldBe
            "äüßergewöhnlichKönnteFlüssigSeinAberWieÖfter"
    }
}
Loading