Skip to content

Support for the serialization of DataframeConvertible values in ValueColumns has been added to enhance visualization in the KTNB plugin. #823

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.jetbrains.kotlinx.dataframe.columns

/**
 * Describes special kinds of individual elements (cells) that may occur inside a Column.
 * It is comparable to [ColumnKind], except that it classifies single elements rather than whole columns.
 * The primary purpose is to supply metadata during serialization so that such values can be
 * visualized by the KTNB plugin.
 */
internal enum class CellKind {
    /**
     * A cell whose value is convertible to a DataFrame.
     */
    DataFrameConvertable,
    ;

    /**
     * Returns the identifier that is written into the serialized metadata for this kind.
     * The `when` has no `else` branch on purpose: adding a new entry forces an explicit identifier here.
     */
    override fun toString(): String =
        when (this) {
            DataFrameConvertable -> "DataFrameConvertable"
        }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.api.isList
import org.jetbrains.kotlinx.dataframe.api.rows
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.columns.CellKind
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
Expand All @@ -37,7 +38,10 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.isDataframeConvertable
import org.jetbrains.kotlinx.dataframe.name
import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
Expand All @@ -48,7 +52,7 @@ import java.io.IOException

// See docs/serialization_format.md for a description of
// serialization versions and format.
internal const val SERIALIZATION_VERSION = "2.1.0"
internal const val SERIALIZATION_VERSION = "2.1.1"

internal object SerializationKeys {
const val DATA = "data"
Expand Down Expand Up @@ -108,14 +112,14 @@ internal fun encodeRowWithMetadata(
frame: ColumnsContainer<*>,
index: Int,
rowLimit: Int? = null,
imageEncodingOptions: Base64ImageEncodingOptions? = null,
customEncoders: List<CustomEncoder> = emptyList(),
): JsonElement? {
val values: List<Pair<String, JsonElement>> = frame.columns().map { col ->
when (col) {
is ColumnGroup<*> -> {
val schema = col.schema()
buildJsonObject {
put(DATA, encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions) ?: JsonPrimitive(null))
put(DATA, encodeRowWithMetadata(col, index, rowLimit, customEncoders) ?: JsonPrimitive(null))
putJsonObject(METADATA) {
put(KIND, JsonPrimitive(ColumnKind.Group.toString()))
put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys))
Expand All @@ -132,9 +136,9 @@ internal fun encodeRowWithMetadata(

is FrameColumn<*> -> {
val data = if (rowLimit == null) {
encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
encodeFrameWithMetadata(col[index], null, customEncoders)
} else {
encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, customEncoders)
}
val schema = col.schema.value
buildJsonObject {
Expand All @@ -155,55 +159,82 @@ internal fun encodeRowWithMetadata(
}
}

else -> encodeValue(col, index, imageEncodingOptions)
else -> encodeValue(col, index, customEncoders)
}.let { col.name to it }
}
if (values.isEmpty()) return null
return JsonObject(values.toMap())
}

internal fun encodeValue(
col: AnyCol,
index: Int,
imageEncodingOptions: Base64ImageEncodingOptions? = null,
): JsonElement =
when {
internal fun encodeValue(col: AnyCol, index: Int, customEncoders: List<CustomEncoder> = emptyList()): JsonElement {
val matchingEncoder = customEncoders.firstOrNull { it.canEncode(col[index]) }

return when {
matchingEncoder != null -> matchingEncoder.encode(col[index])

col.isList() -> col[index]?.let { list ->
val values = (list as List<*>).map { convert(it) }
JsonArray(values)
} ?: JsonArray(emptyList())

col.typeClass in valueTypes -> convert(col[index])

col.typeClass == BufferedImage::class && imageEncodingOptions != null ->
col[index]?.let { image ->
JsonPrimitive(encodeBufferedImageAsBase64(image as BufferedImage, imageEncodingOptions))
} ?: JsonPrimitive("")

else -> JsonPrimitive(col[index]?.toString())
}
}

private fun encodeBufferedImageAsBase64(
image: BufferedImage,
imageEncodingOptions: Base64ImageEncodingOptions = Base64ImageEncodingOptions(),
): String? =
try {
val preparedImage = if (imageEncodingOptions.isLimitSizeOn) {
image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)
} else {
image
}
/**
 * [CustomEncoder] for values that [isDataframeConvertable] accepts.
 *
 * A non-null value is converted with [KotlinNotebookPluginUtils.convertToDataFrame] and encoded via
 * [encodeFrameWithMetadata]. The result is a JSON object holding the encoded frame under [DATA] and
 * a [METADATA] object whose [KIND] is [CellKind.DataFrameConvertable].
 *
 * @param encoders custom encoders forwarded to [encodeFrameWithMetadata] for the converted frame.
 * @param rowLimit row limit forwarded to [encodeFrameWithMetadata]; `null` is passed through unchanged.
 */
internal class DataframeConvertableEncoder(
    private val encoders: List<CustomEncoder>,
    private val rowLimit: Int? = null,
) : CustomEncoder {
    /** Delegates the decision to [isDataframeConvertable]. */
    override fun canEncode(input: Any?): Boolean = isDataframeConvertable(input)

    /**
     * Encodes [input] as a nested frame with cell-kind metadata.
     *
     * @return a JSON null primitive when [input] is `null`, otherwise the object described on the class.
     */
    override fun encode(input: Any?): JsonElement {
        // Nothing to convert: emit an explicit JSON null.
        if (input == null) return JsonPrimitive(null)

        val convertedFrame = KotlinNotebookPluginUtils.convertToDataFrame(input)
        val encodedFrame = encodeFrameWithMetadata(convertedFrame, rowLimit, encoders)

        return buildJsonObject {
            put(DATA, encodedFrame)
            putJsonObject(METADATA) {
                put(KIND, JsonPrimitive(CellKind.DataFrameConvertable.toString()))
            }
        }
    }
}

val bytes = if (imageEncodingOptions.isGzipOn) {
preparedImage.toByteArray().encodeGzip()
} else {
preparedImage.toByteArray()
}
internal class BufferedImageEncoder(private val options: Base64ImageEncodingOptions) : CustomEncoder {
/** Accepts only inputs that are [BufferedImage] instances; `null` is rejected. */
override fun canEncode(input: Any?): Boolean {
    return input is BufferedImage
}

bytes.toBase64()
} catch (e: IOException) {
null
}
/**
 * Encodes [input] as a JSON string primitive containing the Base64 form of the image.
 *
 * A `null` input yields an empty string. A non-null input is cast to [BufferedImage],
 * so callers are expected to check [canEncode] first.
 */
override fun encode(input: Any?): JsonElement {
    val base64: String =
        if (input == null) {
            ""
        } else {
            encodeBufferedImageAsBase64(input as BufferedImage, options)
        }
    return JsonPrimitive(base64)
}

/**
 * Turns [image] into a Base64 string according to [imageEncodingOptions].
 *
 * The image is first resized (keeping its aspect ratio) when the size limit option is on,
 * then serialized to bytes, optionally gzip-compressed, and finally Base64-encoded.
 *
 * @return the Base64 string, or an empty string if an [IOException] occurs along the way.
 */
private fun encodeBufferedImageAsBase64(
    image: BufferedImage,
    imageEncodingOptions: Base64ImageEncodingOptions = Base64ImageEncodingOptions(),
): String {
    return try {
        val sizedImage = when {
            imageEncodingOptions.isLimitSizeOn ->
                image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)

            else -> image
        }

        val compress = imageEncodingOptions.isGzipOn
        val rawBytes = sizedImage.toByteArray()
        val payload = if (compress) rawBytes.encodeGzip() else rawBytes

        payload.toBase64()
    } catch (_: IOException) {
        // Best effort: an image that cannot be encoded is represented by an empty string.
        ""
    }
}
}

private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject =
JsonObject(
Expand All @@ -217,7 +248,7 @@ private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject =
internal fun encodeFrameWithMetadata(
frame: AnyFrame,
rowLimit: Int? = null,
imageEncodingOptions: Base64ImageEncodingOptions? = null,
customEncoders: List<CustomEncoder> = emptyList(),
): JsonArray {
val valueColumn = frame.extractValueColumn()
val arrayColumn = frame.extractArrayColumn()
Expand All @@ -231,13 +262,13 @@ internal fun encodeFrameWithMetadata(
encodeFrameWithMetadata(
it as AnyFrame,
rowLimit,
imageEncodingOptions,
customEncoders,
)
} else {
null
}
}
?: encodeRowWithMetadata(frame, rowIndex, rowLimit, imageEncodingOptions)
?: encodeRowWithMetadata(frame, rowIndex, rowLimit, customEncoders)
}

return buildJsonArray { addAll(data.map { convert(it) }) }
Expand Down Expand Up @@ -345,7 +376,7 @@ internal fun encodeDataFrameWithMetadata(
frame: AnyFrame,
rowLimit: Int,
nestedRowLimit: Int? = null,
imageEncodingOptions: Base64ImageEncodingOptions? = null,
customEncoders: List<CustomEncoder> = emptyList(),
): JsonObject =
buildJsonObject {
put(VERSION, JsonPrimitive(SERIALIZATION_VERSION))
Expand All @@ -366,7 +397,7 @@ internal fun encodeDataFrameWithMetadata(
encodeFrameWithMetadata(
frame = frame.take(rowLimit),
rowLimit = nestedRowLimit,
imageEncodingOptions = imageEncodingOptions,
customEncoders = customEncoders,
),
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow
import org.jetbrains.kotlinx.dataframe.impl.io.readJson
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
Expand Down Expand Up @@ -303,7 +301,7 @@ public fun AnyFrame.toJsonWithMetadata(
rowLimit: Int,
nestedRowLimit: Int? = null,
prettyPrint: Boolean = false,
imageEncodingOptions: Base64ImageEncodingOptions? = null,
customEncoders: List<CustomEncoder> = emptyList(),
): String {
val json = Json {
this.prettyPrint = prettyPrint
Expand All @@ -312,10 +310,31 @@ public fun AnyFrame.toJsonWithMetadata(
}
return json.encodeToString(
JsonElement.serializer(),
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, imageEncodingOptions),
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, customEncoders),
)
}

/**
 * Interface for defining a custom encoder that is applied to individual values
 * during DataFrame JSON serialization.
 *
 * Implementations are passed as a list (for example to [toJsonWithMetadata]).
 * NOTE(review): the serializer appears to pick the first encoder in that list whose [canEncode]
 * returns `true` for a value and then calls [encode] on it — confirm against `encodeValue`.
 */
public interface CustomEncoder {
/**
* Determines whether this encoder can encode the given input.
*
* @param input The value to be checked for suitability; may be `null`.
* @return `true` if the input can be encoded, otherwise `false`.
*/
public fun canEncode(input: Any?): Boolean

/**
* Encodes the provided input into a JSON element.
*
* @param input The value to be encoded; may be `null`.
* @return A JsonElement representing the encoded input.
*/
public fun encode(input: Any?): JsonElement
}

internal const val DEFAULT_IMG_SIZE = 600

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder
import org.jetbrains.kotlinx.dataframe.impl.io.DataframeConvertableEncoder
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
import org.jetbrains.kotlinx.dataframe.io.toHTML
Expand All @@ -34,6 +37,7 @@ import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded
private const val MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI = "0.11.0.311"
private const val MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA = 241
private const val MIN_IDE_VERSION_SUPPORT_IMAGE_VIEWER = 242
private const val MIN_IDE_VERSION_SUPPORT_DATAFRAME_CONVERTABLE = 243

internal class JupyterHtmlRenderer(val display: DisplayConfiguration, val builder: JupyterIntegration.Builder)

Expand Down Expand Up @@ -85,13 +89,19 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
}

else -> {
val imageEncodingOptions =
if (ideBuildNumber.supportsImageViewer()) Base64ImageEncodingOptions() else null
val encoders = buildList<CustomEncoder> {
if (ideBuildNumber.supportsDataFrameConvertableValues()) {
add(DataframeConvertableEncoder(this))
}
if (ideBuildNumber.supportsImageViewer()) {
add(BufferedImageEncoder(Base64ImageEncodingOptions()))
}
}

df.toJsonWithMetadata(
rowLimit = limit,
nestedRowLimit = reifiedDisplayConfiguration.rowsLimit,
imageEncodingOptions = imageEncodingOptions,
customEncoders = encoders,
)
}
}
Expand All @@ -108,6 +118,9 @@ private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDynamicNestedTable
/**
 * Tells whether the IDE build is known and its major version is at least
 * [MIN_IDE_VERSION_SUPPORT_IMAGE_VIEWER]. An unknown (`null`) build number yields `false`.
 */
private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsImageViewer(): Boolean =
    if (this == null) {
        false
    } else {
        majorVersion >= MIN_IDE_VERSION_SUPPORT_IMAGE_VIEWER
    }

/**
 * Tells whether the IDE build is known and its major version is at least
 * [MIN_IDE_VERSION_SUPPORT_DATAFRAME_CONVERTABLE]. An unknown (`null`) build number yields `false`.
 */
private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDataFrameConvertableValues(): Boolean =
    if (this == null) {
        false
    } else {
        majorVersion >= MIN_IDE_VERSION_SUPPORT_DATAFRAME_CONVERTABLE
    }

internal fun Notebook.renderAsIFrameAsNeeded(
data: HtmlData,
staticData: HtmlData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,36 @@ public object KotlinNotebookPluginUtils {
}.toColumnSet()
}

/**
 * Checks whether [dataframeLike] is an instance of one of the types enumerated below.
 * Any other value, including `null`, yields `false`.
 *
 * NOTE(review): presumably this list mirrors the types handled by `convertToDataFrame` —
 * keep both in sync and verify when adding a new type.
 */
internal fun isDataframeConvertable(dataframeLike: Any?): Boolean {
    return when (dataframeLike) {
        // Frame, column and row types plus frame wrappers.
        is AnyFrame,
        is AnyCol,
        is AnyRow,
        is FormattedFrame<*>,
        is DisableRowsLimitWrapper,
        -> true

        // Grouping and pivoting results.
        is GroupBy<*, *>,
        is ReducedGroupBy<*, *>,
        is Pivot<*>,
        is ReducedPivot<*>,
        is PivotGroupBy<*>,
        is ReducedPivotGroupBy<*>,
        -> true

        // Remaining operation and clause types.
        is Split<*, *>,
        is SplitWithTransform<*, *, *>,
        is Merge<*, *, *>,
        is Gather<*, *, *, *>,
        is Update<*, *>,
        is Convert<*, *>,
        is MoveClause<*, *>,
        is RenameClause<*, *>,
        is ReplaceClause<*, *>,
        is GroupClause<*, *>,
        is InsertClause<*>,
        is FormatClause<*, *>,
        -> true

        else -> false
    }
}

/**
* Converts [dataframeLike] to [AnyFrame].
* If [dataframeLike] is already [AnyFrame] then it is returned as is.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF
Expand Down Expand Up @@ -62,7 +63,11 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp
encodingOptions: Base64ImageEncodingOptions?,
): JsonObject {
val df = dataFrameOf(listOf("imgs"), images)
val jsonStr = df.toJsonWithMetadata(20, nestedRowLimit = 20, imageEncodingOptions = encodingOptions)
val jsonStr = df.toJsonWithMetadata(
20,
nestedRowLimit = 20,
customEncoders = listOfNotNull(encodingOptions?.let { BufferedImageEncoder(encodingOptions) }),
)

return parseJsonStr(jsonStr)
}
Expand Down
Loading
Loading