Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GeoDataFrame init #909

Merged
merged 10 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
import org.jetbrains.kotlinx.jupyter.api.libraries.resources
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf

/** Users will get an error if their Kotlin Jupyter kernel is older than this version. */
Expand All @@ -70,29 +69,6 @@ internal class Integration(private val notebook: Notebook, private val options:

val version = options["v"]

/**
 * Substitutes [argument] into [codeWithConverter] and, when the resulting snippet is not blank,
 * executes it on this host.
 *
 * @return the `name` of the execution result when [codeWithConverter] has a converter;
 * `null` when it has none, or when the snippet was blank and nothing was executed.
 */
private fun KotlinKernelHost.execute(codeWithConverter: CodeWithConverter, argument: String): VariableName? {
    val snippet = codeWithConverter.with(argument)
    if (snippet.isBlank()) return null

    val executed = execute(snippet)
    return if (codeWithConverter.hasConverter) executed.name else null
}

/**
 * Executes [codeWithConverter] against an expression that reads [property] and casts it to [type].
 *
 * The expression has the form `(<name> as <type>)`; `!!` is appended to the property name
 * when the property's declared return type is marked nullable.
 *
 * @return whatever the string-argument `execute` overload returns for that expression.
 */
private fun KotlinKernelHost.execute(
    codeWithConverter: CodeWithConverter,
    property: KProperty<*>,
    type: KType,
): VariableName? {
    val notNullAssertion = if (property.returnType.isMarkedNullable) "!!" else ""
    return execute(codeWithConverter, "(${property.name}$notNullAssertion as $type)")
}

private fun KotlinKernelHost.updateImportDataSchemaVariable(
importDataSchema: ImportDataSchema,
property: KProperty<*>,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.jetbrains.kotlinx.dataframe.jupyter

import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
import org.jetbrains.kotlinx.jupyter.api.VariableName
import kotlin.reflect.KProperty
import kotlin.reflect.KType

/**
 * Substitutes [argument] into [codeWithConverter] and, when the resulting snippet is not blank,
 * executes it on this host.
 *
 * @return the `name` of the execution result when [codeWithConverter] has a converter;
 * `null` when it has none, or when the snippet was blank and nothing was executed.
 */
internal fun KotlinKernelHost.execute(codeWithConverter: CodeWithConverter, argument: String): VariableName? {
    val snippet = codeWithConverter.with(argument)
    if (snippet.isBlank()) return null

    val executed = execute(snippet)
    return if (codeWithConverter.hasConverter) executed.name else null
}

/**
 * Executes [codeWithConverter] against an expression that reads [property] and casts it to [type].
 *
 * The expression has the form `(<name> as <type>)`; `!!` is appended to the property name
 * when the property's declared return type is marked nullable.
 *
 * @return whatever the string-argument `execute` overload returns for that expression.
 */
internal fun KotlinKernelHost.execute(
    codeWithConverter: CodeWithConverter,
    property: KProperty<*>,
    type: KType,
): VariableName? {
    val notNullAssertion = if (property.returnType.isMarkedNullable) "!!" else ""
    return execute(codeWithConverter, "(${property.name}$notNullAssertion as $type)")
}
72 changes: 72 additions & 0 deletions dataframe-geo/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import org.jetbrains.kotlin.gradle.tasks.BaseKotlinCompile
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile

// Plugins are resolved through the `libs` version catalog.
plugins {
    with(libs.plugins) {
        alias(kotlin.jvm)
        alias(publisher)
        alias(jupyter.api)
        // alias(ktlint)
    }
}

group = "org.jetbrains.kotlinx"

repositories {
// geo repository should come before Maven Central
maven("https://repo.osgeo.org/repository/release")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it true that these Maven repository links will be hidden inside the module, so the end user doesn't need to reference them in their own Gradle script? (Just checking for myself.)

Copy link
Collaborator Author

@AndreiKingsley AndreiKingsley Nov 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, the user will need to add the repository. However, I believe we can fix this with shadow jar.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please check whether you can do that before this is merged

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd leave it that way for now, then I'll try to get rid of it. When using a descriptor in notebook, the user won't need to do this.

mavenCentral()
}

// The jai_core artifact must be excluded from the GeoTools dependencies and added separately, see
// https://stackoverflow.com/questions/26993105/i-get-an-error-downloading-javax-media-jai-core1-1-3-from-maven-central
fun ExternalModuleDependency.excludeJaiCore() = exclude(group = "javax.media", module = "jai_core")

dependencies {
// The core module is part of this module's public API.
api(project(":core"))

// GeoTools artifacts, each declared with the transitive jai_core dependency excluded.
implementation(libs.geotools.main) { excludeJaiCore() }
implementation(libs.geotools.shapefile) { excludeJaiCore() }
implementation(libs.geotools.geojson) { excludeJaiCore() }
implementation(libs.geotools.referencing) { excludeJaiCore() }
implementation(libs.geotools.epsg.hsql) { excludeJaiCore() }

// jai_core is added explicitly because it is excluded from the GeoTools artifacts above.
implementation(libs.jai.core)

// JTS geometry types and I/O.
implementation(libs.jts.core)
implementation(libs.jts.io.common)

// Ktor HTTP client with kotlinx.serialization JSON support.
implementation(libs.ktor.client.core)
implementation(libs.ktor.client.cio)
implementation(libs.ktor.client.content.negotiation)
implementation(libs.ktor.serialization.kotlinx.json)

testImplementation(kotlin("test"))
}

Jolanrensen marked this conversation as resolved.
Show resolved Hide resolved
// Registers the jar produced by :core as a friend path of every Kotlin compilation in this module.
tasks.withType<KotlinCompile>().configureEach {
    val coreJar = project(":core").tasks.getByName("jar") as Jar
    val coreJarPath = coreJar.archiveFile.get().asFile.absolutePath
    val compileTask = this as BaseKotlinCompile
    compileTask.friendPaths.from(coreJarPath)
}

// Publication settings for this module.
kotlinPublications {
publication {
publicationName = "dataframeGeo"
artifactId = "dataframe-geo"
description = "GeoDataFrame API"
// Reuses the artifactId assigned above as the package name.
packageName = artifactId
}
}

// Declares IntegrationGeo as the library producer for the Jupyter API resources.
tasks.processJupyterApiResources {
libraryProducers = listOf("org.jetbrains.kotlinx.dataframe.jupyter.IntegrationGeo")
}

// Tests run on the JUnit Platform.
tasks.test {
useJUnitPlatform()
}
// Build with a Java 11 toolchain.
kotlin {
    jvmToolchain(11)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package org.jetbrains.kotlinx.dataframe.geo

import org.geotools.api.referencing.crs.CoordinateReferenceSystem
import org.geotools.geometry.jts.JTS
import org.geotools.referencing.CRS
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.update
import org.jetbrains.kotlinx.dataframe.api.with

/**
Jolanrensen marked this conversation as resolved.
Show resolved Hide resolved
* A data structure representing a geographical DataFrame, combining spatial data with
* an optional Coordinate Reference System (CRS).
*
* @param T The type parameter extending `WithGeometry`, indicating the presence of a geometry column.
* @property df The underlying `DataFrame` containing geometries.
* @property crs The coordinate reference system associated with the data, if any.
*/
class GeoDataFrame<T : WithGeometry>(val df: DataFrame<T>, val crs: CoordinateReferenceSystem?) {
/**
 * Applies [updateBlock] to the underlying [df] and wraps the result in a new `GeoDataFrame`.
 *
 * @param updateBlock The transformation, invoked with [df] as its receiver.
 * @return A new `GeoDataFrame` holding the transformed frame and the same [crs].
 */
fun update(updateBlock: DataFrame<T>.() -> DataFrame<T>): GeoDataFrame<T> =
    GeoDataFrame(df.updateBlock(), crs)

/**
 * Expresses the geometries in [targetCrs].
 *
 * - If this `GeoDataFrame` has no CRS, nothing is reprojected: the same data is returned
 *   with [targetCrs] attached.
 * - If [targetCrs] equals the current CRS, this instance is returned unchanged.
 * - Otherwise every value of the `geometry` column is transformed from the current CRS to
 *   [targetCrs]; the transform is looked up with `CRS.findMathTransform(source, target, true)`.
 *
 * @param targetCrs The target CRS for transformation.
 * @return A `GeoDataFrame` whose CRS is [targetCrs] (or this instance when it already is).
 */
fun applyCrs(targetCrs: CoordinateReferenceSystem): GeoDataFrame<T> {
    // Without a source CRS there is nothing to transform from, so only attach the target CRS.
    val sourceCrs = crs ?: return GeoDataFrame(df, targetCrs)
    if (targetCrs == sourceCrs) return this

    val transform = CRS.findMathTransform(sourceCrs, targetCrs, true)
    return GeoDataFrame(
        df.update { geometry }.with { JTS.transform(it, transform) },
        targetCrs,
    )
}

/**
 * Two `GeoDataFrame`s are equal when their frames are equal and their CRSs are either both
 * absent or equal according to `CRS.equalsIgnoreMetadata`.
 */
override fun equals(other: Any?): Boolean {
    if (this === other) return true
    if (other !is GeoDataFrame<*>) return false
    if (df != other.df) return false

    val otherCrs = other.crs
    return when {
        crs == null && otherCrs == null -> true
        crs == null || otherCrs == null -> false
        else -> CRS.equalsIgnoreMetadata(crs, otherCrs)
    }
}

/**
 * Hash code consistent with [equals].
 *
 * Only [df] contributes: CRSs that differ solely in metadata compare equal in [equals],
 * so hashing [crs] could give two equal instances different hash codes.
 */
override fun hashCode(): Int = df.hashCode()

companion object {
val DEFAULT_CRS = CRS.decode("EPSG:4326", true)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be ENUM of different constants

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it even exists in some form in GeoTools (see DefaultEngineeringCRS). But here is a more global question - whether it is necessary to use geotools API directly or should we wrap it completely.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest not wrapping it for now; we can't avoid learning it anyway.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.jetbrains.kotlinx.dataframe.geo

import org.jetbrains.kotlinx.dataframe.ColumnsContainer
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.geom.MultiPolygon
import org.locationtech.jts.geom.Polygon

/** Data schema with a `geometry` column holding JTS [Geometry] values. */
@DataSchema
interface WithGeometry {
val geometry: Geometry
}

/** Specialization of [WithGeometry] whose `geometry` column is narrowed to [Polygon]. */
@DataSchema
interface WithPolygon : WithGeometry {
override val geometry: Polygon
}

/** Specialization of [WithGeometry] whose `geometry` column is narrowed to [MultiPolygon]. */
@DataSchema
interface WithMultiPolygon : WithGeometry {
override val geometry: MultiPolygon
}

@get:JvmName("geometry")
val <T : WithGeometry> ColumnsContainer<T>.geometry: DataColumn<Geometry>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the T is not needed, right? ColumnsContainer<WithGeometry> has the same effect

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, for the generated accessor we have both ColumnsContainer and DataRow extensions, so probably you should add DataRow<WithGeometry>.geometry: Geometry etc. too :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@koperagen could plugin help here, to generate all these extensions?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you mean the gradle plugin or the compiler plugin?

The compiler plugin is highly experimental still and it doesn't work yet in notebooks. So I wouldn't rely on that for now.

The Gradle plugin, however, could generate these automatically based on the @DataSchema interfaces you defined. You'd have to add the dataframe Gradle plugin to the geo module, just like we did in core, and make sure the generated files are part of the sources so they are published along with the rest of the code.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sources generated by the plugin are automatically included. Makes sense to double check, but it's expected that you don't need to do anything extra. Just add this and it will generate required properties for annotated interfaces
plugins {
id("org.jetbrains.kotlinx.dataframe")
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, forgot to add it, thank you!

get() = get("geometry") as DataColumn<Geometry>

/** The column named `geometry` of a [WithPolygon] container, cast to a column of [Polygon]. */
@get:JvmName("geometryPolygon")
val <T : WithPolygon> ColumnsContainer<T>.geometry: DataColumn<Polygon>
    get() {
        val column = get("geometry")
        return column as DataColumn<Polygon>
    }

/** The column named `geometry` of a [WithMultiPolygon] container, cast to a column of [MultiPolygon]. */
@get:JvmName("geometryMultiPolygon")
val <T : WithMultiPolygon> ColumnsContainer<T>.geometry: DataColumn<MultiPolygon>
    get() {
        val column = get("geometry")
        return column as DataColumn<MultiPolygon>
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.jetbrains.kotlinx.dataframe.geo

import org.geotools.geometry.jts.ReferencedEnvelope
import org.jetbrains.kotlinx.dataframe.api.asIterable
import org.jetbrains.kotlinx.dataframe.geo.jts.computeBounds

/**
 * Computes the envelope enclosing every value of the `geometry` column.
 *
 * @receiver The `GeoDataFrame` whose geometries are measured.
 * @return A [ReferencedEnvelope] built from the computed bounds and this frame's `crs`
 * (which may be `null`).
 */
fun GeoDataFrame<*>.bounds(): ReferencedEnvelope {
    val envelope = df.geometry.asIterable().computeBounds()
    return ReferencedEnvelope(envelope, crs)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package org.jetbrains.kotlinx.dataframe.geo.geocode

import io.ktor.client.HttpClient
import io.ktor.client.engine.cio.CIO
import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.client.statement.bodyAsText
import io.ktor.http.ContentType
import io.ktor.http.contentType
import io.ktor.serialization.kotlinx.json.json
import kotlinx.coroutines.runBlocking
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.geo.GeoDataFrame
import org.jetbrains.kotlinx.dataframe.geo.toGeo
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.geom.GeometryFactory
import org.locationtech.jts.io.geojson.GeoJsonReader


object Geocoder {

/** Endpoint to which both requests made by [geocodeCountries] are posted. */
private val url = "https://geo2.datalore.jetbrains.com/map_data/geocoding"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be included in the stock library? As far as I remember this was a reverse engineering endeavour and since it depends on an external URL behaviour might change or stop working entirely

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this experimental module, I am totally fine with including in library.

  1. It's stable for enough time
  2. For a while, we won't make any guarantees about releases of the experimental geo module

But we could also include this solution for the @Jolanrensen question:

  1. Checking the API availability and handling the unavailability with some exception
  2. Checking the API consistency and notification for the users or raise an exception
  3. Customize it with a URL parameter (if this API just will be moved in another place)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Geocoder is not part of the main API and will be removed/reworked completely (via future Kotlin geocoding library we discussed). I kept it, exclusively to play with it. So no effort should be wasted on it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably fine then :) You could add a small note in KDocs too telling this is experimental/temporary, so potential users won't get used to it.


/**
 * Builds one `region_queries` entry that looks up [country] by name, with every other filter
 * and all ambiguity-resolver options set to `null`.
 *
 * The name is embedded as an escaped JSON string, so quotes, backslashes and control
 * characters in [country] cannot corrupt the request body.
 */
private fun countryQuery(country: String) = """ {
    "region_query_names" : [ ${jsonQuoted(country)} ],
    "region_query_countries" : null,
    "region_query_states" : null,
    "region_query_counties" : null,
    "ambiguity_resolver" : {
        "ambiguity_resolver_ignoring_strategy" : null,
        "ambiguity_resolver_box" : null,
        "ambiguity_resolver_closest_coord" : null
    }
    }
""".trimIndent()

/** Renders [text] as a double-quoted JSON string literal, escaping characters JSON does not allow raw. */
private fun jsonQuoted(text: String): String = buildString {
    append('"')
    for (ch in text) {
        when {
            ch == '"' -> append("\\\"")
            ch == '\\' -> append("\\\\")
            ch == '\n' -> append("\\n")
            ch == '\r' -> append("\\r")
            ch == '\t' -> append("\\t")
            // Remaining control characters must be written as \uXXXX escapes.
            ch < ' ' -> append("\\u").append(ch.code.toString(16).padStart(4, '0'))
            else -> append(ch)
        }
    }
    append('"')
}

/**
 * Builds the JSON body of a `by_geocoding` request at `country` level, with one
 * `region_queries` entry (see [countryQuery]) per element of [countries].
 *
 * Ambiguous matches are disallowed (`allow_ambiguous` is `false`).
 */
private fun geocodeQuery(countries: List<String>) = """
    {
    "version" : 3,
    "mode" : "by_geocoding",
    "feature_options" : [ "limit", "position", "centroid" ],
    "resolution" : null,
    "view_box" : null,
    "fetched_ids" : null,
    "region_queries" : [
    ${countries.joinToString(",\n") { countryQuery(it) }}
    ],
    "scope" : [ ],
    "level" : "country",
    "namesake_example_limit" : 10,
    "allow_ambiguous" : false
    }
""".trimIndent()

/**
 * Builds the JSON body of a `by_id` request asking for the `boundary` feature of every id in [ids].
 *
 * Each id is wrapped in double quotes as is, without any escaping.
 */
private fun idsQuery(ids: List<String>) = """
{"version": 3,
"mode": "by_id",
"feature_options": ["boundary"],
"resolution": 5,
"view_box": null,
"fetched_ids": null,
"ids": [${ids.joinToString(", ") { "\"" + it + "\"" }}]}
""".trimIndent()

/** Ktor HTTP client on the CIO engine with JSON content negotiation (pretty-printing, lenient) installed. */
private val client = HttpClient(CIO) {
install(ContentNegotiation) {
json(Json {
prettyPrint = true
isLenient = true
})
}
}

/**
 * Looks up [countries] by name and returns their boundaries.
 *
 * Two requests are posted to [url]: the first resolves every name to a feature id and the name
 * reported by the service, the second fetches the boundary of each id as GeoJSON. The calls are
 * made inside `runBlocking`, so this function blocks the calling thread.
 *
 * @param countries Names to look up.
 * @return A `GeoDataFrame` with the columns `country` (the requested names), `foundName`
 * (names reported by the service) and `geometry` (parsed boundaries).
 * @throws IllegalStateException if a response lacks one of the expected fields.
 * @throws NoSuchElementException if an answer contains no feature.
 * @throws IllegalArgumentException if an answer contains more than one feature.
 */
fun geocodeCountries(countries: List<String>): GeoDataFrame<*> {
    val (foundNames, geometries) = runBlocking {
        // Step 1: resolve each requested name to the service's feature name and id.
        val matches = singleFeatures(postJson(geocodeQuery(countries)))
        val names = matches.map { it.requiredString("name") }
        val ids = matches.map { it.requiredString("id") }

        // Step 2: fetch the boundary of every id and parse it from GeoJSON.
        val geoJsonReader = GeoJsonReader(GeometryFactory())
        val boundaries: List<Geometry> = singleFeatures(postJson(idsQuery(ids)))
            .map { geoJsonReader.read(it.requiredString("boundary")) }

        names to boundaries
    }
    return dataFrameOf(
        "country" to countries,
        "foundName" to foundNames,
        "geometry" to geometries,
    ).toGeo()
}

/** Posts [body] to [url] as `application/json` and returns the response body as text. */
private suspend fun postJson(body: String): String =
    client.post(url) {
        contentType(ContentType.Application.Json)
        setBody(body)
    }.bodyAsText()

/** Parses [response] and returns the single feature of each entry in `data.answers`, in response order. */
private fun singleFeatures(response: String): List<kotlinx.serialization.json.JsonObject> {
    val root = Json.parseToJsonElement(response).jsonObject
    val data = root["data"]?.jsonObject ?: error("Geocoding response has no \"data\" object")
    val answers = data["answers"]?.jsonArray ?: error("Geocoding response has no \"answers\" array")
    return answers.map { answer ->
        val features = answer.jsonObject["features"]?.jsonArray
            ?: error("Geocoding answer has no \"features\" array")
        features.single().jsonObject
    }
}

/** Returns the content of the primitive stored under [key], failing with a descriptive message when it is absent. */
private fun kotlinx.serialization.json.JsonObject.requiredString(key: String): String =
    (this[key] ?: error("Geocoding feature has no \"$key\" field")).jsonPrimitive.content
}
Loading