[SEDONA-444] pre-commit: add hook to trim trailing whitespace (#1144)
jbampton authored Dec 14, 2023
1 parent 6ac5236 commit 5ae8243
Showing 37 changed files with 330 additions and 331 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker-build.yml
@@ -13,7 +13,7 @@ on:
- 'docker/**'
env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60

jobs:
build:
strategy:
2 changes: 1 addition & 1 deletion .github/workflows/example.yml
@@ -32,7 +32,7 @@ jobs:
- name: Cache SBT
uses: actions/cache@v3
with:
path: |
path: |
~/.ivy2/cache
~/.sbt
key: ${{ runner.os }}-sbt-${{ hashFiles('**/build.sbt') }}
8 changes: 4 additions & 4 deletions .github/workflows/java.yml
@@ -26,7 +26,7 @@ on:

env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60

jobs:
build:

@@ -50,15 +50,15 @@ jobs:
- spark: 3.3.0
scala: 2.12.15
jdk: '8'
skipTests: ''
skipTests: ''
- spark: 3.2.3
scala: 2.12.15
jdk: '8'
skipTests: ''
skipTests: ''
- spark: 3.1.2
scala: 2.12.15
jdk: '8'
skipTests: ''
skipTests: ''
- spark: 3.0.3
scala: 2.12.15
jdk: '8'
2 changes: 1 addition & 1 deletion .github/workflows/python-extension.yml
@@ -19,7 +19,7 @@ on:
- 'spark-shaded/**'
- 'pom.xml'
- 'python/**'

jobs:
build:
strategy:
2 changes: 1 addition & 1 deletion .github/workflows/python-wheel.yml
@@ -19,7 +19,7 @@ on:
- 'spark-shaded/**'
- 'pom.xml'
- 'python/**'

jobs:
build:
strategy:
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
@@ -22,7 +22,7 @@ on:

env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60

jobs:
build:

2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -35,6 +35,8 @@ repos:
- id: end-of-file-fixer
files: \.(java|md|py|scala)$
exclude: ^docs/image|^spark/common/src/test/resources
- id: trailing-whitespace
files: \.(ipynb|java|py|R|scala|sh|xml|yaml|yml)$
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.38.0
hooks:
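The two added lines register the stock `trailing-whitespace` hook for notebooks, Java, Python, R, Scala, shell, and XML/YAML sources. A minimal sketch of how the hook section reads after this change, assuming the hooks come from the standard pre-commit/pre-commit-hooks repository and using a placeholder `rev`:

```yaml
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks  # assumed source of both hooks
    rev: v4.5.0  # placeholder revision, not taken from this commit
    hooks:
      - id: end-of-file-fixer
        files: \.(java|md|py|scala)$
        exclude: ^docs/image|^spark/common/src/test/resources
      - id: trailing-whitespace
        files: \.(ipynb|java|py|R|scala|sh|xml|yaml|yml)$
```

The new hook can be run locally with `pre-commit run trailing-whitespace --all-files`, which is presumably how the whitespace fixes in the remaining files of this commit were produced.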
82 changes: 41 additions & 41 deletions R/R/data_interface.R
@@ -19,7 +19,7 @@
# ------- Read RDD ------------

#' Create a SpatialRDD from an external data source.
#'
#'
#' Import spatial object from an external data source into a Sedona SpatialRDD.
#'
#' @param sc A `spark_connection`.
@@ -155,14 +155,14 @@ sedona_read_dsv_to_typed_rdd <- function(sc,
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#'
#' Constructors of typed RDD (PointRDD, PolygonRDD, LineStringRDD) are soft deprecated, use non-types versions
#'
#'
#' Create a typed SpatialRDD (namely, a PointRDD, a PolygonRDD, or a
#' LineStringRDD)
#' * `sedona_read_shapefile_to_typed_rdd`: from a shapefile data source
#' * `sedona_read_geojson_to_typed_rdd`: from a GeoJSON data source
#'
#'
#'
#' @param sc A `spark_connection`.
#' @param location Location of the data source.
@@ -197,13 +197,13 @@ sedona_read_shapefile_to_typed_rdd <- function(sc,
location,
type = c("point", "polygon", "linestring"),
storage_level = "MEMORY_ONLY") {

lifecycle::deprecate_soft(
"1.4.0",
"sedona_read_shapefile_to_typed_rdd()",
with = "sedona_read_shapefile()"
)

invoke_static(
sc,
"org.apache.sedona.core.formatMapper.shapefileParser.ShapefileReader",
@@ -224,13 +224,13 @@ sedona_read_geojson_to_typed_rdd <- function(sc,
has_non_spatial_attrs = TRUE,
storage_level = "MEMORY_ONLY",
repartition = 1L) {

lifecycle::deprecate_soft(
"1.4.0",
"sedona_read_geojson_to_typed_rdd()",
with = "sedona_read_geojson()"
)

invoke_new(
sc,
rdd_cls_from_type(type),
@@ -249,10 +249,10 @@ sedona_read_geojson_to_typed_rdd <- function(sc,
#' Read geospatial data into a Spatial RDD
#'
#' @description Import spatial object from an external data source into a Sedona SpatialRDD.
#' * `sedona_read_shapefile`: from a shapefile
#' * `sedona_read_geojson`: from a geojson file
#' * `sedona_read_wkt`: from a geojson file
#' * `sedona_read_wkb`: from a geojson file
#' * `sedona_read_shapefile`: from a shapefile
#' * `sedona_read_geojson`: from a geojson file
#' * `sedona_read_wkt`: from a geojson file
#' * `sedona_read_wkb`: from a geojson file
#'
#' @param sc A `spark_connection`.
#' @param location Location of the data source.
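For context, a minimal usage sketch of the RDD readers documented above (not part of this diff; it assumes a local sparklyr connection, the apache.sedona package attached, and a placeholder input path):

```r
library(sparklyr)
library(apache.sedona)

sc <- spark_connect(master = "local")

# Read a shapefile into a Sedona SpatialRDD, then expose it as a Spark DataFrame.
rdd <- sedona_read_shapefile(sc, location = "/path/to/shapefile_dir")
sdf <- sdf_register(rdd, name = "my_geometries")
```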
@@ -388,12 +388,12 @@ sedona_read_shapefile <- function(sc,

# ------- Read SDF ------------
#' Read geospatial data into a Spark DataFrame.
#'
#'
#' @description Functions to read geospatial data from a variety of formats into Spark DataFrames.
#'
#' * `spark_read_shapefile`: from a shapefile
#' * `spark_read_geojson`: from a geojson file
#' * `spark_read_geoparquet`: from a geoparquet file
#'
#' * `spark_read_shapefile`: from a shapefile
#' * `spark_read_geojson`: from a geojson file
#' * `spark_read_geoparquet`: from a geoparquet file
#'
#' @inheritParams sparklyr::spark_read_source
#'
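A similar sketch for the DataFrame readers documented above (not part of this diff; `sc` is an existing sparklyr connection, and table names and paths are placeholders):

```r
# Read a shapefile straight into a Spark DataFrame registered as "parks".
parks_sdf <- spark_read_shapefile(sc, name = "parks", path = "/path/to/parks_shapefile")

# GeoParquet is routed through the regular Spark data source API (source = "geoparquet").
trips_sdf <- spark_read_geoparquet(sc, name = "trips", path = "/path/to/trips.parquet")
```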
@@ -419,19 +419,19 @@ spark_read_shapefile <- function(sc,
path = name,
options = list(),
...) {

lapply(names(options), function(name) {
if (!name %in% c("")) {
warning(paste0("Ignoring unknown option '", name,"'"))
}
})

rdd <- sedona_read_shapefile(sc,
location = path,
storage_level = "MEMORY_ONLY")



rdd %>% sdf_register(name = name)
}

@@ -445,7 +445,7 @@ spark_read_geojson <- function(sc,
repartition = 0,
memory = TRUE,
overwrite = TRUE) {

# check options
if ("allow_invalid_geometries" %in% names(options)) final_allow_invalid <- options[["allow_invalid_geometries"]] else final_allow_invalid <- TRUE
if ("skip_syntactically_invalid_geometries" %in% names(options)) final_skip <- options[["skip_syntactically_invalid_geometries"]] else final_skip <- TRUE
@@ -454,18 +454,18 @@ spark_read_geojson <- function(sc,
warning(paste0("Ignoring unknown option '", name,"'"))
}
})

final_repartition <- max(as.integer(repartition), 1L)

rdd <- sedona_read_geojson(sc,
location = path,
allow_invalid_geometries = final_allow_invalid,
skip_syntactically_invalid_geometries = final_skip,
storage_level = "MEMORY_ONLY",
repartition = final_repartition)



rdd %>% sdf_register(name = name)
}

@@ -479,8 +479,8 @@ spark_read_geoparquet <- function(sc,
repartition = 0,
memory = TRUE,
overwrite = TRUE) {
spark_read_source(sc,

spark_read_source(sc,
name = name,
path = path,
source = "geoparquet",
@@ -603,7 +603,7 @@ sedona_save_spatial_rdd <- function(x,
#' Write geospatial data from a Spark DataFrame.
#'
#' @description Functions to write geospatial data into a variety of formats from Spark DataFrames.
#'
#'
#' * `spark_write_geojson`: to GeoJSON
#' * `spark_write_geoparquet`: to GeoParquet
#' * `spark_write_raster`: to raster tiles after using RS output functions (`RS_AsXXX`)
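And a matching write-side sketch (not part of this diff; `sdf` is a Spark DataFrame with a geometry column, and paths and column names are placeholders):

```r
# Write a DataFrame with a geometry column out as GeoParquet.
spark_write_geoparquet(sdf, path = "/tmp/output_geoparquet", mode = "overwrite")

# GeoJSON output converts the DataFrame to a SpatialRDD first, so a single geometry
# column is expected; name it explicitly with options = list(spatial_col = "geom").
spark_write_geojson(sdf, path = "/tmp/output_geojson")
```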
@@ -643,12 +643,12 @@ spark_write_geojson <- function(x,
options = list(),
partition_by = NULL,
...) {

## find geometry column if not specified
if (!"spatial_col" %in% names(options)) {
schema <- x %>% sdf_schema()
potential_cols <- which(sapply(schema, function(x) x$type == "GeometryUDT"))

if (length(potential_cols) == 0) {
cli::cli_abort("No geometry column found")
} else if (length(potential_cols) > 1) {
@@ -657,15 +657,15 @@ spark_write_geojson <- function(x,
} else {
spatial_col = names(potential_cols)
}

} else {
spatial_col = options[["spatial_col"]]
}

rdd <- x %>% to_spatial_rdd(spatial_col = spatial_col)

sedona_write_geojson(x = rdd, output_location = path)

}


@@ -678,7 +678,7 @@ spark_write_geoparquet <- function(x,
options = list(),
partition_by = NULL,
...) {

spark_write_source(
x = x,
source = "geoparquet",
@@ -688,7 +688,7 @@ spark_write_geoparquet <- function(x,
save_args = list(path),
...
)

}


@@ -701,7 +701,7 @@ spark_write_raster <- function(x,
options = list(),
partition_by = NULL,
...) {

spark_write_source(
x = x,
source = "raster",
@@ -711,7 +711,7 @@ spark_write_raster <- function(x,
save_args = list(path),
...
)

}


8 changes: 4 additions & 4 deletions R/R/sdf_interface.R
@@ -17,7 +17,7 @@


#' Import data from a spatial RDD into a Spark Dataframe.
#'
#'
#' @description Import data from a spatial RDD (possibly with non-spatial attributes) into a
#' Spark Dataframe.
#' * `sdf_register`: method for sparklyr's sdf_register to handle Spatial RDD
@@ -48,7 +48,7 @@
#' type = "polygon"
#' )
#' sdf <- sdf_register(rdd)
#'
#'
#' input_location <- "/dev/null" # replace it with the path to your input file
#' rdd <- sedona_read_dsv_to_typed_rdd(
#' sc,
@@ -71,7 +71,7 @@ sdf_register.spatial_rdd <- function(x, name = NULL) {
#' @rdname sdf_register.spatial_rdd
as.spark.dataframe <- function(x, non_spatial_cols = NULL, name = NULL) {
sc <- spark_connection(x$.jobj)

# Default keep all columns
if (is.null(non_spatial_cols)) {
if (!is.null(invoke(x$.jobj, "%>%", list("fieldNames")))) { ## Only if dataset has field names
@@ -82,7 +82,7 @@ as.spark.dataframe <- function(x, non_spatial_cols = NULL, name = NULL) {
} else {
stopifnot("non_spatial_cols needs to be a character vector (or NULL, default)" = is.character(non_spatial_cols))
}

sdf <- invoke_static(
sc,
"org.apache.sedona.sql.utils.Adapter",
2 changes: 1 addition & 1 deletion R/_pkgdown.yml
@@ -23,7 +23,7 @@ home:
title: Sedona Project
text: >
[Homepage](https://sedona.apache.org/)
reference:
- title: "Reading and Writing Spatial DataFrames"
desc: "Functions for reading and writing Spark DataFrames."
4 changes: 2 additions & 2 deletions R/tests/testthat/helper-initialize.R
@@ -29,10 +29,10 @@ testthat_spark_connection <- function(conn_retry_interval_s = 2) {
for (attempt in seq(conn_attempts)) {
success <- tryCatch(
{

config <- spark_config()
config[["sparklyr.connect.timeout"]] <- 300

sc <- spark_connect(
master = "local",
method = "shell",
2 changes: 1 addition & 1 deletion R/tests/testthat/test-crs-transform.R
@@ -24,7 +24,7 @@ test_that("crs_transform() works as expected", {
type = "point"
) %>%
crs_transform("epsg:4326", "epsg:3857")

# expect_equivalent(
# pt_rdd %>%
# sdf_register() %>%
(diffs for the remaining 25 changed files are not shown)
