Description
Based on profiling, it's slow for a large number of geometries because of the mapply
here:
Lines 152 to 163 in cfc321a
library(sf)
library(sfheaders)
bbox_use <- st_bbox(c(xmin = 0, xmax = 100, ymin = 0, ymax = 200)) |> st_as_sfc()
foo <- st_sample(bbox_use, 2e4)
M <- matrix(c(cos(pi/4), sin(pi/4), -sin(pi/4), cos(pi/4)), nrow = 2)
v <- c(140, 34)
profvis::profvis(foo_rot <- foo * M + v)

It's way faster to get the coordinates, apply the affine transformation, and reconstruct the sf.
op2 <- function(df, M, v) {
coords <- st_coordinates(df)
coords <- sweep(coords %*% M, 2, v, FUN = "+")
colnames(coords) <- c("X", "Y")
st_as_sf(as.data.frame(coords), coords = c("X", "Y"))
}
foo_rot2 <- op2(foo, M, v)
all(length(st_equals(foo_rot, foo_rot2)))
# TRUE
bench::mark(check = FALSE,
foo * M + v,
op2(foo, M, v))
# expression min median mem_alloc
# <bch:expr> <bch:tm> <bch:tm> <bch:byt>
# foo * M + v 3.04s 3.04s 2.75MB
# op2(foo, M, v) 8.28ms 9.52ms 2.54MB
Also faster to do the transformation on the coordinates and reconstructing the sf with sfheaders
for polygons, though not sure if you want to introduce a dependency. Plus for sfheaders
, the coordinates data frame must be sorted by the geometry ID. However, while it's faster, it also uses more memory.
op2_poly <- function(df, M, v) {
coords <- st_coordinates(df)
coords_xy <- coords[,c("X", "Y")]
coords_xy <- sweep(coords_xy %*% M, 2, v, FUN = "+")
colnames(coords_xy) <- c("X","Y")
coords_df <- cbind(as.data.frame(coords_xy), coords[,c("L1", "L2")])
sf_polygon(coords_df, x = "X", y = "Y", polygon_id = "L2")
}
system.time(bar_rot2 <- op2_poly(bar, M, v))
all(length(st_equals(bar_rot, bar_rot2)))
# TRUE
bench::mark(check = FALSE,
bar * M + v,
op2_poly(bar, M, v))
# expression min median mem_alloc
# <bch:expr> <bch:> <bch:> <bch:byt>
# bar * M + v 4.08s 4.08s 163MB
# op2_poly(bar, M, v) 2.2s 2.2s 771MB
The slow part is because of the number of geometries rather than the number of vertices. It's much faster when I group the 20,000 points in the toy example into 500 MULTIPOINT geometries, but still calling sfheaders
is so much faster.
baz_coords <- st_coordinates(foo)
baz_coords <- as.data.frame(baz_coords)
baz_coords$L1 <- sample(1:500, nrow(baz_coords), replace = TRUE)
baz_coords <- baz_coords[order(baz_coords$L1),]
baz <- sf_multipoint(baz_coords, "X", "Y", multipoint_id = "L1")
system.time(baz_rot <- baz * M + v)
op2_mp <- function(df, M, v) {
coords <- st_coordinates(df)
coords_xy <- coords[,c("X", "Y")]
coords_xy <- sweep(coords_xy %*% M, 2, v, FUN = "+")
colnames(coords_xy) <- c("X","Y")
coords_df <- cbind(as.data.frame(coords_xy), coords[,"L1",drop = FALSE])
sf_multipoint(coords_df, x = "X", y = "Y", multipoint_id = "L1")
}
system.time(baz_rot2 <- op2_mp(baz, M, v))
all(length(st_equals(baz_rot$geometry, baz_rot2$geometry)))
# TRUE
bench::mark(check = FALSE,
baz * M + v,
op2_mp(baz, M, v))
# expression min median mem_alloc
# <bch:expr> <bch:tm> <bch:t> <bch:byt>
# baz * M + v 220.99ms 229.5ms 2.06MB
# op2_mp(baz, M, … 6.33ms 6.7ms 4.2MB