Arithmetic is slow

Based on profiling, it's slow for a large number of geometries because of the `mapply` here: https://github.com/r-spatial/sf/blob/cfc321aa33633017274d6832589021417e21e700/R/arith.R#L152-L163 That's an R loop, but it would be nice to push it into C++. Here's a reprex:

```r
library(sf)
library(sfheaders)

bbox_use <- st_bbox(c(xmin = 0, xmax = 100, ymin = 0, ymax = 200)) |> st_as_sfc()
foo <- st_sample(bbox_use, 2e4)
M <- matrix(c(cos(pi/4), sin(pi/4), -sin(pi/4), cos(pi/4)), nrow = 2)
v <- c(140, 34)
profvis::profvis(foo_rot <- foo * M + v)
```

<img width="449" alt="Screenshot 2024-04-10 at 9 13 06 PM" src="https://github.com/r-spatial/sf/assets/32186361/95d97e13-6633-4625-bcf3-cc74eec92847">

It's way faster to get the coordinates, apply the affine transformation, and reconstruct the sf.

```r
op2 <- function(df, M, v) {
    coords <- st_coordinates(df)
    coords <- sweep(coords %*% M, 2, v, FUN = "+")
    colnames(coords) <- c("X", "Y")
    st_as_sf(as.data.frame(coords), coords = c("X", "Y"))
}
foo_rot2 <- op2(foo, M, v)
all(length(st_equals(foo_rot, foo_rot2)))
# TRUE

bench::mark(check = FALSE,
            foo * M + v,
            op2(foo, M, v))
# expression          min   median   mem_alloc
# <bch:expr>     <bch:tm> <bch:tm>   <bch:byt>
# foo * M + v       3.04s    3.04s      2.75MB
# op2(foo, M, v)   8.28ms   9.52ms      2.54MB
```

Also faster to do the transformation on the coordinates and reconstructing the sf with `sfheaders` for polygons, though not sure if you want to introduce a dependency. Plus for `sfheaders`, the coordinates data frame must be sorted by the geometry ID. However, while it's faster, it also uses more memory.

```r
op2_poly <- function(df, M, v) {
    coords <- st_coordinates(df)
    coords_xy <- coords[,c("X", "Y")]
    coords_xy <- sweep(coords_xy %*% M, 2, v, FUN = "+")
    colnames(coords_xy) <- c("X","Y")
    coords_df <- cbind(as.data.frame(coords_xy), coords[,c("L1", "L2")])
    sf_polygon(coords_df, x = "X", y = "Y", polygon_id = "L2")
}
system.time(bar_rot2 <- op2_poly(bar, M, v))
all(length(st_equals(bar_rot, bar_rot2)))
# TRUE

bench::mark(check = FALSE,
            bar * M + v,
            op2_poly(bar, M, v))
# expression             min median mem_alloc
# <bch:expr>          <bch:> <bch:>  <bch:byt>
# bar * M + v          4.08s  4.08s     163MB
# op2_poly(bar, M, v)   2.2s   2.2s     771MB
```

The slow part is because of the number of geometries rather than the number of vertices. It's much faster when I group the 20,000 points in the toy example into 500 MULTIPOINT geometries, but still calling `sfheaders` is so much faster.

```r
baz_coords <- st_coordinates(foo)
baz_coords <- as.data.frame(baz_coords)
baz_coords$L1 <- sample(1:500, nrow(baz_coords), replace = TRUE)
baz_coords <- baz_coords[order(baz_coords$L1),]
baz <- sf_multipoint(baz_coords, "X", "Y", multipoint_id = "L1")
system.time(baz_rot <- baz * M + v)

op2_mp <- function(df, M, v) {
    coords <- st_coordinates(df)
    coords_xy <- coords[,c("X", "Y")]
    coords_xy <- sweep(coords_xy %*% M, 2, v, FUN = "+")
    colnames(coords_xy) <- c("X","Y")
    coords_df <- cbind(as.data.frame(coords_xy), coords[,"L1",drop = FALSE])
    sf_multipoint(coords_df, x = "X", y = "Y", multipoint_id = "L1")
}
system.time(baz_rot2 <- op2_mp(baz, M, v))
all(length(st_equals(baz_rot$geometry, baz_rot2$geometry)))
# TRUE

bench::mark(check = FALSE,
            baz * M + v,
            op2_mp(baz, M, v))
# expression            min  median mem_alloc
# <bch:expr>       <bch:tm> <bch:t>  <bch:byt>
# baz * M + v      220.99ms 229.5ms    2.06MB
# op2_mp(baz, M, …   6.33ms   6.7ms     4.2MB
```

	ret = switch(
	.Generic,
	"&" = mapply(function(x, y) { x & y }, e1, e2, SIMPLIFY = FALSE),
	"\|" = mapply(function(x, y) { x \| y }, e1, e2, SIMPLIFY = FALSE),
	"%/%" = mapply(function(x, y) { x %/% y}, e1, e2, SIMPLIFY = FALSE),
	"/" = mapply(function(x, y) { x / y }, e1, e2, SIMPLIFY = FALSE),
	"!=" = mapply(function(x, y) { x != y }, e1, e2, SIMPLIFY = TRUE),
	"==" = mapply(function(x, y) { x == y }, e1, e2, SIMPLIFY = TRUE),
	"" = mapply(function(x, y) { x y }, e1, e2, SIMPLIFY = FALSE),
	"+" = mapply(function(x, y) { x + y }, e1, e2, SIMPLIFY = FALSE),
	"-" = mapply(function(x, y) { x - y }, e1, e2, SIMPLIFY = FALSE),
	"%%" = mapply(function(x, y) { x %% y }, e1, e2, SIMPLIFY = FALSE),

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Arithmetic is slow #2376

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Arithmetic is slow #2376

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions