Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

can indexing be made faster #1465

Open
maelle opened this issue Aug 20, 2024 · 2 comments
Open

can indexing be made faster #1465

maelle opened this issue Aug 20, 2024 · 2 comments
Labels
upkeep maintenance, infrastructure, and similar

Comments

@maelle
Copy link
Contributor

maelle commented Aug 20, 2024

or should we delete these two lines

## TODO: make it faster, don't need the whole matrix usually

## TODO: rewrite this in C to make it faster

@szhorvat
Copy link
Member

Yes, these are both important.

@maelle maelle added the upkeep maintenance, infrastructure, and similar label Aug 27, 2024
@schochastics
Copy link

Here is an incomplete suggestion for the first case

get_edge_ids <- igraph::get.edge.ids
adj_row <- function(x, i, j, attr, sparse, drop = TRUE) {
  if (is_directed(x)) {
    imode <- "out"
    jmode <- "in"
  } else {
    mode <- "all"
  }
  if (!missing(i)) {
    n1 <- length(i)
    n2 <- vcount(x)
    alist <- neighborhood(x, nodes = i, mode = imode, mindist = 1)

    rid <- rep(seq_along(alist), lengths(alist))
    cid <- unlist(alist)
    el <- elid <- cbind(rid, cid)
    el[, 1] <- i[el[, 1]]
    if (!missing(j)) {
      n2 <- length(j)
      idx <- el[, 2] %in% j
      el <- el[idx, , drop = FALSE]
      elid <- elid[idx, , drop = FALSE]
      elid[, 2] <- match(elid[, 2], j)
    }
    if (!sparse) {
      res <- matrix(0, n1, n2)
      if (missing(attr)) {
        res[elid] <- 1
      } else {
        eids <- get_edge_ids(x, c(t(el)))
        values <- edge_attr(x, attr, eids)
        res[elid] <- values
      }
    } else {
      if (missing(attr)) {
        res <- Matrix::sparseMatrix(i = elid[, 1], j = elid[, 2], x = 1, dims = c(n1, n2))
      } else {
        eid <- get_edge_ids(x, c(t(el)))
        values <- edge_attr(x, attr, eid)
        res <- Matrix::sparseMatrix(i = elid[, 1], j = elid[, 2], x = values, dims = c(n1, n2))
      }
    }
  } else if (missing(i) && missing(j)) {
    res <- as_adj(x, sparse = sparse, attr = attr)
  } else if (missing(i) && !missing(j)) {
    n2 <- length(j)
    n1 <- vcount(x)
    alist <- neighborhood(x, nodes = j, mode = jmode, mindist = 1)
    cid <- rep(seq_along(alist), lengths(alist))
    rid <- unlist(alist)
    el <- elid <- cbind(rid, cid)
    el[, 2] <- j[el[, 2]]

    if (!sparse) {
      res <- matrix(0, n1, n2)
      if (missing(attr)) {
        res[elid] <- 1
      } else {
        eids <- get_edge_ids(x, c(t(el)))
        values <- edge_attr(x, attr, eids)
        res[elid] <- values
      }
    } else {
      if (missing(attr)) {
        res <- Matrix::sparseMatrix(i = elid[, 1], j = elid[, 2], x = 1, dims = c(n1, n2))
      } else {
        eid <- get_edge_ids(x, c(t(el)))
        values <- edge_attr(x, attr, eid)
        res <- Matrix::sparseMatrix(i = elid[, 1], j = elid[, 2], x = values, dims = c(n1, n2))
      }
    }
  }
  res[, , drop = drop]
}

Became cumbersome quickly, but this solution does avoid computing the whole adjacency matrix whenever possible. Happy to prepare a PR if this looks ok to you (and you do not prefer a C implementation for this).

Here are some small tests/benchmarks

library(igraph) #2.0.3
#> 
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#> 
#>     decompose, spectrum
#> The following object is masked from 'package:base':
#> 
#>     union
set.seed(12345)
u <- igraph::sample_gnp(10, 0.5, directed = FALSE)
d <- igraph::sample_gnp(10, 0.25, directed = TRUE)
uw <- u
E(uw)$weight <- sample(1:5, replace = TRUE, ecount(u))

dw <- d
E(dw)$weight <- sample(1:5, replace = TRUE, ecount(d))
tst <- c(
  all(d[] == adj_row(d, sparse = TRUE, attr = NULL)),
  all(u[] == adj_row(u, sparse = TRUE, attr = NULL)),
  all(dw[] == adj_row(dw, sparse = TRUE, attr = "weight")),
  all(uw[] == adj_row(uw, sparse = TRUE, attr = "weight")),
  all(d[7, sparse = TRUE] == adj_row(d, i = 7, sparse = TRUE)),
  all(d[c(5, 1, 10), sparse = FALSE] == adj_row(d, i = c(5, 1, 10), sparse = FALSE)),
  all(d[c(2, 4, 6), sparse = TRUE] == adj_row(d, i = c(2, 4, 6), sparse = TRUE)),
  all(d[1:2, 3:7, sparse = FALSE] == adj_row(d, i = 1:2, j = 3:7, sparse = FALSE)),
  all(d[1:2, 3:7, sparse = TRUE] == adj_row(d, i = 1:2, j = 3:7, sparse = TRUE)),
  all(dw[1:2, 3:7, sparse = TRUE] == adj_row(dw, i = 1:2, j = 3:7, attr = "weight", sparse = TRUE)),
  all(dw[1:2, 3:7, sparse = FALSE] == adj_row(dw, i = 1:2, j = 3:7, attr = "weight", sparse = FALSE)),
  all(d[, 3:7, sparse = FALSE] == adj_row(dw, j = 3:7, sparse = FALSE)),
  all(d[, 3:7, sparse = TRUE] == adj_row(dw, j = 3:7, sparse = TRUE)),
  all(dw[, 3:7, sparse = FALSE] == adj_row(dw, j = 3:7, attr = "weight", sparse = FALSE)),
  all(dw[, 3:7, sparse = TRUE] == adj_row(dw, j = 3:7, attr = "weight", sparse = TRUE))
)

tst
#>  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

set.seed(123)
g <- sample_gnp(5000, 0.2, TRUE)
microbenchmark::microbenchmark(
  g[1:10, sparse = TRUE],
  adj_row(g, i = 1:10, sparse = TRUE),
  times = 10
)
#> Unit: milliseconds
#>                                 expr         min          lq       mean
#>               g[1:10, sparse = TRUE] 1265.301275 1319.177312 1367.91565
#>  adj_row(g, i = 1:10, sparse = TRUE)    2.790987    2.981989    3.18912
#>       median         uq       max neval
#>  1343.447250 1362.16232 1541.0282    10
#>     3.143579    3.41028    3.5749    10

E(g)$weight <- sample(1:100, ecount(g), replace = TRUE)

microbenchmark::microbenchmark(
  g[1:10, sparse = TRUE],
  adj_row(g, i = 1:10, attr = "weight", sparse = TRUE),
  times = 10
)
#> Unit: milliseconds
#>                                                  expr        min          lq
#>                                g[1:10, sparse = TRUE] 954.501190 1042.999906
#>  adj_row(g, i = 1:10, attr = "weight", sparse = TRUE)   9.413905    9.587471
#>        mean      median         uq        max neval
#>  1141.82152 1112.757054 1284.83868 1389.03092    10
#>    17.35286    9.908534   27.66706   48.15274    10

Created on 2024-09-22 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
upkeep maintenance, infrastructure, and similar
Projects
None yet
Development

No branches or pull requests

3 participants