From 75ace5d86556ae0dd29389901b88c8c5099921a2 Mon Sep 17 00:00:00 2001 From: Greg Sutcliffe Date: Tue, 14 Dec 2021 15:28:22 +0000 Subject: [PATCH 1/2] Add event_id so we can de-duplicate lists if needed --- R/utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utils.R b/R/utils.R index 2cff7ed..1ce949b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -15,6 +15,7 @@ api_url <- function(url, host = NULL, port = NULL) { process_events <- function(events) { tibble::tibble(event = events) |> tidyr::hoist(event, + id = "event_id", time = "origin_server_ts", type = "type", sender = "sender", From c794978bb443f7a648b6da2c59d0080eca60b816 Mon Sep 17 00:00:00 2001 From: Greg Sutcliffe Date: Tue, 14 Dec 2021 15:29:50 +0000 Subject: [PATCH 2/2] Make room_history handle multiple IDs --- DESCRIPTION | 1 + NAMESPACE | 2 +- R/room_history.R | 60 +++++++++++++++++++-------- man/{room_history.Rd => get_rooms.Rd} | 11 +++-- 4 files changed, 51 insertions(+), 23 deletions(-) rename man/{room_history.Rd => get_rooms.Rd} (58%) diff --git a/DESCRIPTION b/DESCRIPTION index 5742855..4165a57 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: glue, httr, lubridate, + purrr, rlog, tibble, tidyr diff --git a/NAMESPACE b/NAMESPACE index 3dbe4e3..8f3126f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,5 @@ # Generated by roxygen2: do not edit by hand export(get_messages) -export(room_history) +export(get_rooms) export(sync) diff --git a/R/room_history.R b/R/room_history.R index 0c5dfd9..625dba7 100644 --- a/R/room_history.R +++ b/R/room_history.R @@ -1,31 +1,25 @@ #' Get the room events for a given room ID. #' -#' @param room_id The room to get data for. -#' @param since Stop paginating when reaching this time. +#' @param room_id The room to get data for. +#' @param since Stop paginating when reaching this time. +#' @param initial_sync Result of a prior call to [sync()]. #' #' @return A tibble containing event information. #' -#' @export -room_history <- function(room_id, since) { - # We expect since to be a POSIX datetime, but could be a string. - since <- lubridate::as_datetime(since) +#' @noRd +room_history <- function(room_id, since, initial_sync) { + # TODO: Add better configuration. + token <- Sys.getenv("token") + timeline <- initial_sync$rooms$join[[room_id]]$timeline + from <- timeline$prev_batch + + events <- process_events(timeline$events) + rlog::log_debug(glue::glue("Initial sync yielded {nrow(events)} events.")) rlog::log_info( glue::glue("Fetching events for room {room_id} since {since}.") ) - # TODO: Add better configuration. - token <- Sys.getenv("token") - - # Perform an initial sync and get events for the room. - initial_sync <- sync() - timeline <- initial_sync$rooms$join[[room_id]]$timeline - from <- timeline$prev_batch - - events <- process_events(timeline$events) - - rlog::log_debug(glue::glue("Initial sync yielded {nrow(events)} events.")) - while (TRUE) { oldest_time <- events |> dplyr::slice_min(time) |> @@ -55,3 +49,33 @@ room_history <- function(room_id, since) { events |> dplyr::filter(time >= since) } + +#' Get the room events for a given room ID. +#' +#' @param room_ids Vector of room IDs to iterate over. +#' @param since Stop paginating when reaching this time. +#' @param sync (Optional) Result of a prior call to [sync()] to save +#' duplication. +#' +#' @return A tibble containing event information. +#' +#' @export +get_rooms <- function(room_ids, since, sync = NULL) { + # We expect since to be a POSIX datetime, but could be a string. + since <- lubridate::as_datetime(since) + + rlog::log_debug(glue::glue("Getting history for {length(room_ids)} rooms.")) + + # Perform an initial sync and get events for the room. + initial_sync <- if (is.null(sync)) { + rlog::log_debug("Initial sync not provided - running sync() now.") + sync() + } else { + rlog::log_debug("Initial sync provided - not running sync().") + sync + } + + tidyr::tibble(room = room_ids) |> + dplyr::group_by(room) |> + dplyr::mutate(events = purrr::map(room, room_history, since, initial_sync)) +} diff --git a/man/room_history.Rd b/man/get_rooms.Rd similarity index 58% rename from man/room_history.Rd rename to man/get_rooms.Rd index 7977084..ce416c0 100644 --- a/man/room_history.Rd +++ b/man/get_rooms.Rd @@ -1,15 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/room_history.R -\name{room_history} -\alias{room_history} +\name{get_rooms} +\alias{get_rooms} \title{Get the room events for a given room ID.} \usage{ -room_history(room_id, since) +get_rooms(room_ids, since, sync = NULL) } \arguments{ -\item{room_id}{The room to get data for.} +\item{room_ids}{Vector of room IDs to iterate over.} \item{since}{Stop paginating when reaching this time.} + +\item{sync}{(Optional) Result of a prior call to \code{\link[=sync]{sync()}} to save +duplication.} } \value{ A tibble containing event information.