Merge pull request #46 from statistikat/tibble_pkg
Tibble pkg
alexkowa authored Jul 18, 2024
2 parents 3bd6c7d + 144da68 commit a24efcc
Showing 70 changed files with 1,532 additions and 400 deletions.
12 changes: 8 additions & 4 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: STATcubeR
Title: R interface for the STATcube REST API and Open Government Data
Version: 0.5.0
Version: 0.5.2
Authors@R: c(
person("Gregor", "de Cillia", , "Gregor.deCillia@statistik.gv.at", role = "aut"),
person("Bernhard", "Meindl", , "Bernhard.Meindl@statistik.gv.at", role = "ctb"),
@@ -15,14 +15,17 @@ Description: Import data from the STATcube REST API or from the open data
License: GPL (>= 2)
URL: https://statistikat.github.io/STATcubeR,
https://github.com/statistikat/STATcubeR
BugReports: https://github.com/statistikat/STATcubeR/issues
Imports:
cli (>= 3.4.1),
httr,
jsonlite,
magrittr
Suggests:
magrittr,
pillar (>= 1.5.0),
vctrs (>= 0.5.2)
Suggests:
spelling,
data.tree,
pillar,
rappdirs,
xml2,
reactable,
@@ -32,3 +32,4 @@ Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Language: en-US
18 changes: 18 additions & 0 deletions NAMESPACE
@@ -2,16 +2,29 @@

S3method(as.character,od_json)
S3method(as.character,sc_json)
S3method(as.character,sc_schema_uri)
S3method(as.data.frame,sc_data)
S3method(format,pillar_shaft_ogd_file)
S3method(format,sc_schema_uri)
S3method(format,sdmx_table)
S3method(pillar_shaft,ogd_file)
S3method(pillar_shaft,sc_dttm)
S3method(pillar_shaft,sc_schema_type)
S3method(pillar_shaft,sc_schema_uri)
S3method(print,od_cache_file)
S3method(print,od_json)
S3method(print,od_revisions)
S3method(print,od_table)
S3method(print,sc_rate_limit_table)
S3method(print,sc_schema)
S3method(print,sc_schema_flatten)
S3method(print,sc_table)
S3method(print,sc_tibble_meta)
S3method(print,sc_url)
S3method(print,sdmx_description)
S3method(tbl_format_footer,sc_meta)
S3method(tbl_sum,sc_meta)
S3method(tbl_sum,sc_tibble)
export("%>%")
export(od_cache_clear)
export(od_cache_dir)
@@ -53,6 +66,7 @@ export(sc_last_error_parsed)
export(sc_rate_limit_schema)
export(sc_rate_limit_table)
export(sc_rate_limits)
export(sc_recode)
export(sc_schema)
export(sc_schema_catalogue)
export(sc_schema_db)
@@ -62,6 +76,10 @@ export(sc_table_custom)
export(sc_table_saved)
export(sc_table_saved_list)
export(sc_tabulate)
export(sdmx_table)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(magrittr,"%T>%")
importFrom(pillar,pillar_shaft)
importFrom(pillar,tbl_format_footer)
importFrom(pillar,tbl_sum)
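
The block of `pillar_shaft()`, `tbl_sum()`, and `tbl_format_footer()` registrations above is what hooks STATcubeR's classes into tibble-style printing. As a rough illustration of the mechanism only — with a made-up class, not the package's actual methods — a `pillar_shaft()` method decides how a column of that class renders, while `tbl_sum()` supplies the header line:

```r
library(pillar)
library(tibble)

# Toy stand-in for a class like "ogd_id"; purely illustrative
ids <- structure(c("OGD_a", "OGD_b"), class = c("toy_id", "character"))
format.toy_id <- function(x, ...) paste0("<", unclass(x), ">")

# pillar_shaft() controls how a column of this class renders in a tibble
pillar_shaft.toy_id <- function(x, ...) {
  new_pillar_shaft_simple(format(x), align = "left")
}

# tbl_sum() supplies the header line above the column listing
tbl_sum.toy_tbl <- function(x, ...) c("Toy table" = dim_desc(x))

tbl <- tibble(id = ids)
class(tbl) <- c("toy_tbl", class(tbl))
print(tbl)  # renders via pillar_shaft.toy_id() with the custom header
```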
17 changes: 15 additions & 2 deletions NEWS.md
@@ -1,3 +1,16 @@
# Upcoming (0.6.0)

* Update print methods with the `{tibble}` package (#32)

# STATcubeR 0.5.2

* Add filters and other recodes to `sc_table_custom()` (#33)
* Add global option `STATcubeR.language` to override the default language
* `od_table()`: Add descriptions to `x$header` and `x$field(i)`
* Depend on cli >= 3.4.1 (@matmo, #35)
* Allow json strings in `sc_table()` (@matmo, #36)
* add `sdmx_table()` to import sdmx archives (.zip)

# STATcubeR 0.5.0

* adapt `od_list()` to data.statistik.at update ([`2249b66`](https://github.com/statistikat/STATcubeR/commit/2249b6607cb822a4aac56c6258cbe967832171f1))
@@ -38,7 +51,7 @@
* Allow recodes of `sc_data` objects (#17)
* Better parsing of time variables (#15, #16)
* Use bootstrap 5 and `{pkgdown}` 2.0.0 for the website
* Allow export and import of open data using tar archves (#20)
* Allow export and import of open data using tar archives (#20)

# STATcubeR 0.2.4

@@ -85,7 +98,7 @@ This version finalizes #11
https://data.statistik.gv.at/

* new class `od_table` to get OGD data
* methods to tabulate reponses
* methods to tabulate responses
* caching
* four new pkgdown articles for `od_table()`, `od_list()`, `od_resource()` and `sc_data`

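The 0.5.2 entries above introduce several user-facing hooks. A hedged usage sketch follows — the option name and function names are quoted from the changelog, but the request-body keys, the file path, and the `sdmx_table()` argument are illustrative placeholders, not verified against the API:

```r
library(STATcubeR)

# Global language override added in 0.5.2
options(STATcubeR.language = "en")

# sc_table() now accepts a json string as well as a path to a json file
# (the request-body keys below are placeholders)
json <- '{
  "database": "str:database:example",
  "measures": [],
  "dimensions": []
}'
tab <- sc_table(json)

# sdmx_table() imports an sdmx archive (.zip); the argument name is assumed
sdmx <- sdmx_table("path/to/archive.zip")
```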
4 changes: 2 additions & 2 deletions R/browse.R
@@ -14,7 +14,7 @@ sc_browse <- function(server = "ext") {
sc_url(sc_url_gui(server), "home")
}

#' @describeIn sc_browse opens the preference menu with the api key
#' @describeIn sc_browse opens the preference menu with the API key
#' @examples
#' sc_browse_preferences()
#' @export
@@ -75,7 +75,7 @@ in_stat <- function() {
}

sc_url_gui <- function(server = "ext") {
if (server == "ext" && !in_stat())
if (server == "ext" && (!in_stat() || Sys.getenv("NOT_CRAN") != ""))
return("https://portal.statistik.at/statistik.at/ext/statcube/")
if (server == "test")
return("http://sdbtest:8081/statistik.at/wdev/statcube/")
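For context, the two exported helpers touched in this file open the STATcube GUI in a browser; a minimal usage sketch:

```r
library(STATcubeR)
sc_browse()              # home page of the STATcube GUI (external server)
sc_browse_preferences()  # preference menu that displays the API key
```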
8 changes: 4 additions & 4 deletions R/cache.R
@@ -10,7 +10,7 @@
#' Caching can be set up using environment variables. To set up a persistent cache
#' for both Open Data and the REST API, the following lines in `.Renviron` can
#' be used.
#' The paths in this example are only applicalble for UNIX-based operating systems.
#' The paths in this example are only applicable for UNIX-based operating systems.
#'
#' ```sh
#' STATCUBE_KEY_EXT = YOUR_API_KEY_GOES_HERE
@@ -23,7 +23,7 @@
#' Caching is not implemented for the
#' endpoints [sc_info()] and [sc_rate_limit_table()].
#' @rdname sc_cache
#' @param verbose print instuctions on how to set up caching persistently
#' @param verbose print instructions on how to set up caching persistently
#' via environment variables?
#' @name sc_cache
NULL
@@ -49,14 +49,14 @@ sc_cache_disable <- function() {
Sys.unsetenv("STATCUBE_CACHE")
}

#' @describeIn sc_cache informs wether the cache is currently enabled
#' @describeIn sc_cache informs whether the cache is currently enabled
#' @export
sc_cache_enabled <- function() {
Sys.getenv("STATCUBE_CACHE") != ""
}

#' @export
#' @param dir a chace directory
#' @param dir a cache directory
#' @describeIn sc_cache get/set the directory used for caching
sc_cache_dir <- function(dir = NULL) {
if (is.null(dir))
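A short sketch of the cache helpers documented above, using only behavior visible in this hunk — `sc_cache_enabled()` keys off the `STATCUBE_CACHE` environment variable, which `sc_cache_disable()` unsets:

```r
library(STATcubeR)

sc_cache_enabled()                  # FALSE unless STATCUBE_CACHE is set
Sys.setenv(STATCUBE_CACHE = "TRUE") # mirror of what sc_cache_disable() undoes
sc_cache_enabled()                  # TRUE

sc_cache_dir()                      # get the current cache directory
sc_cache_dir("~/.cache/STATcubeR")  # or set one explicitly
sc_cache_disable()
```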
6 changes: 3 additions & 3 deletions R/error.R
@@ -1,15 +1,15 @@
#' Error handling for the STATcube REST API
#'
#' @description
#' In case API requests are unsuccessfull, `STATcubeR` will throw errors
#' In case API requests are unsuccessful, `STATcubeR` will throw errors
#' to summarize the httr error type and its meaning.
#' Requests are considered unsuccessfull if one of the following applies
#' Requests are considered unsuccessful if one of the following applies
#' * The response returns `TRUE` for `httr::http_error()`.
#' * The response is not of type `"application/json"`
#'
#' In some cases it is useful to get direct access to a faulty response object.
#' For that purpose, it is possible to use [sc_last_error()] which will provide
#' the httr response object for the last unsuccessfull request.
#' the httr response object for the last unsuccessful request.
#' @return The return value from `httr::GET()` or `httr::POST()`.
#' @examplesIf sc_key_exists()
#' try(sc_table_saved("invalid_id"))
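Building on the roxygen example above, a sketch of the error-inspection workflow (requires an API key, per the `@examplesIf sc_key_exists()` guard):

```r
library(STATcubeR)

try(sc_table_saved("invalid_id"))   # provoke an unsuccessful request
resp <- sc_last_error()             # raw httr response of the last failure
httr::status_code(resp)
str(sc_last_error_parsed(), max.level = 1)  # parsed variant, exported in NAMESPACE
```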
15 changes: 8 additions & 7 deletions R/od_cache.R
@@ -15,7 +15,7 @@
#' od_downloads()
#' @details
#' [od_cache_summary()] provides an overview of all contents of the cache through
#' a data.frame. It hasone row for each dataset and the following columns.
#' a data.frame. It has one row for each dataset and the following columns.
#' All file sizes are given in bytes
#' - **`id`** the dataset id
#' - **`updated`** the last modified time for `${id}.json`
@@ -41,7 +41,7 @@ od_cache_summary <- function(server = "ext") {
field <- substr(files[is_field], 1 + pos_underscore[is_field], nchar(files[is_field]) - 4)
id <- substr(files[is_field], 1, pos_underscore[is_field] - 1)
sizes_fields <- file.size(file.path(od_cache_dir(), files[is_field])) %>% split(id) %>% sapply(sum)
fields <- data.frame(id, field, stringsAsFactors = FALSE)
fields <- list(id = id, field = field)

files <- files[!is_field]
pos_underscore <- as.integer(gregexpr("_HEADER", files))
@@ -50,17 +50,18 @@
files <- files[!is_header]
id_data <- substr(files, 1, nchar(files) - 4)
all_ids <- unique(c(id_data, id_header, fields$id))
data.frame(
id = all_ids,
res <- data_frame(
id = all_ids %>% `class<-`(c("ogd_id", "character")),
updated = file.mtime(paste0(cache_dir, all_ids, ".json")),
json = file.size(paste0(cache_dir, all_ids, ".json")),
data = file.size(paste0(cache_dir, all_ids, ".csv")),
header = file.size(paste0(cache_dir, all_ids, "_HEADER.csv")),
fields = sizes_fields[match(unique(fields$id), all_ids)],
n_fields = match(fields$id, all_ids) %>% factor(seq_along(all_ids)) %>%
table() %>% as.integer(),
row.names = NULL, stringsAsFactors = FALSE
) %>% `class<-`(c("tbl", "data.frame"))
table() %>% as.integer()
)
class(res$updated) <- c("sc_dttm", class(res$updated))
res
}


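This hunk swaps `data.frame(..., stringsAsFactors = FALSE)` plus a manual `class<-` for a `data_frame()` constructor. Its definition is not part of the excerpt; given the new `vctrs (>= 0.5.2)` entry in `Imports`, a plausible sketch of such a helper would be:

```r
# Assumed shape of the internal constructor -- not shown in this diff
data_frame <- function(...) {
  out <- vctrs::data_frame(...)            # type-stable, never coerces strings
  class(out) <- c("tbl_df", "tbl", "data.frame")
  out
}
```

Columns such as `updated` then keep bespoke classes like `"sc_dttm"`, so the `pillar_shaft()` methods registered in NAMESPACE control how they print.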
32 changes: 20 additions & 12 deletions R/od_list.R
@@ -3,7 +3,7 @@
#' [od_list()] returns a `data.frame ` containing all datasets published at
#' [data.statistik.gv.at](https://data.statistik.gv.at)
#'
#' @param unique some datasets are pulbished under multiple groups.
#' @param unique some datasets are published under multiple groups.
#' They will only be listed once with the first group they appear in unless
#' this parameter is set to `FALSE`.
#' @param server the open data server to use. Either `ext` for the external
@@ -43,11 +43,10 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) {
xml2::xml_find_all(".//a")

# ids
df <- data.frame(
category = "NA",
df <- data_frame(
category = rep("NA", length(el)),
id = el %>% xml2::xml_attr("aria-label"),
label = el %>% xml2::xml_text(),
stringsAsFactors = FALSE
label = el %>% xml2::xml_text()
)

ignored_labels <- c("[Alle \u00f6ffnen]", "[Alle schlie\u00dfen]",
@@ -67,7 +66,9 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) {
df <- df[!(df$id %in% od_resource_blacklist), ]
rownames(df) <- NULL
attr(df, "od") <- r$times[["total"]]
df %>% `class<-`(c("tbl", "data.frame"))
class(df$id) <- c("ogd_id", "character")
class(df) <- c("tbl_df", class(df))
df
}

#' Get a catalogue for OGD datasets
@@ -95,7 +96,7 @@ od_list <- function(unique = TRUE, server = c("ext", "red")) {
#' |json |`list<od_json>`| Full json metadata
#'
#' The type `datetime` refers to the `POSIXct` format as returned by [Sys.time()].
#' The last column `"json"` containes the full json metadata as returned by
#' The last column `"json"` contains the full json metadata as returned by
#' [od_json()].
#'
#' @inheritParams od_table
@@ -120,7 +121,15 @@ od_catalogue <- function(server = "ext", local = TRUE) {
ids <- od_revisions(server = server)
}
timestamp <- switch(as.character(local), "TRUE" = NULL, "FALSE" = Sys.time())
jsons <- lapply(ids, od_json, timestamp, server)
jsons <- lapply(
cli::cli_progress_along(
ids, type = "tasks", "downloading json metadata files"),
function(i) {
od_json(ids[i], timestamp, server)
}
)
if (!local)
cli::cli_text("\rDownloaded {.field {length(ids)}} metadata files with {.fn od_json}")
as_df_jsons(jsons)
}

@@ -130,7 +139,7 @@ as_df_jsons <- function(jsons) {
}

descs <- sapply(jsons, function(x) x$extras$attribute_description) %>% paste0(";", .)
out <- data.frame(
out <- data_frame(
title = sapply(jsons, function(x) x$title),
measures = gregexpr(";F-", descs) %>% sapply(length),
fields = gregexpr(";C-", descs) %>% sapply(length),
@@ -145,12 +154,11 @@
update_frequency = sapply(jsons, function(x) x$extras$update_frequency),
tags = I(lapply(jsons, function(x) unlist(x$tags))),
categorization = sapply(jsons, function(x) unlist(x$extras$categorization[1])),
json = I(jsons),
stringsAsFactors = FALSE
json = I(jsons)
)
out$modified <- parse_time(out$modified)
out$created <- parse_time(out$created)
class(out) <- c("tbl", class(out))
class(out$id) <- c("ogd_id", "character")
out
}
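
A usage sketch for the two functions changed in this file; the column names are taken from `as_df_jsons()` above, and the progress-bar text matches the `cli_progress_along()` call in the new hunk:

```r
library(STATcubeR)

datasets <- od_list()        # one row per dataset on data.statistik.gv.at
head(datasets$id)            # ids now carry the "ogd_id" class

# local = FALSE forces a re-download and shows the
# "downloading json metadata files" progress bar
catalogue <- od_catalogue(server = "ext", local = TRUE)
catalogue[1:3, c("title", "measures", "fields")]
```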

19 changes: 11 additions & 8 deletions R/od_resource.R
@@ -20,8 +20,9 @@ od_resource_blacklist <- c(
)

od_resource_check_id <- function(id) {
if (substr(id, 1, 4) != "OGD_")
stop("Dataset ids must begin with \"OGD_\": ", shQuote(id), call. = FALSE)
if (!grepl("^OGD_", id) && !grepl("^STAT_", id))
stop("Dataset ids must begin with \"OGD_\" or \"STAT_\": ",
shQuote(id), call. = FALSE)
if (id %in% od_resource_blacklist)
stop("Dataset ", shQuote(id), " was blacklisted in STATcubeR ",
"because of inconsistent formats", call. = FALSE)
@@ -141,15 +142,14 @@ od_resource_parse_all <- function(resources, server = "ext") {
})
od <- lapply(parsed, attr, "od")

data.frame(
data_frame(
name = sapply(resources, function(x) x$name),
last_modified = lapply(od, function(x) x$last_modified) %>% do.call(c, .),
cached = lapply(od, function(x) x$cached) %>% do.call(c, .),
size = sapply(od, function(x) x$size),
download = vapply(od, function(x) x$download, 1.0),
parsed = sapply(od, function(x) x$parsed),
data = I(parsed %>% lapply(`attr<-`, "od", NULL)),
stringsAsFactors = FALSE
data = I(parsed %>% lapply(`attr<-`, "od", NULL))
)
}

@@ -171,9 +171,9 @@ od_resources_check <- function(json) {

od_normalize_columns <- function(x, suffix) {
if (!is.null(suffix)) {
col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)))
col_indices <- c(1, 2, 2, switch(suffix, HEADER = 3, c(4, 3)), 5, 7)
col_names <- c("code", "label", "label_de", "label_en",
switch(suffix, HEADER = NULL, "parent"))
switch(suffix, HEADER = NULL, "parent"), "de_desc", "en_desc")
x <- x[, col_indices] %>% `names<-`(col_names)
x$label <- NA_character_
x$label_en <- as.character(x$label_en)
@@ -224,5 +224,8 @@ od_resource_all <- function(id, json = od_json(id), server = "ext") {
check_header(out$data[[2]])
out$data[[2]] %<>% od_normalize_columns("HEADER")
out$data[seq(3, nrow(out))] %<>% lapply(od_normalize_columns, "FIELD")
out %>% `class<-`(c("tbl", "data.frame"))
class(out$name) <- c("ogd_file", "character")
class(out$last_modified) <- c("sc_dttm", class(out$last_modified))
class(out$cached) <- c("sc_dttm", class(out$cached))
out
}
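
The widened id check at the top of this file now also accepts the `STAT_` prefix. An illustration against the internal helper (ids are made up; `:::` because it is not exported):

```r
STATcubeR:::od_resource_check_id("STAT_example")       # accepted since this commit
try(STATcubeR:::od_resource_check_id("BAD_example"))
#> Error : Dataset ids must begin with "OGD_" or "STAT_": 'BAD_example'
```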
