Skip to content

Commit 5d6f7a7

Browse files
committed
First version of cache implementation with new knitr API
1 parent fe4cd9f commit 5d6f7a7

File tree

5 files changed

+135
-101
lines changed

5 files changed

+135
-101
lines changed

R/config.R

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -756,11 +756,6 @@ python_config <- function(python,
756756
}
757757
}
758758

759-
as_numeric_version <- function(version) {
760-
version <- clean_version(version)
761-
numeric_version(version)
762-
}
763-
764759
# check for numpy
765760
numpy <- NULL
766761
if (!is.null(config$NumpyPath)) {
@@ -909,8 +904,9 @@ is_rstudio_desktop <- function() {
909904
identical(version$mode, "desktop")
910905
}
911906

912-
clean_version <- function(version) {
913-
gsub("\\.$", "", gsub("[A-Za-z_+].*$", "", version))
907+
as_numeric_version <- function(version) {
908+
version <- sub("\\.$", "", sub("[A-Za-z_+].*$", "", version))
909+
numeric_version(version)
914910
}
915911

916912
reticulate_python_versions <- function() {

R/knitr-cache.R

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#' A reticulate cache engine for Knitr
#'
#' This provides a `reticulate` cache engine for `knitr`. The cache engine
#' allows `knitr` to save and load Python sessions between cached chunks. The
#' cache engine depends on the `dill` Python module. Therefore, you must have
#' `dill` (version 0.3.6 or later) installed in your Python environment, and
#' Python itself must be version 3.7 or later. When one of these requirements
#' is not met, a warning is emitted and the Python session is neither saved
#' nor loaded. The outcome of the check is stored in the `knitr` package
#' option `reticulate.cache`, so the check is not repeated while that option
#' remains set.
#'
#' The engine can be activated by setting (for example)
#'
#' ```
#' knitr::cache_engines$set(python = reticulate::cache_eng_python)
#' ```
#'
#' Typically, this will be set within a document's setup chunk, or by the
#' environment requesting that Python chunks be processed by this engine.
#'
#' @format A list of four functions:
#' \describe{
#'   \item{`exists(options)`}{Returns `TRUE` if the file
#'     `<options$hash>.pkl` exists.}
#'   \item{`load(options)`}{Initializes the Python engine for the chunk and,
#'     if caching is available, loads `<options$hash>.pkl` into the Python
#'     `__main__` module with `dill.load_module()`.}
#'   \item{`save(options)`}{If caching is available, writes the session to
#'     `<options$hash>.pkl` with `dill.dump_module()`, leaving out the
#'     injected `r` object. If the dump fails, the cache file is removed and
#'     the error is re-raised.}
#'   \item{`purge(glob_path)`}{Deletes the file(s) matching
#'     `<glob_path>.pkl`.}
#' }
#' In all cases `options` is the list of chunk options provided by `knitr`
#' during chunk execution; `options$hash` contains the caching path.
#'
#' @export
cache_eng_python <- (function() {

  # Run the actual availability checks. Returns TRUE when the Python session
  # can be cached, FALSE (with a warning) when a requirement is missing.
  check_cache_available <- function() {

    # is the Python version supported by 'dill'?
    if (py_version() < "3.7") {
      warning("Python cache requires Python version >= 3.7", call. = FALSE)
      return(FALSE)
    }

    # is the module 'dill' loadable?
    dill <- tryCatch(import("dill"), error = identity)
    if (inherits(dill, "error")) {
      error <- reticulate::py_last_error()

      # py_last_error() is NULL when no Python exception was recorded; in that
      # case the failure did not come from Python, so re-signal the original
      # R condition instead of failing on `error$type` below
      if (is.null(error))
        stop(dill)

      # anything other than a missing module is a genuine error
      if (!error$type %in% c("ImportError", "ModuleNotFoundError"))
        stop(error$value, call. = FALSE)

      warning(
        "The Python module 'dill' was not found, it's required for Python cache",
        call. = FALSE
      )
      return(FALSE)
    }

    # is the 'dill' version recent enough?
    dill_version <- as_numeric_version(dill$`__version__`)
    if (dill_version < "0.3.6") {
      warning("Python cache requires module dill>=0.3.6", call. = FALSE)
      return(FALSE)
    }

    # Python cache is available
    TRUE
  }

  # Memoised wrapper around check_cache_available(): the result is kept in
  # the knitr package option 'reticulate.cache'.
  cache_available <- function() {
    available <- knitr::opts_knit$get("reticulate.cache")
    if (is.null(available)) {
      available <- check_cache_available()
      knitr::opts_knit$set(reticulate.cache = available)
    }
    available
  }

  # Path of the pickle file associated with a cache path (or glob).
  cache_path <- function(path) {
    paste(path, "pkl", sep = ".")
  }

  # Does a pickled session exist for this chunk?
  cache_exists <- function(options) {
    file.exists(cache_path(options$hash))
  }

  # Restore the pickled session of this chunk into Python's '__main__'.
  cache_load <- function(options) {
    eng_python_initialize(options, envir = environment())
    if (!cache_available()) return()
    dill <- import("dill")
    dill$load_module(filename = cache_path(options$hash), module = "__main__")
  }

  # Python predicate passed as 'exclude' to dill$dump_module(); it matches the
  # object named 'r' whose type is '__builtins__.__R__'. It is created lazily
  # on first use and then reused.
  exclude_filter <- NULL
  r_obj_filter <- function() {
    if (is.null(exclude_filter)) {
      exclude_filter <<- py_eval(
        "lambda obj: obj.name == 'r' and type(obj.value) is __builtins__.__R__"
      )
    }
    exclude_filter
  }

  # Pickle the current session for this chunk.
  cache_save <- function(options) {
    if (!cache_available()) return()
    dill <- import("dill")
    tryCatch({
      dill$dump_module(
        cache_path(options$hash),
        refimported = TRUE,
        exclude = r_obj_filter()
      )
    }, error = function(e) {
      # remove whatever was written for this chunk, then propagate the error
      cache_purge(options$hash)
      stop(e)
    })
  }

  # Delete the pickle file(s) matching a cache path or glob.
  cache_purge <- function(glob_path) {
    unlink(cache_path(glob_path))
  }

  list(
    exists = cache_exists,
    load = cache_load,
    save = cache_save,
    purge = cache_purge
  )
})()

R/knitr-engine.R

Lines changed: 1 addition & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,6 @@ eng_python <- function(options) {
253253
outputs$push(output)
254254
}
255255

256-
if (options$cache > 0) {
257-
save_python_session(options$hash)
258-
}
259-
260256
# if we had held outputs, add those in now (merging text output as appropriate)
261257
text_output <- character()
262258

@@ -305,16 +301,6 @@ eng_python_initialize <- function(options, envir) {
305301
ensure_python_initialized()
306302
eng_python_initialize_hooks(options, envir)
307303

308-
if (options$cache > 0) {
309-
module <- tryCatch(import("dill"), error = identity)
310-
if (inherits(module, "error")) {
311-
if (module$message == "ImportError: No module named dill") {
312-
warning("The Python module dill was not found. This module is needed for full cache functionality.")
313-
} else {
314-
stop(module$message)
315-
}
316-
}
317-
}
318304
}
319305

320306
eng_python_knit_figure_path <- function(options, suffix = NULL) {
@@ -416,7 +402,7 @@ eng_python_initialize_matplotlib <- function(options, envir) {
416402
if ("matplotlib.backends" %in% names(sys$modules)) {
417403
matplotlib$pyplot$switch_backend("agg")
418404
} else {
419-
version <- numeric_version(matplotlib$`__version__`)
405+
version <- as_numeric_version(matplotlib$`__version__`)
420406
if (version < "3.3.0")
421407
matplotlib$use("agg", warn = FALSE, force = TRUE)
422408
else
@@ -661,52 +647,3 @@ eng_python_autoprint <- function(captured, options, autoshow) {
661647
}
662648

663649
}
664-
665-
save_python_session <- function(cache_path) {
666-
module <- tryCatch(import("dill"), error = identity)
667-
if (inherits(module, "error")) {
668-
if (module$message == "ImportError: No module named dill") return()
669-
signalCondition(module$message)
670-
}
671-
672-
r_obj_exists <- "'r' in globals()"
673-
r_is_R <- "type(r).__module__ == '__main__' and type(r).__name__ == 'R'"
674-
if (py_eval(r_obj_exists) && py_eval(r_is_R)) {
675-
py_run_string("del globals()['r']")
676-
}
677-
678-
cache_path <- file.path(knitr::opts_knit$get("output.dir"), cache_path)
679-
module$dump_session(filename = paste0(cache_path, ".pkl"), byref = TRUE)
680-
}
681-
682-
#' A reticulate cache engine for Knitr
683-
#'
684-
#' This provides a `reticulate` cache engine for `knitr`. The cache engine
685-
#' allows `knitr` to save and load Python sessions between cached chunks. The
686-
#' cache engine depends on the `dill` Python module. Therefore, you must have
687-
#' `dill` installed in your Python environment.
688-
#'
689-
#' The engine can be activated by setting (for example)
690-
#'
691-
#' ```
692-
#' knitr::cache_engines$set(python = reticulate::cache_eng_python)
693-
#' ```
694-
#'
695-
#' Typically, this will be set within a document's setup chunk, or by the
696-
#' environment requesting that Python chunks be processed by this engine.
697-
#'
698-
#' @param options
699-
#' List of chunk options provided by `knitr` during chunk execution.
700-
#' Contains the caching path.
701-
#'
702-
#' @export
703-
cache_eng_python <- function(options) {
704-
module <- tryCatch(import("dill"), error = identity)
705-
if (inherits(module, "error")) {
706-
if (module$message == "ImportError: No module named dill") return()
707-
stop(module$message)
708-
}
709-
710-
cache_path <- normalizePath(paste0(options$hash, ".pkl"), mustWork = TRUE)
711-
knitr:::in_input_dir(module$load_session(filename = cache_path))
712-
}

R/python.R

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -274,14 +274,14 @@ as.environment.python.builtin.object <- function(x) {
274274
if (inherits(x, "python.builtin.dict")) {
275275

276276
names <- py_dict_get_keys_as_str(x)
277-
names <- names[substr(names, 1, 1) != '_']
277+
names <- names[substr(names, 1, 1) != "_"]
278278
Encoding(names) <- "UTF-8"
279279
types <- rep_len(0L, length(names))
280280

281281
} else {
282282
# get the names and filter out internal attributes (_*)
283283
names <- py_suppress_warnings(py_list_attributes(x))
284-
names <- names[substr(names, 1, 1) != '_']
284+
names <- names[substr(names, 1, 1) != "_"]
285285
# replace function with `function`
286286
names <- sub("^function$", "`function`", names)
287287
names <- sort(names, decreasing = FALSE)
@@ -1351,43 +1351,45 @@ py_filter_classes <- function(classes) {
13511351
}
13521352

13531353
py_inject_r <- function() {
1354-
13551354
# don't inject 'r' if there's already an 'r' object defined
13561355
main <- import_main(convert = FALSE)
13571356
if (py_has_attr(main, "r"))
13581357
return(FALSE)
13591358

1360-
# define our 'R' class
1361-
py_run_string("class R(object): pass")
1362-
1363-
# extract it from the main module
1364-
main <- import_main(convert = FALSE)
1365-
R <- main$R
1359+
builtins <- import_builtins(convert = FALSE)
1360+
if (!py_has_attr(builtins, "__R__")) {
1361+
# define our 'R' class
1362+
py_run_string("class R(object): pass")
1363+
R <- main$R
1364+
1365+
# copy it to 'builtins'
1366+
py_set_attr(builtins, "__R__", R)
1367+
1368+
# remove the 'R' class object from '__main__'
1369+
py_del_attr(main, "R")
1370+
1371+
# define the getters, setters we'll attach to the Python class
1372+
getter <- function(self, code) {
1373+
envir <- py_resolve_envir()
1374+
object <- eval(parse(text = as_r_value(code)), envir = envir)
1375+
r_to_py(object, convert = is.function(object))
1376+
}
13661377

1367-
# define the getters, setters we'll attach to the Python class
1368-
getter <- function(self, code) {
1369-
envir <- py_resolve_envir()
1370-
object <- eval(parse(text = as_r_value(code)), envir = envir)
1371-
r_to_py(object, convert = is.function(object))
1372-
}
1378+
setter <- function(self, name, value) {
1379+
envir <- py_resolve_envir()
1380+
name <- as_r_value(name)
1381+
value <- as_r_value(value)
1382+
assign(name, value, envir = envir)
1383+
}
13731384

1374-
setter <- function(self, name, value) {
1375-
envir <- py_resolve_envir()
1376-
name <- as_r_value(name)
1377-
value <- as_r_value(value)
1378-
assign(name, value, envir = envir)
1385+
py_set_attr(R, "__getattr__", getter)
1386+
py_set_attr(R, "__setattr__", setter)
1387+
py_set_attr(R, "__getitem__", getter)
1388+
py_set_attr(R, "__setitem__", setter)
13791389
}
13801390

1381-
py_set_attr(R, "__getattr__", getter)
1382-
py_set_attr(R, "__setattr__", setter)
1383-
py_set_attr(R, "__getitem__", getter)
1384-
py_set_attr(R, "__setitem__", setter)
1385-
13861391
# now define the R object
1387-
py_run_string("r = R()")
1388-
1389-
# remove the 'R' class object
1390-
py_del_attr(main, "R")
1392+
py_run_string("r = __R__()")
13911393

13921394
# indicate success
13931395
TRUE

R/testthat-helpers.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ skip_if_no_scipy <- function() {
109109
skip("scipy not available for testing")
110110

111111
scipy <- import("scipy")
112-
if (clean_version(scipy$`__version__`) < "1.0")
112+
if (as_numeric_version(scipy$`__version__`) < "1.0")
113113
skip("scipy version is less than v1.0")
114114

115115
}

0 commit comments

Comments
 (0)