srvanderplas
diff --git a/‎_freeze/part-wrangling/08-functional-prog/execute-results/html.json‎
Lines changed: 2 additions & 2 deletions b/‎_freeze/part-wrangling/08-functional-prog/execute-results/html.json‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-25-1.png‎
122 KB b/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-25-1.png‎
122 KB
diff --git a/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-25-2.png‎
130 KB b/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-25-2.png‎
130 KB
diff --git a/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-26-1.png‎
162 KB b/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-26-1.png‎
162 KB
diff --git a/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-26-2.png‎
166 KB b/‎_freeze/part-wrangling/08-functional-prog/figure-html/unnamed-chunk-26-2.png‎
166 KB
diff --git a/‎part-wrangling/08-functional-prog.qmd‎
Lines changed: 35 additions & 10 deletions b/‎part-wrangling/08-functional-prog.qmd‎
Lines changed: 35 additions & 10 deletions
diff --git a/‎renv/activate.R‎
Lines changed: 5 additions & 17 deletions b/‎renv/activate.R‎
Lines changed: 5 additions & 17 deletions
@@ -417,7 +417,7 @@ res_tbl <- map_df(res_json, as_tibble) %>%
 writeLines(toJSON(res_tbl, pretty = TRUE), con = "../data/Star_Trek.json")
 ```
 
-![The Movie Database](../images/wrangling/tmdb.svg){fig-alt="The Movie Database logo"}
+![The Movie Database](../images/wrangling/tmdb.svg){fig-alt="The Movie Database logo" width="50%"}
 In this section we'll work with some data gathered from TMDB (the movie database). 
 I submitted a query for all movies that Patrick Stewart was involved with, and you can find the resulting JSON file [here](https://raw.githubusercontent.com/srvanderplas/stat-computing-r-python/main/data/Patrick_Stewart.json). 
 
@@ -436,9 +436,16 @@ library(jsonlite)
 data_url <- "https://raw.githubusercontent.com/srvanderplas/stat-computing-r-python/main/data/Patrick_Stewart.json"
 
 ps_json <- fromJSON(data_url)
-head(ps_json)
 ```
 
+
+<details><summary>Exploring the output structure</summary>
+```{r}
+# head(ps_json) # This output is too long
+map(ps_json, head) # show the first 6 rows of each element in the list
+```
+</details>
+
 By default, `fromJSON` does a LOT of heavy lifting for us: 
 
 1. Identifying the structure of the top-level data: cast, crew, and id information
@@ -451,28 +458,42 @@ It's hard to explain how *nice* this is to someone who hasn't had to parse this
 library(jsonlite)
 
 # Spell out TRUE/FALSE: `T` and `F` are ordinary variables that can be
 # reassigned. Arrays of primitives are still simplified to vectors, but
 # arrays of records stay as nested lists instead of becoming data frames.
 ps_messy <- fromJSON(
   data_url,
   simplifyVector = TRUE,
   simplifyDataFrame = FALSE
 )
+```
+
 
+<details><summary>Exploring the output structure (long version)</summary>
+```{r}
 # Top-level objects (show the first object in the list)
 ps_messy$cast[[1]]
 ps_messy$crew[[1]]
 ps_messy$id
 ```
+</details>
 
 Let's start with the cast list. Most objects seem to be single entries; the only thing that isn't is the `genre_ids` field. So let's see whether we can just convert each list entry to a data frame, and then deal with the `genre_ids` column afterwards.
 
 ```{r, error = T}
 cast_list <- ps_messy$cast
+```
+
 
+<details><summary>Data frame conversion</summary>
+```{r}
 as.data.frame(cast_list[[1]])
+```
+</details>
 
+```{r}
 map(cast_list, as.data.frame)
 ```
 
 Well, that didn't work, but the error message at least tells us what index is causing the problem: 6. Let's look at that data:
 
+<details><summary>Data frame conversion errors</summary>
 ```{r}
-cast_list[[6]]
+cast_list[[6]][1:5]
 ```
+</details>
 
 Ok, so `backdrop_path` is `NULL`, and `as.data.frame` can't handle the fact that some fields are defined (length 1) and others are NULL (length 0). We could possibly replace the NULL with NA first?
 
@@ -482,11 +503,12 @@ fix_nulls <- function(x) {
 }
 
 cast_list_fix <- map(cast_list, fix_nulls)
-cast_list_fix[[6]]
+
+cast_list_fix[[6]][1:5]
 
 map(cast_list_fix, as.data.frame)
 
-cast_list_fix[[8]]
+cast_list_fix[[8]][1:5]
 ```
 
 Ok, well, this time we have an issue with position 8, where `genre_ids` is an empty list.
@@ -500,18 +522,18 @@ fix_nulls <- function(x) {
 }
 
 cast_list_fix <- map(cast_list, fix_nulls)
-cast_list_fix[[8]]
+cast_list_fix[[8]][1:5]
 
 cast_list_df <- map_df(cast_list_fix, as.data.frame)
-cast_list_df
+cast_list_df[1:10, 1:5]
 ```
 
 We still have too many rows for each entry because of the multiple `genre_ids`. 
 But we can fix that with the `nest` command.
 
 ```{r}
 cast_list <- nest(cast_list_df, genre_ids = genre_ids )
-cast_list
+cast_list[1:10,c(1:4, 17)]
 ```
 
 Then, we'd have to apply this whole process to the crew list as well. 
@@ -522,7 +544,7 @@ crew_list <- ps_messy$crew
 crew_list_fix <- map(crew_list, fix_nulls)
 crew_list_df <- map_df(crew_list_fix, as.data.frame)
 crew_list <- nest(crew_list_df, genre_ids = genre_ids )
-crew_list
+crew_list[1:5,c(1:4, 17)]
 ```
 
 Ok, so that actually worked, but only because the structure of the crew data is the same as the structure of the cast data. 
@@ -560,6 +582,9 @@ If we read the [documentation for read_json](https://pandas.pydata.org/docs/refe
 ```{python}
 patrick_stewart = pd.read_json(data_url, typ='series', orient = 'records')
 
+# List the objects
+patrick_stewart.index
+
 # First item in the cast list
 patrick_stewart.cast[0]
 ```
@@ -595,7 +620,7 @@ ps_movies[['id', 'original_title', 'character', 'job']].sort_values(['id'])
 ::: callout-tip
 
 ### Try It Out: JSON File Parsing
-![The Movie Database API](../images/wrangling/tmdb.svg){fig-alt="The Movie Database logo"}
+![The Movie Database](../images/wrangling/tmdb.svg){fig-alt="The Movie Database logo" width="50%"}
 
 I used TMDB to find all movies resulting from the query "Star Trek" and stored the resulting JSON file [here](https://raw.githubusercontent.com/srvanderplas/stat-computing-r-python/main/data/Star_Trek.json). 
 
 
@@ -63,10 +63,6 @@ local({
     if (is.environment(x) || length(x)) x else y
   }
 
-  `%??%` <- function(x, y) {
-    if (is.null(x)) y else x
-  }
-  
   bootstrap <- function(version, library) {
 
     # attempt to download renv
@@ -87,22 +83,11 @@ local({
 
   renv_bootstrap_repos <- function() {
 
-    # get CRAN repository
-    cran <- getOption("renv.repos.cran", "https://cloud.r-project.org")
-  
     # check for repos override
     repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA)
-    if (!is.na(repos)) {
-  
-      # check for RSPM; if set, use a fallback repository for renv
-      rspm <- Sys.getenv("RSPM", unset = NA)
-      if (identical(rspm, repos))
-        repos <- c(RSPM = rspm, CRAN = cran)
-  
+    if (!is.na(repos))
       return(repos)
 
-    }
-  
     # check for lockfile repositories
     repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity)
     if (!inherits(repos, "error") && length(repos))
@@ -119,7 +104,10 @@ local({
     repos <- getOption("repos")
 
     # ensure @CRAN@ entries are resolved
-    repos[repos == "@CRAN@"] <- cran
+    repos[repos == "@CRAN@"] <- getOption(
+      "renv.repos.cran",
+      "https://cloud.r-project.org"
+    )
 
     # add in renv.bootstrap.repos if set
     default <- c(FALLBACK = "https://cloud.r-project.org")