Skip to content

Commit

Permalink
support overriding repartition for paths
Browse files Browse the repository at this point in the history
  • Loading branch information
javierluraschi committed Aug 23, 2017
1 parent cc1a201 commit 74742c4
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion R/sparkwarc.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ spark_read_warc <- function(sc,

if (is.null(parser) || parser == "r") {
paths_df <- data.frame(paths = strsplit(path, ",")[[1]])
+ path_repartition <- if (identical(repartition, 0L)) nrow(paths_df) else repartition
paths_tbl <- sdf_copy_to(
sc,
paths_df,
name = "sparkwarc_paths",
overwrite = TRUE,
- repartition = nrow(paths_df))
+ repartition = path_repartition)

df <- spark_apply(paths_tbl, function(df) {
entries <- apply(df, 1, function(path) {
Expand Down

0 comments on commit 74742c4

Please sign in to comment.