Merge pull request #13 from mlverse/updates
Updates
edgararuiz authored Aug 9, 2023
2 parents ca2f9d2 + b2a9536 commit b7188a0
Showing 7 changed files with 38 additions and 7 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -17,6 +17,7 @@ S3method(sample_frac,tbl_pyspark)
 S3method(sample_n,tbl_pyspark)
 S3method(sdf_copy_to,pyspark_connection)
 S3method(sdf_read_column,spark_pyjobj)
+S3method(sdf_register,spark_pyobj)
 S3method(spark_connect_method,spark_method_databricks_connect)
 S3method(spark_connect_method,spark_method_spark_connect)
 S3method(spark_connection,pyspark_connection)
@@ -99,6 +100,7 @@ importFrom(sparklyr,invoke_new)
 importFrom(sparklyr,random_string)
 importFrom(sparklyr,sdf_copy_to)
 importFrom(sparklyr,sdf_read_column)
+importFrom(sparklyr,sdf_register)
 importFrom(sparklyr,spark_connect_method)
 importFrom(sparklyr,spark_connection)
 importFrom(sparklyr,spark_dataframe)
2 changes: 1 addition & 1 deletion R/install.R
@@ -28,7 +28,7 @@ install_pyspark <- function(envname = "r-sparklyr",
     "delta-spark"
   )
 
-  pip_options <- "--index-url https://packagemanager.posit.co/pypi/2023-06-01/simple"
+  pip_options <- "--index-url https://packagemanager.posit.co/pypi/2023-06-15/simple"
   # in case the user supplied pip_options in ...
   pip_options <- c(pip_options, list(...)$pip_options)
 
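As a quick illustration of how user options merge with the pinned index URL, a hypothetical call (the extra pip option is illustrative, not part of this change):

install_pyspark(
  envname = "r-sparklyr",
  pip_options = "--no-cache-dir"  # hypothetical user-supplied option
)
# Inside the function, pip_options then becomes:
# c("--index-url https://packagemanager.posit.co/pypi/2023-06-15/simple",
#   "--no-cache-dir")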
12 changes: 12 additions & 0 deletions R/methods-dplyr.R
@@ -169,6 +169,18 @@ tbl_pyspark_sdf <- function(x) {
 
 tbl_temp_name <- function() glue("{temp_prefix()}{random_string()}")
 
+#' @importFrom sparklyr sdf_register
+#' @export
+sdf_register.spark_pyobj <- function(x, name = NULL) {
+  sc <- spark_connection(x)
+  tbl_pyspark_temp(
+    x = x$pyspark_obj,
+    conn = sc,
+    tmp_name = name
+  )
+}
+
+
 tbl_pyspark_temp <- function(x, conn, tmp_name = NULL) {
   sc <- spark_connection(conn)
   if (is.null(tmp_name)) {
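A minimal usage sketch of the new method (object names are hypothetical; assumes an active pysparklyr connection and that `py_df` is a spark_pyobj wrapping a PySpark DataFrame):

# Registers the underlying PySpark DataFrame as a temporary view and
# returns a dplyr-compatible tbl:
registered <- sdf_register(py_df, name = "my_temp_view")
dplyr::collect(registered)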
2 changes: 0 additions & 2 deletions R/package.R
@@ -30,10 +30,8 @@
 NULL
 
 .onLoad <- function(...) {
-
   use_virtualenv("r-sparklyr", required = FALSE)
   use_condaenv("r-sparklyr", required = FALSE)
-
 }
 
 pysparklyr_env <- new.env()
19 changes: 18 additions & 1 deletion R/spark-connect.R
@@ -81,7 +81,24 @@ py_spark_connect <- function(master,
       token = token,
       cluster_id = cluster_id
     )
-    python <- remote$getOrCreate()
+
+    check_rstudio <- try(RStudio.Version(), silent = TRUE)
+
+    if (inherits(check_rstudio, "try-error")) {
+      rstudio_chr <- NULL
+    } else {
+      rstudio_chr <- glue("rstudio/{check_rstudio$long_version}")
+    }
+
+    user_agent <- glue(
+      paste(
+        "sparklyr/{packageVersion('sparklyr')}",
+        rstudio_chr
+      )
+    )
+
+    agent <- remote$userAgent(user_agent)
+    python <- agent$getOrCreate()
     con_class <- "connect_databricks"
     master_label <- glue("Databricks Connect - Cluster: {cluster_id}")
   }
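For reference, a standalone sketch of what the user agent ends up as (assumes sparklyr is installed; the printed versions are illustrative):

library(glue)
# Outside RStudio, RStudio.Version() errors, so rstudio_chr is NULL and
# paste() drops it:
rstudio_chr <- NULL
glue(paste("sparklyr/{packageVersion('sparklyr')}", rstudio_chr))
#> sparklyr/1.8.2
# Inside RStudio it would instead yield something like:
#> sparklyr/1.8.2 rstudio/2023.06.1+524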
8 changes: 5 additions & 3 deletions R/spark-pyobj.R
@@ -5,9 +5,11 @@ print.spark_pyobj <- function(x, ...) {
 
 #' @export
 sdf_read_column.spark_pyjobj <- function(x, column) {
-  sdf <- spark_dataframe(x)
-  col_df <- invoke(sdf, "select", column)
-  col_df <- collect(col_df)
+  col_df <- x %>%
+    spark_dataframe() %>%
+    invoke("select", column) %>%
+    collect()
+
   col_df[[column]]
 }
 
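A quick usage sketch (hypothetical object; assumes `py_df` carries the spark_pyjobj class on an active connection):

# Selects the column on the Spark side, collects it, and returns a plain
# R vector:
mpg_values <- sdf_read_column(py_df, "mpg")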
File renamed without changes.
