From 54a2346f36b36ca9629b12894a761e4ebda8db3f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 10 Aug 2017 18:19:48 +0000 Subject: [PATCH] Do not set fileSystem globally to hdfs --- Code/MRS/2-Train-Test.r | 6 +++--- Code/MRS/3-Deploy-Score.r | 2 +- Code/MRS/SetComputeContext.r | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Code/MRS/2-Train-Test.r b/Code/MRS/2-Train-Test.r index 58b59fd..359618d 100644 --- a/Code/MRS/2-Train-Test.r +++ b/Code/MRS/2-Train-Test.r @@ -42,7 +42,7 @@ trainDS <- RxHiveData(table = "flightsweathertrain", colInfo = colInfoFull) testDS <- RxHiveData(table = "flightsweathertest", colInfo = colInfoFull) # save the test data as XDF -airWeatherTestXdf <- RxXdfData(file.path(dataDir, "airWeatherTestXdf")) +airWeatherTestXdf <- RxXdfData(file.path(dataDir, "airWeatherTestXdf"), fileSystem = hdfs) rxDataStep(inData = testDS, outFile = airWeatherTestXdf, overwrite = TRUE) ################################################ @@ -67,7 +67,7 @@ save(logitModel, file = "logitModelSubset.RData") # Predict over test data (Logistic Regression). -logitPredict <- RxXdfData(file.path(dataDir, "logitPredictSubset")) +logitPredict <- RxXdfData(file.path(dataDir, "logitPredictSubset"), fileSystem = hdfs) # Use the scalable rxPredict() function @@ -99,7 +99,7 @@ summary(fastTreesEnsembleModel) save(fastTreesEnsembleModel, file = "fastTreesEnsembleModelSubset.RData") # Test -fastTreesEnsemblePredict <- RxXdfData(file.path(dataDir, "fastTreesEnsemblePredictSubset")) +fastTreesEnsemblePredict <- RxXdfData(file.path(dataDir, "fastTreesEnsemblePredictSubset"), fileSystem = hdfs) # Experimental feature to parallelize rxPredict when using a MicrosoftML model assign("predictMethod", "useDataStep", envir = MicrosoftML:::rxHashEnv) diff --git a/Code/MRS/3-Deploy-Score.r b/Code/MRS/3-Deploy-Score.r index 8266e3c..a875c84 100644 --- a/Code/MRS/3-Deploy-Score.r +++ b/Code/MRS/3-Deploy-Score.r @@ -14,7 +14,7 @@ rxSetComputeContext("local") load("logitModelSubset.RData") # loads logitModel # Reference the test data to be scored -airWeatherTestXdf <- RxXdfData( file.path(dataDir, "airWeatherTestXdf") ) +airWeatherTestXdf <- RxXdfData(file.path(dataDir, "airWeatherTestXdf"), fileSystem = hdfs) # Read the first 6 rows and remove the ArrDel15 column dataToBeScored <- base::subset(head(airWeatherTestXdf), select = -ArrDel15) diff --git a/Code/MRS/SetComputeContext.r b/Code/MRS/SetComputeContext.r index f38a834..5318890 100644 --- a/Code/MRS/SetComputeContext.r +++ b/Code/MRS/SetComputeContext.r @@ -19,7 +19,7 @@ if(file.exists("/dsvm")) # N.B. Can be used with local or RxSpark compute contexts ################################################ -rxOptions(fileSystem = RxHdfsFileSystem()) +hdfs <- RxHdfsFileSystem() dataDir <- "/user/RevoShare/remoteuser/Data"