Skip to content

Commit

Permalink
Download airline_20MM.csv to data dir
Browse files Browse the repository at this point in the history
  • Loading branch information
inchiosa committed Aug 6, 2017
1 parent a4d6c59 commit 5c8fea0
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 7 deletions.
8 changes: 4 additions & 4 deletions Code/bigmemory/bigmemory.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if(!require("biganalytics")) install.packages('biganalytics')
##### Example of "bigmemory"

# change working directory
setwd("/home/remoteuser/Data")
setwd("/data/airline")

# call the library
library("bigmemory")
Expand All @@ -37,7 +37,7 @@ head(airline_big)
# size of big matrix object = 664 bytes/0.6 KB
object.size(airline_big)

# convert big matrix object ot R matrix object
# convert big matrix object to R matrix object
airline_matrix <- airline_big[,]

# size of R matrix object = 2080002048 bytes/2.08 GB
Expand All @@ -57,8 +57,8 @@ object.size(airline_df)
# call the library
library("biganalytics")

# perform simply data transformation on "CRSDepTime"
# round "CRSDepTime" to the nearest hour
# perform simple data transformation on "CRSDepTime"
# truncate "CRSDepTime" to the whole hour (drop the last two digits)
airline_big[, "CRSDepTime"] <- floor(airline_big[, "CRSDepTime"] / 100)

# fit a glm model
Expand Down
2 changes: 1 addition & 1 deletion Code/ff/ff.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ if(!require("biglm")) install.packages('biglm')
##### Example of "ff"

# change working directory
setwd("/home/remoteuser/Data")
setwd("/data/airline")

# call the library
library("ff")
Expand Down
9 changes: 7 additions & 2 deletions Scripts/DSVM_Customization_Script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,22 @@ rm *.gz

rm -rf WeatherSubsetCsv AirlineSubsetCsv

cd /data
mkdir airline
cd airline
wget http://strata2017r.blob.core.windows.net/airline/airline_20MM.csv

# Make directory used by Spark compute context
mkdir -p /var/RevoShare/remoteuser

#######################################################################################################################################
#######################################################################################################################################
## Change ownership of some of the directories
chown -R remoteuser:remoteuser /home/remoteuser/KDD2017R

chown -R remoteuser:remoteuser /data/airline
chown remoteuser:remoteuser /var/RevoShare/remoteuser

sudo -u hadoop /opt/hadoop/current/bin/hadoop fs -chown -R remoteuser /user/RevoShare/remoteuser
sudo -u hadoop /opt/hadoop/current/bin/hdfs dfs -chown -R remoteuser /user/RevoShare/remoteuser

#######################################################################################################################################
#######################################################################################################################################
Expand Down

0 comments on commit 5c8fea0

Please sign in to comment.