Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions hdinsight/settings.ps1
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
# Use unique prefix for cluster resources
$prefix = "andrier"
$user = "andrie"

# Use user as a prefix for related resources
$prefix = $user

# Deployment settings exposed as properties of the $settings object; the setup
# script reads them as $settings.<Name> (subscription, storage account, cluster).
# NOTE(review): StorageAccountName, StorageAccountLabel and HDInsightUsername each
# appear twice below. This looks like the removed and the added side of a diff
# shown together - confirm which value belongs in the file and keep only one.
# NOTE(review): HDInsightPassword is stored in plain text and is later converted
# with ConvertTo-SecureString -AsPlainText; consider prompting for it instead of
# committing it to source control.
$settings = New-Object PSObject -Property @{
# Subscription
SubscriptionName = "Visual Studio Ultimate with MSDN"

# Storage account
# The account name is derived from $prefix, which is defined above this block.
StorageAccountName = "$($prefix)hadooptutorial"
StorageAccountLabel = "RHaddop-tutorial"
StorageAccountName = "$($prefix)rhadooptutorial"
StorageAccountLabel = "RHadoop-tutorial"
StorageAccountLocation = "West Europe"

# Cluster
# Cluster and container names are also derived from $prefix.
HDInsightClusterName = "$($prefix)-r-hadoop-tutorial"
HDInsightContainerName = "$($prefix)-r-hadoop-tutorial-hdfs"
HDInsightUsername = "admin"
HDInsightUsername = $user
HDInsightPassword = "RHadoopTutorial2015!"
HDInsightClusterSizeInNodes = 2
HDInsightHeadNodeVMSize = "Large"

# Custom version of the script referenced at:
# Install and use R on HDInsight Hadoop clusters
# https://azure.microsoft.com/en-us/documentation/articles/hdinsight-hadoop-r-scripts
RInstallerScriptUri = "https://raw.githubusercontent.com/StanislawSwierc/RHadoop-tutorial/master/hdinsight/r-installer.ps1"
# Alternative (disabled): take the installer from the fork owned by $user.
#RInstallerScriptUri = "https://raw.githubusercontent.com/$($user)/RHadoop-tutorial/master/hdinsight/r-installer.ps1"
}
65 changes: 58 additions & 7 deletions hdinsight/setup.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Add-AzureAccount
Select-AzureSubscription $settings.SubscriptionName

# Check if the storage account exists and create it otherwise
if (!(Get-AzureStorageAccount -StorageAccountName $settings.StorageAccountName -ErrorAction SilentlyContinue)) {
if (!(Get-AzureStorageAccount -StorageAccountName $settings.StorageAccountName -ErrorAction SilentlyContinue -WarningAction silentlyContinue)) {
Write-Host "Creating new storage account: $($settings.StorageAccountName)"
New-AzureStorageAccount `
-StorageAccountName $settings.StorageAccountName `
Expand All @@ -35,6 +35,56 @@ Set-AzureSubscription `
# Fetch the primary access key of the storage account.
$storageAccountKey = (Get-AzureStorageKey -StorageAccountName $settings.StorageAccountName).Primary

# Build a storage context from the account name and that key.
$storageContext = New-AzureStorageContext `
    -StorageAccountName $settings.StorageAccountName `
    -StorageAccountKey $storageAccountKey

# Create the container named by HDInsightContainerName unless it is already there.
# Lookup errors are silenced so that a missing container simply yields no result.
if (-not (Get-AzureStorageContainer -Name $settings.HDInsightContainerName -ErrorAction SilentlyContinue)) {
    Write-Host "Creating new storage container: $($settings.HDInsightContainerName)"
    New-AzureStorageContainer -Name $settings.HDInsightContainerName
}

# Check if the dataset exists and upload it otherwise.
# Any blob under $datasetPrefix counts as "dataset present", so a partially
# uploaded dataset is not re-copied by this check.
$datasetPrefix = "user/$($settings.HDInsightUsername)/nyctaxitrips"
if (!(Get-AzureStorageBlob -Prefix $datasetPrefix -Container $settings.HDInsightContainerName -Context $storageContext)) {
    # Dataset consists of 12 trip_data and 12 trip_fare files. They can be copied from
    # a publicly available storage container. The most efficient way is to start asynchronous
    # copy operations and wait for their completion.
    for ($i = 1; $i -le 12; $i++) {
        Start-CopyAzureStorageBlob `
            -SrcUri "https://nyctaxitrips.blob.core.windows.net/data/trip_fare_$i.csv.zip" `
            -DestBlob "$datasetPrefix/fare/trip_fare_$i.csv.zip" `
            -DestContainer $settings.HDInsightContainerName `
            -DestContext $storageContext

        Start-CopyAzureStorageBlob `
            -SrcUri "https://nyctaxitrips.blob.core.windows.net/data/trip_data_$i.csv.zip" `
            -DestBlob "$datasetPrefix/data/trip_data_$i.csv.zip" `
            -DestContainer $settings.HDInsightContainerName `
            -DestContext $storageContext
    }

    # Wait for all operations to complete. The copy states are polled once per
    # minute and kept so that the final outcome can be inspected after the loop.
    $pendingBlobsCount = 24
    while ($pendingBlobsCount -gt 0) {
        Write-Host $(Get-Date) : "Waiting for $pendingBlobsCount/24 asynchronous copy operations."
        Start-Sleep -Seconds 60
        $copyStates = @(Get-AzureStorageBlob -Prefix $datasetPrefix -Container $settings.HDInsightContainerName -Context $storageContext |
            Get-AzureStorageBlobCopyState)
        $pendingBlobsCount = @($copyStates | ?{ $_.Status -eq "Pending" }).Count
    }

    # "No longer pending" is not the same as "copied": surface every copy that
    # ended in a state other than Success instead of silently continuing.
    foreach ($copyState in @($copyStates | ?{ $_.Status -ne "Success" })) {
        Write-Warning "Copy of $($copyState.Source) ended with status $($copyState.Status): $($copyState.StatusDescription)"
    }

    # Fewer than 24 destination blobs means some copy operations never started.
    if ($copyStates.Count -lt 24) {
        Write-Warning "Only $($copyStates.Count)/24 dataset blobs were found under '$datasetPrefix'."
    }
}

# Publish the R installer so that cluster nodes can fetch it by URI:
#   1. upload r-installer.ps1 into the container, overwriting any previous copy;
#   2. issue a read-only SAS token for that blob, valid for one day;
#   3. append the token to the blob's absolute URI.
$scriptActionBlob = Set-AzureStorageBlobContent `
    -File "r-installer.ps1" `
    -Container $settings.HDInsightContainerName `
    -Blob "user/$($settings.HDInsightUsername)/r-installer.ps1" `
    -Force
$scriptActionBlobToken = New-AzureStorageBlobSASToken `
    -ICloudBlob $scriptActionBlob.ICloudBlob `
    -Permission r `
    -ExpiryTime (Get-Date).AddDays(1)
$scriptActionUri = "$($scriptActionBlob.ICloudBlob.Uri.AbsoluteUri)$scriptActionBlobToken"

# Create cluster configuration
$hdinsightConfig = New-AzureHDInsightClusterConfig `
-HeadNodeVMSize $settings.HDInsightHeadNodeVMSize `
Expand All @@ -44,23 +94,24 @@ $hdinsightConfig = New-AzureHDInsightClusterConfig `
-StorageAccountKey $storageAccountKey `
-StorageContainerName $settings.HDInsightContainerName |
Add-AzureHDInsightScriptAction `
-Name "Install R" `
-Name "Install R (x64) on Head and Data nodes" `
-ClusterRoleCollection HeadNode,DataNode `
-Uri $settings.RInstallerScriptUri
-Uri $scriptActionUri

# Wrap the configured user name and plain-text password in a PSCredential.
# The password has to be turned into a SecureString first; -Force is required
# together with -AsPlainText.
$hdinsightPasswordSecureString = ConvertTo-SecureString `
    -String $settings.HDInsightPassword `
    -AsPlainText `
    -Force
$hdinsightCredential = New-Object `
    -TypeName System.Management.Automation.PSCredential `
    -ArgumentList @($settings.HDInsightUsername, $hdinsightPasswordSecureString)

# Create cluster
# Creates the cluster from $hdinsightConfig and $hdinsightCredential in the same
# location as the storage account, printing a timestamp before and after the call.
# -ErrorAction Continue reports an error from the cmdlet and lets the script go on,
# so the "Operation completed" line is printed even when creation fails.
# NOTE(review): the -Location argument appears twice below, once without and once
# with a trailing backtick. This looks like both sides of a diff hunk - confirm
# that only the backtick-terminated line, followed by -ErrorAction, is in the file.
Write-Host "$(Get-Date) : Creating new cluster"
$hdinsightCluster = New-AzureHDInsightCluster `
-Name $settings.HDInsightClusterName `
-Config $hdinsightConfig `
-Credential $hdinsightCredential `
-Location $settings.StorageAccountLocation
-Location $settings.StorageAccountLocation `
-ErrorAction Continue
Write-Host "$(Get-Date) : Operation completed"

# Check cluster state
Get-AzureHDInsightCluster -Name $settings.HDInsightClusterName


Get-AzureHDInsightCluster -Name $settings.HDInsightClusterName