Skip to content

Commit 28b52e2

Browse files
Added comment to mySparkBatchApp.
1 parent bedaf6d commit 28b52e2

File tree

1 file changed

+11
-2
lines changed
  • Azure/DotNet4ApacheSpark/mySparkBatchApp/mySparkBatchApp

1 file changed

+11
-2
lines changed

Azure/DotNet4ApacheSpark/mySparkBatchApp/mySparkBatchApp/Program.cs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ static void Main(string[] args)
2121

2222
string filePath = args[0];
2323

24+
// Create Spark session
2425
SparkSession spark = SparkSession
2526
.Builder()
2627
.AppName("GitHub and Spark Batch")
2728
.GetOrCreate();
2829

30+
// Create initial DataFrame
2931
DataFrame projectsDf = spark
3032
.Read()
3133
.Schema("id INT, url STRING, owner_id INT, " +
@@ -34,6 +36,7 @@ static void Main(string[] args)
3436
"updated_at STRING")
3537
.Csv(filePath);
3638

39+
// Display results
3740
projectsDf.Show();
3841

3942
// Drop any rows with NA values
@@ -42,27 +45,33 @@ static void Main(string[] args)
4245

4346
// Remove unnecessary columns
4447
cleanedProjects = cleanedProjects.Drop("id", "url", "owner_id");
48+
49+
// Display results
4550
cleanedProjects.Show();
4651

4752
// Average number of times each language has been forked
4853
DataFrame groupedDF = cleanedProjects
4954
.GroupBy("language")
5055
.Agg(Avg(cleanedProjects["forked_from"]));
5156

52-
// Sort by most forked languages first
57+
// Sort by most forked languages first & Display results
5358
groupedDF.OrderBy(Desc("avg(forked_from)")).Show();
5459

60+
// Defines a UDF that determines if a date is greater than a specified date
5561
spark.Udf().Register<string, bool>(
5662
"MyUDF",
5763
(date) => DateTime.TryParse(date, out DateTime convertedDate) &&
5864
(convertedDate > referenceDate));
5965

66+
// Use UDF to add columns to the generated TempView
6067
cleanedProjects.CreateOrReplaceTempView("dateView");
61-
6268
DataFrame dateDf = spark.Sql(
6369
"SELECT *, MyUDF(dateView.updated_at) AS datebefore FROM dateView");
70+
71+
// Display results
6472
dateDf.Show();
6573

74+
// Stop Spark session
6675
spark.Stop();
6776
}
6877
}

0 commit comments

Comments
 (0)