From af44b7a42f117acba85731d02914d5c767ea3afd Mon Sep 17 00:00:00 2001
From: Mahmoud Parsian
Date: Thu, 12 Mar 2015 22:50:28 -0700
Subject: [PATCH] basic-average

---
 basic-average/basic-average.txt | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 basic-average/basic-average.txt

diff --git a/basic-average/basic-average.txt b/basic-average/basic-average.txt
new file mode 100644
index 0000000..9c15e9a
--- /dev/null
+++ b/basic-average/basic-average.txt
@@ -0,0 +1,26 @@
+# ./pyspark
+Python 2.6.9 (unknown, Sep 9 2014, 15:05:12)
+[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.39)] on darwin
+Welcome to
+      ____              __
+     / __/__  ___ _____/ /__
+    _\ \/ _ \/ _ `/ __/  '_/
+   /__ / .__/\_,_/_/ /_/\_\   version 1.2.0
+      /_/
+
+Using Python version 2.6.9 (unknown, Sep 9 2014 15:05:12)
+SparkContext available as sc.
+>>> sc
+
+>>>
+>>> nums = sc.parallelize([1, 2, 3, 4, 5, 6, 7, 8, 20])
+>>> nums.collect()
+[1, 2, 3, 4, 5, 6, 7, 8, 20]
+>>> sumAndCount = nums.map(lambda x: (x, 1)).fold((0, 0), (lambda x, y: (x[0] + y[0], x[1] + y[1])))
+>>> sumAndCount
+(56, 9)
+>>>
+>>> avg = float(sumAndCount[0]) / float(sumAndCount[1])
+>>> avg
+6.2222222222222223
+>>>