2 files changed: +55 -0 lines changed
File 1 of 2:

from pyspark import SparkContext
from commons.Utils import Utils

def getPostPrefix(line: str):
    # The postcode is the fifth comma-separated column; its prefix (outward code)
    # is the part before the space, e.g. "EC1A" in "EC1A 1BB".
    splits = Utils.COMMA_DELIMITER.split(line)
    postcode = splits[4]
    return None if not postcode else postcode.split(" ")[0]

def loadPostCodeMap():
    # Build a {postcode prefix: region} lookup table on the driver.
    with open("in/uk-postcode.csv", "r") as f:
        lines = f.read().split("\n")
    splitsForLines = [Utils.COMMA_DELIMITER.split(line) for line in lines if line != ""]
    return {splits[0]: splits[7] for splits in splitsForLines}

if __name__ == "__main__":
    sc = SparkContext("local", "UkMakerSpaces")
    sc.setLogLevel("ERROR")

    # Broadcast the lookup table so each executor receives a single read-only copy.
    postCodeMap = sc.broadcast(loadPostCodeMap())

    makerSpaceRdd = sc.textFile("in/uk-makerspaces-identifiable-data.csv")

    regions = makerSpaceRdd \
        .filter(lambda line: Utils.COMMA_DELIMITER.split(line)[0] != "Timestamp") \
        .filter(lambda line: getPostPrefix(line) is not None) \
        .map(lambda line: postCodeMap.value.get(getPostPrefix(line), "Unknown"))

    for region, count in regions.countByValue().items():
        print("{} : {}".format(region, count))
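For context, here is a minimal, self-contained sketch of the broadcast pattern the file above relies on; the tiny lookup table and sample prefixes are invented for illustration and are not taken from the dataset.

from pyspark import SparkContext

if __name__ == "__main__":
    sc = SparkContext("local", "BroadcastSketch")
    sc.setLogLevel("ERROR")

    # Ship the lookup table to every executor once, instead of once per task.
    lookup = sc.broadcast({"SW1": "London", "M1": "Manchester"})  # hypothetical values

    prefixes = sc.parallelize(["SW1", "M1", "ZZ9"])
    regions = prefixes.map(lambda p: lookup.value.get(p, "Unknown"))

    print(regions.collect())  # ['London', 'Manchester', 'Unknown']
    sc.stop()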
File 2 of 2:

import re

from pyspark import SparkContext
from commons.Utils import Utils

def getPostPrefixes(line: str):
    # Return every leading substring of the whitespace-stripped postcode, so the
    # lookup below can match a prefix of any length.
    postcode = Utils.COMMA_DELIMITER.split(line)[4]
    # str.replace() does not interpret regexes, so strip whitespace with re.sub.
    cleanedPostCode = re.sub(r"\s+", "", postcode)
    return [cleanedPostCode[0:i] for i in range(0, len(cleanedPostCode) + 1)]

def loadPostCodeMap():
    # Build a {postcode prefix: region} lookup table on the driver.
    with open("in/uk-postcode.csv", "r") as f:
        lines = f.read().split("\n")
    splitsForLines = [Utils.COMMA_DELIMITER.split(line) for line in lines if line != ""]
    return {splits[0]: splits[7] for splits in splitsForLines}

if __name__ == "__main__":
    sc = SparkContext("local", "UkMakerSpaces")
    sc.setLogLevel("ERROR")

    # No broadcast here: the plain dict is captured in the lambda's closure and
    # serialized with every task.
    postCodeMap = loadPostCodeMap()
    makerSpaceRdd = sc.textFile("in/uk-makerspaces-identifiable-data.csv")

    regions = makerSpaceRdd \
        .filter(lambda line: Utils.COMMA_DELIMITER.split(line)[0] != "Timestamp") \
        .map(lambda line: next((postCodeMap[prefix] for prefix in getPostPrefixes(line)
                                if prefix in postCodeMap), "Unknown"))

    for region, count in regions.countByValue().items():
        print("{} : {}".format(region, count))
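To make the prefix lookup concrete, here is a driver-only illustration of the same logic on a single postcode string; the postcode and region names are made up and only loosely mirror the script.

# Illustrative only: hypothetical postcode and regions, not taken from the dataset.
postCodeMap = {"EC": "London", "M": "Manchester"}

postcode = "EC1A 1BB"
cleaned = postcode.replace(" ", "")                            # "EC1A1BB"
prefixes = [cleaned[0:i] for i in range(0, len(cleaned) + 1)]
# ['', 'E', 'EC', 'EC1', 'EC1A', 'EC1A1', 'EC1A1B', 'EC1A1BB']

# next() yields the first (i.e. shortest) prefix present in the map, or "Unknown".
region = next((postCodeMap[p] for p in prefixes if p in postCodeMap), "Unknown")
print(region)  # London

Unlike the first file, every task here serializes postCodeMap inside the lambda's closure, which is why the broadcast version is usually preferred for larger lookup tables.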