From 5ec5b803a5798acbb8e09d9a134ef627bba1f9f5 Mon Sep 17 00:00:00 2001 From: ButenkoMS Date: Sun, 23 Jun 2024 16:12:58 +0300 Subject: [PATCH] Improve pure Python performance by an additional 10-20% --- calculateAverage.py | 20 ++++++++++++-------- calculateAveragePypy.py | 22 ++++++++++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/calculateAverage.py b/calculateAverage.py index 9f2b86d..064393a 100644 --- a/calculateAverage.py +++ b/calculateAverage.py @@ -1,5 +1,6 @@ # time python3 calculateAverage.py import os +from gc import disable as gc_disable, enable as gc_enable import multiprocessing as mp @@ -63,20 +64,14 @@ def _process_file_chunk( result = dict() with open(file_name, "rb") as f: f.seek(chunk_start) + gc_disable() for line in f: chunk_start += len(line) if chunk_start > chunk_end: break location, measurement = line.split(b";") measurement = float(measurement) - if location not in result: - result[location] = [ - measurement, - measurement, - measurement, - 1, - ] # min, max, sum, count - else: + try: _result = result[location] if measurement < _result[0]: _result[0] = measurement @@ -84,6 +79,15 @@ def _process_file_chunk( _result[1] = measurement _result[2] += measurement _result[3] += 1 + except KeyError: + result[location] = [ + measurement, + measurement, + measurement, + 1, + ] # min, max, sum, count + + gc_enable() return result diff --git a/calculateAveragePypy.py b/calculateAveragePypy.py index 46cb18c..026944c 100644 --- a/calculateAveragePypy.py +++ b/calculateAveragePypy.py @@ -1,6 +1,7 @@ # time pypy3 calculateAveragePypy.py import os import multiprocessing as mp +from gc import disable as gc_disable, enable as gc_enable def get_file_chunks( @@ -65,6 +66,7 @@ def _process_file_chunk( with open(file_name, "r+b") as fh: fh.seek(chunk_start) + gc_disable() tail = b"" location = None @@ -96,15 +98,7 @@ def _process_file_chunk( value = float(data[index:newline]) index = newline + 1 - - if location not in result: - result[location] = [ - value, - value, - value, - 1, - ] # min, max, sum, count - else: + try: _result = result[location] if value < _result[0]: _result[0] = value @@ -112,9 +106,17 @@ def _process_file_chunk( _result[1] = value _result[2] += value _result[3] += 1 + except KeyError: + result[location] = [ + value, + value, + value, + 1, + ] # min, max, sum, count location = None - + + gc_enable() return result