Skip to content

Commit

Permalink
Merge pull request #8 from FI-Mihej/main
Browse files (browse the repository at this point in the history)
Improve pure Python performance by an additional 10-20%
  • Loading branch information
ifnesi authored Jun 24, 2024
2 parents 8617de3 + 5ec5b80 commit edfa973
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
20 changes: 12 additions & 8 deletions calculateAverage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# time python3 calculateAverage.py
import os
from gc import disable as gc_disable, enable as gc_enable
import multiprocessing as mp


Expand Down Expand Up @@ -63,27 +64,30 @@ def _process_file_chunk(
result = dict()
with open(file_name, "rb") as f:
f.seek(chunk_start)
gc_disable()
for line in f:
chunk_start += len(line)
if chunk_start > chunk_end:
break
location, measurement = line.split(b";")
measurement = float(measurement)
if location not in result:
result[location] = [
measurement,
measurement,
measurement,
1,
] # min, max, sum, count
else:
try:
_result = result[location]
if measurement < _result[0]:
_result[0] = measurement
if measurement > _result[1]:
_result[1] = measurement
_result[2] += measurement
_result[3] += 1
except KeyError:
result[location] = [
measurement,
measurement,
measurement,
1,
] # min, max, sum, count

gc_enable()
return result


Expand Down
22 changes: 12 additions & 10 deletions calculateAveragePypy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# time pypy3 calculateAveragePypy.py
import os
import multiprocessing as mp
from gc import disable as gc_disable, enable as gc_enable


def get_file_chunks(
Expand Down Expand Up @@ -65,6 +66,7 @@ def _process_file_chunk(

with open(file_name, "r+b") as fh:
fh.seek(chunk_start)
gc_disable()

tail = b""
location = None
Expand Down Expand Up @@ -96,25 +98,25 @@ def _process_file_chunk(

value = float(data[index:newline])
index = newline + 1

if location not in result:
result[location] = [
value,
value,
value,
1,
] # min, max, sum, count
else:
try:
_result = result[location]
if value < _result[0]:
_result[0] = value
if value > _result[1]:
_result[1] = value
_result[2] += value
_result[3] += 1
except KeyError:
result[location] = [
value,
value,
value,
1,
] # min, max, sum, count

location = None


gc_enable()
return result


Expand Down

0 comments on commit edfa973

Please sign in to comment.