From ccaca9ed45f5feab2c71cfafaab1cae56532cf82 Mon Sep 17 00:00:00 2001 From: Askill Date: Mon, 8 Jul 2024 14:23:58 +0200 Subject: [PATCH] added encoding to open operations --- calculateAverage.py | 6 +++--- calculateAveragePypy.py | 6 +++--- createMeasurements.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/calculateAverage.py b/calculateAverage.py index 064393a..680af2b 100644 --- a/calculateAverage.py +++ b/calculateAverage.py @@ -15,7 +15,7 @@ def get_file_chunks( chunk_size = file_size // cpu_count start_end = list() - with open(file_name, "r+b") as f: + with open(file_name, encoding="utf-8", mode="r+b") as f: def is_new_line(position): if position == 0: @@ -62,7 +62,7 @@ def _process_file_chunk( ) -> dict: """Process each file chunk in a different process""" result = dict() - with open(file_name, "rb") as f: + with open(file_name, encoding="utf-8", mode="rb") as f: f.seek(chunk_start) gc_disable() for line in f: @@ -86,7 +86,7 @@ def _process_file_chunk( measurement, 1, ] # min, max, sum, count - + gc_enable() return result diff --git a/calculateAveragePypy.py b/calculateAveragePypy.py index 026944c..350b05b 100644 --- a/calculateAveragePypy.py +++ b/calculateAveragePypy.py @@ -15,7 +15,7 @@ def get_file_chunks( chunk_size = file_size // cpu_count start_end = list() - with open(file_name, "r+b") as f: + with open(file_name, encoding="utf-8", mode="r+b") as f: def is_new_line(position): if position == 0: @@ -64,7 +64,7 @@ def _process_file_chunk( """Process each file chunk in a different process""" result = dict() - with open(file_name, "r+b") as fh: + with open(file_name, encoding="utf-8", mode="r+b") as fh: fh.seek(chunk_start) gc_disable() @@ -115,7 +115,7 @@ def _process_file_chunk( ] # min, max, sum, count location = None - + gc_enable() return result diff --git a/createMeasurements.py b/createMeasurements.py index 2af9a1d..dc53618 100644 --- a/createMeasurements.py +++ b/createMeasurements.py @@ -456,7 +456,7 @@ def generate_measurement_file( batches = max(records // 10_000_000, 1) batch_ends = np.linspace(0, records, batches + 1).astype(int) - with open(file_name, "w") as f: + with open(file_name, encoding="utf-8", mode="w") as f: for i in tqdm(range(batches)): from_, to = batch_ends[i], batch_ends[i + 1] data = self.generate_batch(std_dev, to - from_)