Skip to content

Commit

Permalink
added encoding to open operations
Browse files Browse the repository at this point in the history
  • Loading branch information
Askill committed Jul 8, 2024
1 parent edfa973 commit ccaca9e
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
6 changes: 3 additions & 3 deletions calculateAverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def get_file_chunks(
chunk_size = file_size // cpu_count

start_end = list()
with open(file_name, "r+b") as f:
with open(file_name, mode="r+b") as f:  # binary mode: encoding= would raise ValueError

def is_new_line(position):
if position == 0:
Expand Down Expand Up @@ -62,7 +62,7 @@ def _process_file_chunk(
) -> dict:
"""Process each file chunk in a different process"""
result = dict()
with open(file_name, "rb") as f:
with open(file_name, mode="rb") as f:  # binary mode: encoding= would raise ValueError
f.seek(chunk_start)
gc_disable()
for line in f:
Expand All @@ -86,7 +86,7 @@ def _process_file_chunk(
measurement,
1,
] # min, max, sum, count

gc_enable()
return result

Expand Down
6 changes: 3 additions & 3 deletions calculateAveragePypy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def get_file_chunks(
chunk_size = file_size // cpu_count

start_end = list()
with open(file_name, "r+b") as f:
with open(file_name, mode="r+b") as f:  # binary mode: encoding= would raise ValueError

def is_new_line(position):
if position == 0:
Expand Down Expand Up @@ -64,7 +64,7 @@ def _process_file_chunk(
"""Process each file chunk in a different process"""
result = dict()

with open(file_name, "r+b") as fh:
with open(file_name, mode="r+b") as fh:  # binary mode: encoding= would raise ValueError
fh.seek(chunk_start)
gc_disable()

Expand Down Expand Up @@ -115,7 +115,7 @@ def _process_file_chunk(
] # min, max, sum, count

location = None

gc_enable()
return result

Expand Down
2 changes: 1 addition & 1 deletion createMeasurements.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def generate_measurement_file(
batches = max(records // 10_000_000, 1)
batch_ends = np.linspace(0, records, batches + 1).astype(int)

with open(file_name, "w") as f:
with open(file_name, encoding="utf-8", mode="w") as f:
for i in tqdm(range(batches)):
from_, to = batch_ends[i], batch_ends[i + 1]
data = self.generate_batch(std_dev, to - from_)
Expand Down

0 comments on commit ccaca9e

Please sign in to comment.