@@ -90,7 +90,7 @@ def sample_sharegpt_requests(
90
90
fixed_output_len : Optional [int ] = None ,
91
91
) -> List [Tuple [str , int , int , None ]]:
92
92
# Load the dataset.
93
- with open (dataset_path ) as f :
93
+ with open (dataset_path , encoding = 'utf-8' ) as f :
94
94
dataset = json .load (f )
95
95
# Filter out the conversations with less than 2 turns.
96
96
dataset = [data for data in dataset if len (data ["conversations" ]) >= 2 ]
@@ -139,7 +139,7 @@ def sample_sonnet_requests(
139
139
), "'args.sonnet-input-len' must be greater than 'args.prefix-input-len'."
140
140
141
141
# Load the dataset.
142
- with open (dataset_path ) as f :
142
+ with open (dataset_path , encoding = 'utf-8' ) as f :
143
143
poem_lines = f .readlines ()
144
144
145
145
# Tokenize the poem lines.
@@ -726,7 +726,7 @@ def main(args: argparse.Namespace):
726
726
file_name = args .result_filename
727
727
if args .result_dir :
728
728
file_name = os .path .join (args .result_dir , file_name )
729
- with open (file_name , "w" ) as outfile :
729
+ with open (file_name , "w" , encoding = 'utf-8' ) as outfile :
730
730
json .dump (result_json , outfile )
731
731
732
732
0 commit comments