@@ -736,7 +736,7 @@ def test_read_csv_squeeze(self, test_case):
736736
737737 str_single_element = "1"
738738 str_single_col = "1\n 2\n 3\n "
739- str_four_cols = "1, 2, 3, 4\n " "5 , 6, 7, 8\n " "9 , 10, 11, 12\n "
739+ str_four_cols = "1, 2, 3, 4\n 5 , 6, 7, 8\n 9 , 10, 11, 12\n "
740740 case_to_data = {
741741 "single_element" : str_single_element ,
742742 "single_column" : str_single_col ,
@@ -750,7 +750,7 @@ def test_read_csv_squeeze(self, test_case):
750750
def test_read_csv_mangle_dupe_cols(self):
    """Exercise ``read_csv`` with ``mangle_dupe_cols=True`` on duplicated column names."""
    # All four header fields are the same name ("col"), followed by two data rows.
    duplicated_header_csv = "col,col,col,col\n 5 , 6, 7, 8\n 9 , 10, 11, 12\n "
    eval_io_from_str(
        duplicated_header_csv,
        get_unique_filename(),
        mangle_dupe_cols=True,
    )
755755
756756 # NA and Missing Data Handling tests
@@ -1023,6 +1023,82 @@ def test_read_csv_error_handling(
10231023 error_bad_lines = error_bad_lines ,
10241024 )
10251025
1026+ # Internal parameters tests
@pytest.mark.parametrize("use_str_data", [True, False])
@pytest.mark.parametrize("engine", [None, "python", "c"])
@pytest.mark.parametrize("delimiter", [",", " "])
@pytest.mark.parametrize("delim_whitespace", [True, False])
@pytest.mark.parametrize("low_memory", [True, False])
@pytest.mark.parametrize("memory_map", [True, False])
@pytest.mark.parametrize("float_precision", [None, "high", "round_trip"])
def test_read_csv_internal(
    self,
    make_csv_file,
    use_str_data,
    engine,
    delimiter,
    delim_whitespace,
    low_memory,
    memory_map,
    float_precision,
):
    """Exercise ``read_csv`` with every combination of its internal parameters.

    The parametrized ``engine``, ``delimiter``, ``delim_whitespace``,
    ``low_memory``, ``memory_map`` and ``float_precision`` values are forwarded
    unchanged as keyword arguments to the evaluation helper.

    Parameters
    ----------
    make_csv_file : fixture
        Factory used to write the CSV file when ``use_str_data`` is False.
    use_str_data : bool
        True: evaluate an inline whitespace-separated string through
        ``eval_io_from_str``. False: create a file with ``make_csv_file`` and
        evaluate it through ``eval_io``.
    engine, delimiter, delim_whitespace, low_memory, memory_map, float_precision
        Values passed through to ``read_csv``.
    """
    if Engine.get() != "Python" and delimiter == " ":
        # Adjacent literals (rather than a backslash continuation inside the
        # string) keep the source indentation out of the reported reason.
        pytest.xfail(
            "read_csv with Ray engine doesn't "
            "raise exceptions while Pandas raises - issue #2320"
        )

    # For this one combination a "TypeError: cannot use a string pattern on a
    # bytes-like object" is raised. TypeError is therefore dropped from the
    # raising_exceptions list so that the check still verifies that Pandas and
    # Modin raise the same exception.
    case_with_TypeError_exc = (
        engine == "python"
        and delimiter == ","
        and delim_whitespace
        and low_memory
        and memory_map
        and float_precision is None
    )

    raising_exceptions = io_ops_bad_exc  # default value
    if case_with_TypeError_exc:
        # Build a filtered copy: the shared default is never mutated, and no
        # ValueError is raised should TypeError be missing from it.
        raising_exceptions = [
            exc for exc in io_ops_bad_exc if exc is not TypeError
        ]

    kwargs = {
        "engine": engine,
        "delimiter": delimiter,
        "delim_whitespace": delim_whitespace,
        "low_memory": low_memory,
        "memory_map": memory_map,
        "float_precision": float_precision,
    }

    unique_filename = get_unique_filename()

    if use_str_data:
        str_delim_whitespaces = (
            "col1 col2 col3 col4\n 5 6 7 8\n 9 10 11 12\n "
        )
        eval_io_from_str(
            str_delim_whitespaces,
            unique_filename,
            raising_exceptions=raising_exceptions,
            **kwargs,
        )
    else:
        make_csv_file(
            filename=unique_filename,
            delimiter=delimiter,
        )

        eval_io(
            filepath_or_buffer=unique_filename,
            fn_name="read_csv",
            raising_exceptions=raising_exceptions,
            **kwargs,
        )
1101+
10261102
10271103def test_from_parquet (make_parquet_file ):
10281104 make_parquet_file (NROWS )
0 commit comments