@@ -111,6 +111,7 @@ def _make_parquet_file(
111111 """Helper function to generate parquet files/directories.
112112
113113 Args:
114+ filename: The name of test file, that should be created.
114115 row_size: Number of rows for the dataframe.
115116 force: Create a new file/directory even if one already exists.
116117 directory: Create a partitioned directory using pyarrow.
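For context on the `directory` and `partitioned_columns` options documented above: they describe output produced with pyarrow's dataset writer. A minimal sketch of that write path, with illustrative data and paths (this is not the fixture's actual body):

```python
# Illustrative only: how a partitioned parquet directory is typically written
# with pyarrow, which is what the fixture's directory/partitioned options describe.
import pandas
import pyarrow as pa
import pyarrow.parquet as pq

df = pandas.DataFrame({"col1": [0, 1, 0, 1], "col2": range(4)})
table = pa.Table.from_pandas(df)
# Creates a directory tree such as parquet_dir/col1=0/..., parquet_dir/col1=1/...
pq.write_to_dataset(table, root_path="parquet_dir", partition_cols=["col1"])
```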
@@ -498,6 +499,14 @@ def setup_fwf_file(filename=TEST_FWF_FILENAME, force=True, fwf_data=None):
 
 
 def eval_to_file(modin_obj, pandas_obj, fn, extension, **fn_kwargs):
+    """Helper function to test `to_<extension>` methods.
+
+    Args:
+        modin_obj: Modin DataFrame or Series whose `to_<extension>` method is tested.
+        pandas_obj: Pandas DataFrame or Series whose `to_<extension>` method is tested.
+        fn: Name of the method to be tested.
+        extension: Extension of the test file.
+    """
     unique_filename_modin = get_unique_filename(extension=extension)
     unique_filename_pandas = get_unique_filename(extension=extension)
 
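The new docstring describes `eval_to_file`; its body (unchanged here) follows the usual pattern of writing both objects to files and comparing the results. A rough sketch of that pattern, assuming the module's `get_unique_filename` and `teardown_test_files` helpers and a simplified byte comparison (the real helper may compare differently):

```python
# Sketch of the eval_to_file pattern; names and the comparison step are simplified.
def eval_to_file_sketch(modin_obj, pandas_obj, fn, extension, **fn_kwargs):
    modin_path = get_unique_filename(extension=extension)
    pandas_path = get_unique_filename(extension=extension)
    try:
        # Call e.g. to_csv/to_json on both objects with identical kwargs.
        getattr(modin_obj, fn)(modin_path, **fn_kwargs)
        getattr(pandas_obj, fn)(pandas_path, **fn_kwargs)
        # Naive check: the files produced by Modin and pandas should match.
        with open(modin_path, "rb") as f1, open(pandas_path, "rb") as f2:
            assert f1.read() == f2.read()
    finally:
        teardown_test_files([modin_path, pandas_path])
```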
@@ -1118,22 +1127,12 @@ def test_read_csv_parse_dates(
 
     @pytest.mark.skipif(Engine.get() == "Python", reason="Using pandas implementation")
     def test_read_csv_s3(self):
-        dataset_url = "s3://noaa-ghcn-pds/csv/1788.csv"
-        pandas_df = pandas.read_csv(dataset_url)
-
-        # This first load is to trigger all the import deprecation warnings
-        modin_df = pd.read_csv(dataset_url)
-
-        # This will warn if it defaults to pandas behavior, but it shouldn't
-        with pytest.warns(None) as record:
-            modin_df = pd.read_csv(dataset_url)
-
-        assert not any(
-            "defaulting to pandas implementation" in str(err) for err in record.list
+        eval_io(
+            fn_name="read_csv",
+            # read_csv kwargs
+            filepath_or_buffer="s3://noaa-ghcn-pds/csv/1788.csv",
         )
 
-        df_equals(modin_df, pandas_df)
-
     @pytest.mark.parametrize("names", [list("XYZ"), None])
     @pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None])
     def test_read_csv_skiprows_names(self, names, skiprows):
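The hand-rolled S3 comparison above is replaced by `eval_io`, which runs the same reader through both Modin and pandas and compares the results. A simplified sketch of what that helper is assumed to do (the real one also handles expected exceptions and custom comparators):

```python
# Simplified sketch of eval_io; `pd` is modin.pandas and `pandas` is stock pandas,
# as imported at the top of the test module.
def eval_io_sketch(fn_name, **fn_kwargs):
    modin_result = getattr(pd, fn_name)(**fn_kwargs)
    pandas_result = getattr(pandas, fn_name)(**fn_kwargs)
    df_equals(modin_result, pandas_result)
```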
@@ -1307,67 +1306,40 @@ def wrapped_read_table(file, method):
 
 
 class TestParquet:
-    def test_read_parquet(self, make_parquet_file):
+    @pytest.mark.parametrize("columns", [None, ["col1"]])
+    def test_read_parquet(self, make_parquet_file, columns):
         unique_filename = get_unique_filename(extension="parquet")
         make_parquet_file(filename=unique_filename)
 
         eval_io(
             fn_name="read_parquet",
             # read_parquet kwargs
             path=unique_filename,
+            columns=columns,
         )
 
-    def test_read_parquet_with_columns(self, make_parquet_file):
-        unique_filename = get_unique_filename(extension="parquet")
-        make_parquet_file(filename=unique_filename)
-
-        eval_io(
-            fn_name="read_parquet",
-            # read_parquet kwargs
-            path=unique_filename,
-            columns=["col1"],
-        )
-
-    def test_read_parquet_partition(self, make_parquet_file):
+    @pytest.mark.parametrize("columns", [None, ["col1"]])
+    def test_read_parquet_directory(self, make_parquet_file, columns):
 
         unique_filename = get_unique_filename(extension=None)
         make_parquet_file(filename=unique_filename, directory=True)
         eval_io(
             fn_name="read_parquet",
             # read_parquet kwargs
             path=unique_filename,
+            columns=columns,
         )
 
-    def test_read_parquet_partition_with_columns(self, make_parquet_file):
-
-        unique_filename = get_unique_filename(extension=None)
-        make_parquet_file(filename=unique_filename, directory=True)
-        eval_io(
-            fn_name="read_parquet",
-            # read_parquet kwargs
-            path=unique_filename,
-            columns=["col1"],
-        )
-
-    def test_read_parquet_partitioned_columns(self, make_parquet_file):
-
-        unique_filename = get_unique_filename(extension=None)
-        make_parquet_file(filename=unique_filename, partitioned_columns=["col1"])
-        eval_io(
-            fn_name="read_parquet",
-            # read_parquet kwargs
-            path=unique_filename,
-        )
-
-    def test_read_parquet_partitioned_columns_with_columns(self, make_parquet_file):
+    @pytest.mark.parametrize("columns", [None, ["col1"]])
+    def test_read_parquet_partitioned_directory(self, make_parquet_file, columns):
         unique_filename = get_unique_filename(extension=None)
         make_parquet_file(filename=unique_filename, partitioned_columns=["col1"])
 
         eval_io(
             fn_name="read_parquet",
             # read_parquet kwargs
             path=unique_filename,
-            columns=["col1"],
+            columns=columns,
         )
 
     def test_read_parquet_pandas_index(self):
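The `columns` parametrization folds the former `*_with_columns` variants into the base tests: each case now runs once reading every column and once projecting a single column. In plain pandas terms the two cases look like this (the file path is a placeholder):

```python
import pandas

# columns=None reads the full schema; a list selects a subset of columns.
full_df = pandas.read_parquet("example.parquet", columns=None)
col1_df = pandas.read_parquet("example.parquet", columns=["col1"])
```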
@@ -1452,14 +1424,16 @@ def test_to_parquet(self):
 
 
 class TestJson:
-    def test_read_json(self):
+    @pytest.mark.parametrize("lines", [False, True])
+    def test_read_json(self, lines):
         unique_filename = get_unique_filename(extension="json")
         try:
             setup_json_file(filename=unique_filename)
             eval_io(
                 fn_name="read_json",
                 # read_json kwargs
                 path_or_buf=unique_filename,
+                lines=lines,
             )
         finally:
             teardown_test_files([unique_filename])
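The `lines` flag distinguishes a regular JSON document from newline-delimited JSON (one record per line), which the removed `test_read_json_lines` below used to cover separately. A small illustration with throwaway data:

```python
import io
import pandas

# JSON Lines: one object per line, read with lines=True.
pandas.read_json(io.StringIO('{"a": 1}\n{"a": 2}\n'), lines=True)
# Regular JSON array, read with lines=False (the default).
pandas.read_json(io.StringIO('[{"a": 1}, {"a": 2}]'), lines=False)
```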
@@ -1472,19 +1446,6 @@ def test_read_json_categories(self):
             dtype={"one": "int64", "two": "category"},
         )
 
-    def test_read_json_lines(self):
-        unique_filename = get_unique_filename(extension="json")
-        try:
-            setup_json_lines_file(filename=unique_filename)
-            eval_io(
-                fn_name="read_json",
-                # read_json kwargs
-                path_or_buf=unique_filename,
-                lines=True,
-            )
-        finally:
-            teardown_test_files([unique_filename])
-
     @pytest.mark.parametrize(
         "data",
         [json_short_string, json_short_bytes, json_long_string, json_long_bytes],
@@ -1645,25 +1606,11 @@ def test_to_excel(self):
 
 class TestHdf:
     @pytest.mark.skipif(os.name == "nt", reason="Windows not supported")
-    def test_read_hdf(self):
+    @pytest.mark.parametrize("format", [None, "table"])
+    def test_read_hdf(self, format):
         unique_filename = get_unique_filename(extension="hdf")
         try:
-            setup_hdf_file(filename=unique_filename, format=None)
-            eval_io(
-                fn_name="read_hdf",
-                # read_hdf kwargs
-                path_or_buf=unique_filename,
-                key="df",
-            )
-        finally:
-            teardown_test_files([unique_filename])
-
-    @pytest.mark.skipif(os.name == "nt", reason="Windows not supported")
-    def test_read_hdf_format(self):
-        unique_filename = get_unique_filename(extension="hdf")
-        try:
-            setup_hdf_file(filename=unique_filename, format="table")
-
+            setup_hdf_file(filename=unique_filename, format=format)
             eval_io(
                 fn_name="read_hdf",
                 # read_hdf kwargs
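The `format` parameter mirrors the layout the test fixture uses when writing the HDF file: `None` falls back to pandas' default "fixed" layout, while `"table"` produces the queryable table layout. In plain pandas the two writer calls look like this (file names are placeholders; `format="table"` needs the `tables` package installed):

```python
import pandas

df = pandas.DataFrame({"col1": range(3)})
df.to_hdf("fixed.hdf", key="df", format=None)     # default "fixed" layout
df.to_hdf("table.hdf", key="df", format="table")  # queryable "table" layout
pandas.read_hdf("table.hdf", key="df")
```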
@@ -1766,35 +1713,24 @@ def test_read_sql_with_chunksize(self, make_sql_connection):
         for modin_df, pandas_df in zip(modin_gen, pandas_gen):
             df_equals(modin_df, pandas_df)
 
-    def test_to_sql_without_index(self, make_sql_connection):
-        table_name = "tbl_without_index"
+    @pytest.mark.parametrize("index", [False, True])
+    def test_to_sql(self, make_sql_connection, index):
+        table_name = f"test_to_sql_{str(index)}"
         modin_df, pandas_df = create_test_dfs(TEST_DATA)
 
         # We do not pass the table name so the fixture won't generate a table
-        conn = make_sql_connection("test_to_sql.db")
-        modin_df.to_sql(table_name, conn, index=False)
-        df_modin_sql = pandas.read_sql(table_name, con=conn)
-
-        # We do not pass the table name so the fixture won't generate a table
-        conn = make_sql_connection("test_to_sql_pandas.db")
-        pandas_df.to_sql(table_name, conn, index=False)
-        df_pandas_sql = pandas.read_sql(table_name, con=conn)
-
-        assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())
-
-    def test_to_sql_with_index(self, make_sql_connection):
-        table_name = "tbl_with_index"
-        modin_df, pandas_df = create_test_dfs(TEST_DATA)
-
-        # We do not pass the table name so the fixture won't generate a table
-        conn = make_sql_connection("test_to_sql_with_index_1.db")
-        modin_df.to_sql(table_name, conn)
-        df_modin_sql = pandas.read_sql(table_name, con=conn, index_col="index")
+        conn = make_sql_connection(f"{table_name}_modin.db")
+        modin_df.to_sql(table_name, conn, index=index)
+        df_modin_sql = pandas.read_sql(
+            table_name, con=conn, index_col="index" if index else None
+        )
 
         # We do not pass the table name so the fixture won't generate a table
-        conn = make_sql_connection("test_to_sql_with_index_2.db")
-        pandas_df.to_sql(table_name, conn)
-        df_pandas_sql = pandas.read_sql(table_name, con=conn, index_col="index")
+        conn = make_sql_connection(f"{table_name}_pandas.db")
+        pandas_df.to_sql(table_name, conn, index=index)
+        df_pandas_sql = pandas.read_sql(
+            table_name, con=conn, index_col="index" if index else None
+        )
 
         assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())
 
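The merged `test_to_sql` keys `index_col` off the parametrized `index` flag because `to_sql` only writes the unnamed default index as an "index" column when `index=True`. A standalone illustration of that round trip with an in-memory SQLite database (table names here are examples):

```python
import sqlite3
import pandas

df = pandas.DataFrame({"col1": [1, 2, 3]})
with sqlite3.connect(":memory:") as conn:
    for index in (False, True):
        table = f"tbl_{index}"
        df.to_sql(table, conn, index=index)
        # Only restore the index column when one was actually written.
        roundtrip = pandas.read_sql(
            f"SELECT * FROM {table}", conn, index_col="index" if index else None
        )
        pandas.testing.assert_frame_equal(df, roundtrip, check_names=False)
```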