-
Notifications
You must be signed in to change notification settings - Fork 670
Closed
Description
System information
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Ubuntu
- Modin version (`modin.__version__`): master (0.8.3+22.ge99b629)
- Python version: 3.7
- Code we can use to reproduce:
import ray
import os
import ray.util
ray.util.connect("<service_ip>:50051")
import modin.pandas as pd
pd.DEFAULT_NPARTITIONS = 10
df = pd.read_csv("s3://<bucket>/HIGGS_100k.csv")
Describe the problem
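Modin fails to load a CSV file from S3 when connected through the Ray client: `pd.read_csv` raises an `AssertionError` from the `assert type(object_id) is ray.ObjectID` check in `PandasOnRayFramePartition.__init__` (see the traceback below).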
Source code / logs
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-10-9b3c648a226d> in <module>()
----> 1 df = pd.read_csv("s3://<s3_bucket>/HIGGS_100k.csv")
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/pandas/io.py in parser_func(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
114
115 kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
--> 116 return _read(**kwargs)
117
118 return parser_func
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/pandas/io.py in _read(**kwargs)
133
134 Engine.subscribe(_update_engine)
--> 135 pd_obj = EngineDispatcher.read_csv(**kwargs)
136 # This happens when `read_csv` returns a TextFileReader object for iterating through
137 if isinstance(pd_obj, pandas.io.parsers.TextFileReader):
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/data_management/factories/dispatcher.py in read_csv(cls, **kwargs)
102 @classmethod
103 def read_csv(cls, **kwargs):
--> 104 return cls.__engine._read_csv(**kwargs)
105
106 @classmethod
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/data_management/factories/factories.py in _read_csv(cls, **kwargs)
85 @classmethod
86 def _read_csv(cls, **kwargs):
---> 87 return cls.io_cls.read_csv(**kwargs)
88
89 @classmethod
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/engines/base/io/file_dispatcher.py in read(cls, *args, **kwargs)
27 @classmethod
28 def read(cls, *args, **kwargs):
---> 29 query_compiler = cls._read(*args, **kwargs)
30 # TODO (devin-petersohn): Make this section more general for non-pandas kernel
31 # implementations.
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/engines/base/io/text/csv_dispatcher.py in _read(cls, filepath_or_buffer, **kwargs)
192 dtypes = cls.get_dtypes(dtypes_ids) if len(dtypes_ids) > 0 else None
193
--> 194 partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)
195 # If parse_dates is present, the column names that we have might not be
196 # the same length as the returned column names. If we do need to modify
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/engines/base/io/text/text_file_dispatcher.py in build_partition(cls, partition_ids, row_lengths, column_widths)
51 for j in range(len(partition_ids[i]))
52 ]
---> 53 for i in range(len(partition_ids))
54 ]
55 )
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/engines/base/io/text/text_file_dispatcher.py in <listcomp>(.0)
51 for j in range(len(partition_ids[i]))
52 ]
---> 53 for i in range(len(partition_ids))
54 ]
55 )
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/engines/base/io/text/text_file_dispatcher.py in <listcomp>(.0)
49 width=column_widths[j],
50 )
---> 51 for j in range(len(partition_ids[i]))
52 ]
53 for i in range(len(partition_ids))
/home/bhavya.agarwal/.local/lib/python3.7/site-packages/modin/engines/ray/pandas_on_ray/frame/partition.py in __init__(self, object_id, length, width, ip, call_queue)
25 class PandasOnRayFramePartition(BaseFramePartition):
26 def __init__(self, object_id, length=None, width=None, ip=None, call_queue=None):
---> 27 assert type(object_id) is ray.ObjectID
28
29 self.oid = object_id
AssertionError:
Metadata
Metadata
Assignees
Labels
bug 🦗 (Something isn't working)