Skip to content

Commit a7d3093

Browse files
FIX-#2239: Compute row index start using pandas (#2240)
* FIX-#2239: Compute row index start using pandas
  Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
* FIX-#2239: Documentation
  Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
* FIX-#2239: Improve testing for case
  Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
1 parent 8866ca8 commit a7d3093

File tree

3 files changed

+172
-6
lines changed

3 files changed

+172
-6
lines changed

modin/engines/base/io/text/csv_reader.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,6 @@ def _read(cls, filepath_or_buffer, **kwargs):
180180
if index_col is None:
181181
row_lengths = cls.materialize(index_ids)
182182
new_index = pandas.RangeIndex(sum(row_lengths))
183-
# pandas has a really weird edge case here.
184-
if kwargs.get("names", None) is not None and skiprows > 1:
185-
new_index = pandas.RangeIndex(
186-
skiprows - 1, new_index.stop + skiprows - 1
187-
)
188183
else:
189184
index_objs = cls.materialize(index_ids)
190185
row_lengths = [len(o) for o in index_objs]
modin/pandas/test/data/issue_2239.csv

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
1585542839.000000, 1585542839.000000, 1585542839.000000
2+
32.000000, 32.000000, 32.000000
3+
-38,-14,51
4+
-38,-13,51
5+
-38,-14,51
6+
-38,-14,50
7+
-38,-13,51
8+
-38,-14,50
9+
-38,-14,51
10+
-38,-13,51
11+
-38,-14,51
12+
-38,-13,51
13+
-38,-14,51
14+
-38,-14,50
15+
-38,-13,51
16+
-38,-14,50
17+
-38,-14,51
18+
-38,-13,51
19+
-38,-14,51
20+
-38,-13,51
21+
-38,-14,51
22+
-38,-14,50
23+
-38,-13,51
24+
-38,-14,50
25+
-38,-14,51
26+
-38,-13,51
27+
-38,-14,51
28+
-38,-13,51
29+
-38,-14,51
30+
-38,-14,50
31+
-38,-13,51
32+
-38,-14,50
33+
-38,-14,51
34+
-38,-13,51
35+
-38,-14,51
36+
-38,-13,51
37+
-38,-14,51
38+
-38,-14,50
39+
-38,-13,51
40+
-38,-14,50
41+
-38,-14,51
42+
-38,-13,51
43+
-38,-14,51
44+
-38,-13,51
45+
-38,-14,51
46+
-38,-14,50
47+
-38,-13,51
48+
-38,-14,50
49+
-38,-14,51
50+
-38,-13,51
51+
-38,-14,51
52+
-38,-13,51
53+
-38,-14,51
54+
-38,-14,50
55+
-38,-13,51
56+
-38,-14,50
57+
-38,-14,51
58+
-38,-13,51
59+
-38,-14,51
60+
-38,-13,51
61+
-38,-14,51
62+
-38,-14,50
63+
-38,-13,51
64+
-38,-14,50
65+
-38,-14,51
66+
-38,-13,51
67+
-38,-14,51
68+
-38,-13,51
69+
-38,-14,51
70+
-38,-14,50
71+
-38,-13,51
72+
-38,-14,50
73+
-38,-14,51
74+
-38,-13,51
75+
-38,-14,51
76+
-38,-13,51
77+
-38,-14,51
78+
-38,-14,50
79+
-38,-13,51
80+
-38,-14,50
81+
-38,-14,51
82+
-38,-13,51
83+
-38,-14,51
84+
-38,-13,51
85+
-38,-14,51
86+
-38,-14,50
87+
-38,-13,51
88+
-38,-14,50
89+
-38,-14,51
90+
-38,-13,51
91+
-38,-14,51
92+
-38,-13,51
93+
-38,-14,51
94+
-38,-14,50
95+
-38,-13,51
96+
-38,-14,50
97+
-38,-14,51
98+
-38,-13,51
99+
-38,-14,51
100+
-38,-13,51
101+
-38,-14,51
102+
-38,-14,50
103+
-38,-13,51
104+
-38,-14,50
105+
-38,-14,51
106+
-38,-13,51
107+
-38,-14,51
108+
-38,-13,51
109+
-38,-14,51
110+
-38,-14,50
111+
-38,-13,51
112+
-38,-14,50
113+
-38,-14,51
114+
-38,-13,51
115+
-38,-14,51
116+
-38,-13,51
117+
-38,-14,51
118+
-38,-14,50
119+
-38,-13,51
120+
-38,-14,50
121+
-38,-14,51
122+
-38,-13,51
123+
-38,-14,51
124+
-38,-13,51
125+
-38,-14,51
126+
-38,-14,50
127+
-38,-13,51
128+
-38,-14,50
129+
-38,-14,51
130+
-38,-13,51
131+
-38,-14,51
132+
-38,-13,51
133+
-38,-14,51
134+
-38,-14,50
135+
-38,-13,51
136+
-38,-14,50
137+
-38,-14,51
138+
-38,-13,51
139+
-38,-14,51
140+
-38,-13,51
141+
-38,-14,51
142+
-38,-14,50
143+
-38,-13,51
144+
-38,-14,50
145+
-38,-14,51
146+
-38,-13,51

modin/pandas/test/test_io.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1113,7 +1113,7 @@ def test_from_csv_chunksize(make_csv_file):
11131113
df_equals(modin_df, pd_df)
11141114

11151115

1116-
@pytest.mark.parametrize("nrows", [123, None])
1116+
@pytest.mark.parametrize("nrows", [1, 2, 123, None])
11171117
def test_from_csv_skiprows(make_csv_file, nrows):
11181118
make_csv_file()
11191119

@@ -1129,6 +1129,22 @@ def test_from_csv_skiprows(make_csv_file, nrows):
11291129
)
11301130
df_equals(modin_df, pandas_df)
11311131

1132+
pandas_df = pandas.read_csv(
1133+
TEST_CSV_FILENAME,
1134+
header=None,
1135+
names=["c1", "c2", "c3", "c4"],
1136+
skiprows=2,
1137+
nrows=nrows,
1138+
)
1139+
modin_df = pd.read_csv(
1140+
TEST_CSV_FILENAME,
1141+
header=None,
1142+
names=["c1", "c2", "c3", "c4"],
1143+
skiprows=2,
1144+
nrows=nrows,
1145+
)
1146+
df_equals(modin_df, pandas_df)
1147+
11321148
pandas_df = pandas.read_csv(
11331149
TEST_CSV_FILENAME,
11341150
names=["c1", "c2", "c3", "c4"],
@@ -1144,6 +1160,15 @@ def test_from_csv_skiprows(make_csv_file, nrows):
11441160
df_equals(modin_df, pandas_df)
11451161

11461162

1163+
@pytest.mark.parametrize("names", [list("XYZ"), None])
1164+
@pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None])
1165+
def test_from_csv_skiprows_names(names, skiprows):
1166+
path = "modin/pandas/test/data/issue_2239.csv"
1167+
pandas_df = pandas.read_csv(path, names=names, skiprows=skiprows)
1168+
modin_df = pd.read_csv(path, names=names, skiprows=skiprows)
1169+
df_equals(pandas_df, modin_df)
1170+
1171+
11471172
@pytest.mark.parametrize(
11481173
"encoding", ["latin8", "ISO-8859-1", "latin1", "iso-8859-1", "cp1252", "utf8"]
11491174
)

0 commit comments

Comments
 (0)