Skip to content

Commit

Permalink
Fix a bug preventing align from working with a basic slice. (#850)
Browse files Browse the repository at this point in the history
This PR fixes a bug in `align` so that slicing a `GeoSeries` and then calling a binary predicate on slices of that `GeoSeries` with mismatched indexes works. It also adds a few more valid tests.

As of this PR one test still fails. I'm working on it, but it feels like a rabbit hole, so I'm inclined to leave that case for later.
The case that doesn't work is the hardest one, in which the two `GeoSeries` have different lengths and one of them has index values that are not present in the other.

Authors:
  - H. Thomson Comer (https://github.com/thomcom)

Approvers:
  - Michael Wang (https://github.com/isVoid)

URL: #850
  • Loading branch information
thomcom authored Dec 15, 2022
1 parent 5ea5bbb commit 749751a
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 26 deletions.
38 changes: 28 additions & 10 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,22 +668,40 @@ def align(self, other):
dtype: geometry)
"""
index = (
other.index if len(other.index) >= len(self.index) else self.index
)

# Merge the two indices and sort them
if (
len(self.index) == len(other.index)
and (self.index == other.index).all()
):
return self, other

idx1 = cudf.DataFrame({"idx": self.index})
idx2 = cudf.DataFrame({"idx": other.index})
index = idx1.merge(idx2, how="outer").sort_values("idx")["idx"]
index.name = None

# Align the two GeoSeries
aligned_left = self._align_to_index(index)
aligned_right = other._align_to_index(index)
aligned_right.index = index

# If the two GeoSeries have the same length, keep the original index
# Otherwise, use the union of the two indices
if len(other) == len(aligned_right):
aligned_right.index = other.index
else:
aligned_right.index = index
if len(self) == len(aligned_left):
aligned_left.index = self.index
else:
aligned_left.index = index

# Aligned GeoSeries are sorted by index
aligned_right = aligned_right.sort_index()
aligned_left.index = (
self.index
if len(self.index) == len(aligned_left)
else aligned_right.index
)
aligned_left = aligned_left.sort_index()
return (
aligned_left,
aligned_right.loc[aligned_left.index],
aligned_right,
)

def _gather(
Expand Down
164 changes: 148 additions & 16 deletions python/cuspatial/cuspatial/tests/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,104 @@
import cuspatial


def test_align():
gpdpdf = gpd.GeoSeries(
def test_align_more_values():
    """Align a two-polygon GeoSeries with its one-element head, both ways.

    The right-hand operand is the first row of the left-hand one, so its
    index is a subset of the other's. The cuspatial result is compared
    against geopandas for ``rhs.align(lhs)`` and ``lhs.align(rhs)``.
    """
    gpdlhs = gpd.GeoSeries(
        [
            Polygon(((-8, -8), (-8, 8), (8, 8), (8, -8))),
            Polygon(((-2, -2), (-2, 2), (2, 2), (2, -2))),
        ]
    )
    gpdrhs = gpdlhs.iloc[0:1]
    lhs = cuspatial.from_geopandas(gpdlhs)
    rhs = cuspatial.from_geopandas(gpdrhs)

    # Shorter series aligned against the longer one.
    expected = gpdrhs.align(gpdlhs)
    got = rhs.align(lhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())

    # Longer series aligned against the shorter one.
    expected = gpdlhs.align(gpdrhs)
    got = lhs.align(rhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())


def test_align_some_different_values():
    """Align two equal-length GeoSeries whose indexes only partly overlap.

    The left operand is indexed ``[0, 1, 2, 6, 7, 8]`` and the right one
    ``[0, 1, 2, 3, 4, 5]``, so each side has labels the other lacks. The
    cuspatial result is compared against geopandas.
    """
    gpdlhs = gpd.GeoSeries(
        [
            Polygon(((1, 2), (3, 4), (5, 6), (7, 8))),
            Polygon(((9, 10), (11, 12), (13, 14), (15, 16))),
            Polygon(((17, 18), (19, 20), (21, 22), (23, 24))),
            Polygon(((25, 26), (27, 28), (29, 30), (31, 32))),
            Polygon(((33, 34), (35, 36), (37, 38), (39, 40))),
            Polygon(((41, 42), (43, 44), (45, 46), (47, 48))),
        ]
    )
    # Copy before re-indexing: a plain ``gpdrhs = gpdlhs`` binds both names
    # to one object, so the second index assignment would overwrite the
    # first and the two operands would end up with identical indexes.
    gpdrhs = gpdlhs.copy()
    gpdlhs.index = [0, 1, 2, 6, 7, 8]
    gpdrhs.index = [0, 1, 2, 3, 4, 5]
    lhs = cuspatial.from_geopandas(gpdlhs)
    rhs = cuspatial.from_geopandas(gpdrhs)
    expected = gpdlhs.align(gpdrhs)
    got = lhs.align(rhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())


def test_align_more_and_some_different_values():
    """Align a six-polygon GeoSeries with its first three rows, both ways.

    The three-row slice is given the index ``[0, 1, 2]`` and the full
    series ``[0, 1, 2, 3, 4, 5]``. For each ordering of the operands the
    cuspatial ``align`` output is compared against geopandas.
    """
    polygons = [
        Polygon(((1, 2), (3, 4), (5, 6), (7, 8))),
        Polygon(((9, 10), (11, 12), (13, 14), (15, 16))),
        Polygon(((17, 18), (19, 20), (21, 22), (23, 24))),
        Polygon(((25, 26), (27, 28), (29, 30), (31, 32))),
        Polygon(((33, 34), (35, 36), (37, 38), (39, 40))),
        Polygon(((41, 42), (43, 44), (45, 46), (47, 48))),
    ]
    gpd_full = gpd.GeoSeries(polygons)
    gpd_head = gpd_full[0:3]
    gpd_head.index = [0, 1, 2]
    gpd_full.index = [0, 1, 2, 3, 4, 5]

    cu_full = cuspatial.from_geopandas(gpd_full)
    cu_head = cuspatial.from_geopandas(gpd_head)

    # Check full-vs-head first, then head-vs-full.
    orderings = (
        (gpd_full, gpd_head, cu_full, cu_head),
        (gpd_head, gpd_full, cu_head, cu_full),
    )
    for gpd_a, gpd_b, cu_a, cu_b in orderings:
        expected = gpd_a.align(gpd_b)
        got = cu_a.align(cu_b)
        for side in (0, 1):
            pd.testing.assert_series_equal(
                expected[side], got[side].to_pandas()
            )


def test_align_reorder():
gpdalign = gpd.GeoSeries(
def test_align_with_slice():
    """Align the first and second halves of one GeoSeries, both ways.

    Rows ``[0:3]`` and ``[3:6]`` are sliced from the same six-polygon
    series (geopandas and cuspatial alike). The cuspatial ``align`` output
    is compared against geopandas for both operand orders.
    """
    gpdfull = gpd.GeoSeries(
        [
            Polygon(((1, 2), (3, 4), (5, 6), (7, 8))),
            Polygon(((9, 10), (11, 12), (13, 14), (15, 16))),
            Polygon(((17, 18), (19, 20), (21, 22), (23, 24))),
            Polygon(((25, 26), (27, 28), (29, 30), (31, 32))),
            Polygon(((33, 34), (35, 36), (37, 38), (39, 40))),
            Polygon(((41, 42), (43, 44), (45, 46), (47, 48))),
        ]
    )
    full = cuspatial.from_geopandas(gpdfull)

    # Take both halves from the untouched six-element series. Rebinding
    # the source name to its first three rows and then slicing ``[3:6]``
    # from that would yield an empty right-hand operand.
    gpdlhs = gpdfull[0:3]
    gpdrhs = gpdfull[3:6]
    lhs = full[0:3]
    rhs = full[3:6]

    expected = gpdlhs.align(gpdrhs)
    got = lhs.align(rhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())

    expected = gpdrhs.align(gpdlhs)
    got = rhs.align(lhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())


def test_align_out_of_orders_values():
gpdlhs = gpd.GeoSeries(
[
None,
None,
Expand All @@ -38,14 +118,66 @@ def test_align_reorder():
Polygon(((41, 42), (43, 44), (45, 46), (47, 48))),
]
)
gpdreordered = gpdalign.iloc[np.random.permutation(len(gpdalign))]
align = cuspatial.from_geopandas(gpdalign)
reordered = cuspatial.from_geopandas(gpdreordered)
expected = gpdalign.align(gpdreordered)
got = align.align(reordered)
gpdrhs = gpdlhs.iloc[np.random.permutation(len(gpdlhs))]
lhs = cuspatial.from_geopandas(gpdlhs)
rhs = cuspatial.from_geopandas(gpdrhs)
expected = gpdlhs.align(gpdrhs)
got = lhs.align(rhs)
pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
pd.testing.assert_series_equal(expected[1], got[1].to_pandas())
expected = gpdrhs.align(gpdlhs)
got = rhs.align(lhs)
pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
pd.testing.assert_series_equal(expected[1], got[1].to_pandas())


def test_align_same_index():
    """Align two GeoSeries that carry the identical index, both ways.

    Both geopandas names are bound to the same six-polygon series, indexed
    ``[0, 1, 2, 3, 4, 5]``; each is converted to cuspatial separately. The
    cuspatial ``align`` output is compared against geopandas for both
    operand orders.
    """
    shared_index = [0, 1, 2, 3, 4, 5]
    gpd_left = gpd.GeoSeries(
        [
            Polygon(((1, 2), (3, 4), (5, 6), (7, 8))),
            Polygon(((9, 10), (11, 12), (13, 14), (15, 16))),
            Polygon(((17, 18), (19, 20), (21, 22), (23, 24))),
            Polygon(((25, 26), (27, 28), (29, 30), (31, 32))),
            Polygon(((33, 34), (35, 36), (37, 38), (39, 40))),
            Polygon(((41, 42), (43, 44), (45, 46), (47, 48))),
        ]
    )
    gpd_right = gpd_left
    gpd_left.index = shared_index
    gpd_right.index = shared_index

    cu_left = cuspatial.from_geopandas(gpd_left)
    cu_right = cuspatial.from_geopandas(gpd_right)

    # Check left-vs-right first, then right-vs-left.
    orderings = (
        (gpd_left, gpd_right, cu_left, cu_right),
        (gpd_right, gpd_left, cu_right, cu_left),
    )
    for gpd_a, gpd_b, cu_a, cu_b in orderings:
        expected = gpd_a.align(gpd_b)
        got = cu_a.align(cu_b)
        for side in (0, 1):
            pd.testing.assert_series_equal(
                expected[side], got[side].to_pandas()
            )


def test_align_similar_index():
    """Align two equal-length GeoSeries whose indexes are offset by one.

    The left operand is indexed ``[0, 1, 2, 3, 4, 5]`` and the right one
    ``[1, 2, 3, 4, 5, 6]``. The cuspatial ``align`` output is compared
    against geopandas for both operand orders.
    """
    gpdlhs = gpd.GeoSeries(
        [
            Polygon(((1, 2), (3, 4), (5, 6), (7, 8))),
            Polygon(((9, 10), (11, 12), (13, 14), (15, 16))),
            Polygon(((17, 18), (19, 20), (21, 22), (23, 24))),
            Polygon(((25, 26), (27, 28), (29, 30), (31, 32))),
            Polygon(((33, 34), (35, 36), (37, 38), (39, 40))),
            Polygon(((41, 42), (43, 44), (45, 46), (47, 48))),
        ]
    )
    # Copy before re-indexing: a plain ``gpdrhs = gpdlhs`` binds both names
    # to one object, so the second index assignment would overwrite the
    # first and both operands would end up indexed ``[1, ..., 6]``.
    gpdrhs = gpdlhs.copy()
    gpdlhs.index = [0, 1, 2, 3, 4, 5]
    gpdrhs.index = [1, 2, 3, 4, 5, 6]
    lhs = cuspatial.from_geopandas(gpdlhs)
    rhs = cuspatial.from_geopandas(gpdrhs)

    expected = gpdlhs.align(gpdrhs)
    got = lhs.align(rhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())

    expected = gpdrhs.align(gpdlhs)
    got = rhs.align(lhs)
    pd.testing.assert_series_equal(expected[0], got[0].to_pandas())
    pd.testing.assert_series_equal(expected[1], got[1].to_pandas())

0 comments on commit 749751a

Please sign in to comment.