Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ futures = "0.3"
geoarrow-array = "0.6"
geoarrow-schema = "0.6"
geodatafusion = "0.1.1"
geo-traits = "0.3.0"
geo-types = "0.7.16"
http = "1.1.0"
humantime = "2.2.0"
Expand Down
30 changes: 9 additions & 21 deletions java/lance-jni/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,8 @@ fn inner_create_index(
| IndexType::Inverted
| IndexType::NGram
| IndexType::ZoneMap
| IndexType::BloomFilter => {
| IndexType::BloomFilter
| IndexType::RTree => {
// For scalar indices, create a scalar IndexParams
let (index_type_str, params_opt) = get_scalar_index_params(env, params_jobj)?;
let scalar_params = lance_index::scalar::ScalarIndexParams {
Expand Down
4 changes: 3 additions & 1 deletion protos/index.proto
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,6 @@ message JsonIndexDetails {
string path = 1;
google.protobuf.Any target_details = 2;
}
message BloomFilterIndexDetails {}
message BloomFilterIndexDetails {}

// Details message for the RTree index type. It declares no fields.
message RTreeIndexDetails {}
30 changes: 9 additions & 21 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 61 additions & 0 deletions python/python/benchmarks/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,3 +505,64 @@ def test_late_materialization(test_dataset, benchmark, use_index):
filter=f"{column} = 0",
batch_size=32,
)


@pytest.fixture(scope="module")
def test_geo_dataset(tmpdir_factory):
    """Module-scoped fixture: a Lance dataset holding 1,000,000 random 2D points.

    The table has a single column named ``points`` whose type comes from
    ``geoarrow.rust.core.point("xy")``. Coordinates are standard-normal
    samples scaled by 100. The table is written to a fresh temporary
    directory and the dataset is then reopened from that location.
    """
    from geoarrow.rust.core import point, points

    row_count = 1_000_000
    # x is sampled before y, matching the order the random stream is consumed.
    xs = np.random.randn(row_count) * 100
    ys = np.random.randn(row_count) * 100
    geometry = points([xs, ys])

    points_field = pa.field(point("xy")).with_name("points")
    table = pa.Table.from_arrays([geometry], schema=pa.schema([points_field]))

    dataset_uri = str(tmpdir_factory.mktemp("test_geo_dataset"))
    lance.write_dataset(table, dataset_uri)
    return lance.dataset(dataset_uri)


@pytest.mark.benchmark(group="geo")
@pytest.mark.parametrize(
    "use_index",
    (False, True),
    ids=["no_index", "with_index"],
)
def test_geo_rtree(test_geo_dataset, benchmark, use_index):
    """Benchmark an ``St_Contains`` filtered read with and without an RTREE index.

    When ``use_index`` is true, an RTREE scalar index is (re)created on the
    ``points`` column first. The explained plan for the scan is printed, then
    ``to_table`` is benchmarked with the same arguments.
    """
    if use_index:
        test_geo_dataset.create_scalar_index(
            column="points", index_type="RTREE", replace=True
        )

    # The filter text is shared by the plan print-out and the benchmarked read.
    spatial_filter = """
St_Contains(points,
ST_GeomFromText('POLYGON (( 0 0, 2 0, 0 2, 2 2, 0 0 ))'))
"""

    def scan_kwargs():
        # Build fresh arguments for each call so no list object is shared.
        return dict(
            columns=["points"],
            filter=spatial_filter,
            batch_size=32,
            use_scalar_index=use_index,
        )

    plan = test_geo_dataset.scanner(**scan_kwargs()).explain_plan(True)
    print(plan)

    benchmark(test_geo_dataset.to_table, **scan_kwargs())
4 changes: 3 additions & 1 deletion python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2342,6 +2342,7 @@ def create_scalar_index(
Literal["NGRAM"],
Literal["ZONEMAP"],
Literal["BLOOMFILTER"],
Literal["RTREE"],
IndexConfig,
],
name: Optional[str] = None,
Expand Down Expand Up @@ -2545,11 +2546,12 @@ def create_scalar_index(
"LABEL_LIST",
"INVERTED",
"BLOOMFILTER",
"RTREE",
]:
raise NotImplementedError(
(
'Only "BTREE", "BITMAP", "NGRAM", "ZONEMAP", "LABEL_LIST", '
'or "INVERTED" or "BLOOMFILTER" are supported for '
'"INVERTED", "BLOOMFILTER" or "RTREE" are supported for '
f"scalar columns. Received {index_type}",
)
)
Expand Down
49 changes: 49 additions & 0 deletions python/python/tests/test_geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,52 @@ def test_geo_sql(tmp_path: Path):
assert np.allclose(
np.array(result["dist"]), np.array([2.5495097567963922]), atol=1e-8
)


def test_rtree_index(tmp_path: Path):
    """Check that an RTREE index is used and does not change query results.

    Writes 10000 random two-vertex linestrings, runs an ``St_Intersects`` SQL
    query before and after creating an RTREE scalar index on the
    ``linestring`` column, and asserts that both runs return equal tables.
    The ``EXPLAIN ANALYZE`` output must mention ``ScalarIndexQuery`` only once
    the index exists.
    """
    line_count = 10000
    vertex_count = line_count * 2

    # Every linestring owns two consecutive vertices: offsets are 0, 2, 4, ...
    offsets = np.arange(line_count + 1, dtype=np.int32) * 2
    xs = np.random.randn(vertex_count) * 100
    ys = np.random.randn(vertex_count) * 100
    geometry = linestrings([xs, ys], offsets)
    assert len(geometry) == line_count

    id_field = pa.field("id", pa.int64())
    geometry_field = pa.field(linestring("xy")).with_name("linestring")
    ids = np.arange(line_count, dtype=np.int64)
    table = pa.Table.from_arrays(
        [ids, geometry], schema=pa.schema([id_field, geometry_field])
    )
    ds = lance.write_dataset(table, str(tmp_path / "test_rtree_index.lance"))

    def query(ds: lance.LanceDataset, has_index=False):
        # Run the intersects query, first verifying how the plan was executed.
        sql = """
SELECT `id`, linestring
FROM dataset
WHERE
St_Intersects(linestring, ST_GeomFromText('LINESTRING ( 2 0, 0 2 )')) \
"""

        plan_batches = ds.sql("EXPLAIN ANALYZE " + sql).build().to_batch_records()
        explain = pa.Table.from_batches(plan_batches).to_pandas().to_string()

        # The scalar-index node must appear exactly when an index is expected.
        index_node_present = "ScalarIndexQuery" in explain
        if has_index:
            assert index_node_present
        else:
            assert not index_node_present

        result_batches = ds.sql(sql).build().to_batch_records()
        return pa.Table.from_batches(result_batches)

    baseline = query(ds)

    ds.create_scalar_index("linestring", "RTREE")

    indexed = query(ds, has_index=True)

    assert indexed == baseline
5 changes: 5 additions & 0 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1776,6 +1776,7 @@ impl Dataset {
"ZONEMAP" => IndexType::ZoneMap,
"BLOOMFILTER" => IndexType::BloomFilter,
"LABEL_LIST" => IndexType::LabelList,
"RTREE" => IndexType::RTree,
"INVERTED" | "FTS" => IndexType::Inverted,
"IVF_FLAT" | "IVF_PQ" | "IVF_SQ" | "IVF_RQ" | "IVF_HNSW_FLAT" | "IVF_HNSW_PQ"
| "IVF_HNSW_SQ" => IndexType::Vector,
Expand Down Expand Up @@ -1812,6 +1813,10 @@ impl Dataset {
index_type: "bloomfilter".to_string(),
params: None,
}),
"RTREE" => Box::new(ScalarIndexParams {
index_type: "rtree".to_string(),
params: None,
}),
"SCALAR" => {
let Some(kwargs) = kwargs else {
return Err(PyValueError::new_err(
Expand Down
3 changes: 3 additions & 0 deletions rust/lance-geo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ datafusion.workspace = true
geoarrow-array.workspace = true
geoarrow-schema.workspace = true
geodatafusion.workspace = true
geo-traits.workspace = true
geo-types.workspace = true
lance-core.workspace = true
serde.workspace = true

[lints]
workspace = true
Loading
Loading