Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,48 @@ public List<Range> toRangeList() {
return Range.toRanges(roaring64NavigableMap::iterator);
}

/**
 * Tests whether this bitmap holds at least one value inside the closed interval
 * [rangeMin, rangeMax].
 *
 * <p>The answer comes from comparing two {@code rankLong} lookups (values up to
 * {@code rangeMax} versus values strictly before {@code rangeMin}) rather than from walking the
 * stored values.
 *
 * @param rangeMin lower bound of the interval, inclusive
 * @param rangeMax upper bound of the interval, inclusive
 * @return {@code true} if some stored value lies in the interval, {@code false} otherwise
 * @throws IllegalArgumentException if {@code rangeMin} is greater than {@code rangeMax}
 */
public boolean intersectsRange(long rangeMin, long rangeMax) {
    if (rangeMax < rangeMin) {
        throw new IllegalArgumentException(
                "rangeMin (" + rangeMin + ") must be <= rangeMax (" + rangeMax + ")");
    }

    final long upToMax = roaring64NavigableMap.rankLong(rangeMax);

    // A non-positive lower bound is treated as "nothing can precede the interval".
    // NOTE(review): this presumes the bitmap never orders values before 0 -- confirm how
    // negative longs are ranked by the underlying map.
    final long beforeMin;
    if (rangeMin > 0) {
        beforeMin = roaring64NavigableMap.rankLong(rangeMin - 1);
    } else {
        beforeMin = 0;
    }

    return beforeMin < upToMax;
}

/**
 * Collects the values of this bitmap that fall within [rangeMin, rangeMax] (inclusive on both
 * ends) into a {@code long[]}, ordered by their rank in the bitmap.
 *
 * <p>The number of matches is derived from two {@code rankLong} lookups, and each match is then
 * fetched by rank via {@code select}, so values ranked before {@code rangeMin} are never
 * visited.
 *
 * @param rangeMin lower bound of the range, inclusive
 * @param rangeMax upper bound of the range, inclusive
 * @return the matching values, or an empty array when no value falls in the range
 * @throws IllegalArgumentException if {@code rangeMin} is greater than {@code rangeMax}, or if
 *     more than {@link Integer#MAX_VALUE} values match and cannot fit into a single array
 */
public long[] toArrayInRange(long rangeMin, long rangeMax) {
    if (rangeMin > rangeMax) {
        throw new IllegalArgumentException(
                "rangeMin (" + rangeMin + ") must be <= rangeMax (" + rangeMax + ")");
    }
    long countUpToMax = roaring64NavigableMap.rankLong(rangeMax);
    // A non-positive lower bound is treated as "nothing can precede the range".
    // NOTE(review): this presumes the bitmap never orders values before 0 -- confirm how
    // negative longs are ranked by the underlying map.
    long countBeforeMin = rangeMin <= 0 ? 0 : roaring64NavigableMap.rankLong(rangeMin - 1);
    long count = countUpToMax - countBeforeMin;
    if (count <= 0) {
        return new long[0];
    }
    // Fail loudly instead of silently truncating the count with an (int) cast, which would
    // produce a wrong-sized array or a NegativeArraySizeException.
    if (count > Integer.MAX_VALUE) {
        throw new IllegalArgumentException(
                "Range ["
                        + rangeMin
                        + ", "
                        + rangeMax
                        + "] contains "
                        + count
                        + " values, which exceeds the maximum array size");
    }
    int size = (int) count;
    long[] result = new long[size];
    for (int i = 0; i < size; i++) {
        // countBeforeMin is the rank of the first in-range value; step forward from there.
        result[i] = roaring64NavigableMap.select(countBeforeMin + i);
    }
    return result;
}

public static RoaringNavigableMap64 bitmapOf(long... dat) {
RoaringNavigableMap64 roaringBitmap64 = new RoaringNavigableMap64();
for (long ele : dat) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

/** Tests for {@link RoaringNavigableMap64}. */
public class RoaringNavigableMap64Test {
Expand Down Expand Up @@ -108,4 +109,137 @@ public void testAddRangeLargeValues() {
assertThat(values.get(0)).isEqualTo(start);
assertThat(values.get(100)).isEqualTo(end);
}

/** Exercises {@code intersectsRange} against a bitmap holding the single run 10..20. */
@Test
public void testIntersectsRangeBasic() {
    RoaringNavigableMap64 tenToTwenty = new RoaringNavigableMap64();
    tenToTwenty.addRange(new Range(10, 20));

    // Queries expected to hit the stored run, in order: exact overlap, partial overlap at
    // start, partial overlap at end, query containing the run, query inside the run, and the
    // two single-point boundary queries.
    long[][] overlapping = {
        {10, 20},
        {5, 15},
        {15, 25},
        {0, 100},
        {12, 18},
        {10, 10},
        {20, 20}
    };
    for (long[] query : overlapping) {
        assertThat(tenToTwenty.intersectsRange(query[0], query[1])).isTrue();
    }

    // Queries expected to miss: entirely below, then entirely above.
    long[][] disjoint = {
        {0, 9},
        {21, 30}
    };
    for (long[] query : disjoint) {
        assertThat(tenToTwenty.intersectsRange(query[0], query[1])).isFalse();
    }
}

/** A bitmap with no values must not intersect any range. */
@Test
public void testIntersectsRangeEmptyBitmap() {
    RoaringNavigableMap64 empty = new RoaringNavigableMap64();

    boolean intersects = empty.intersectsRange(0, 100);

    assertThat(intersects).isFalse();
}

/** Exercises {@code intersectsRange} on a bitmap made of two runs (0..5 and 20..25). */
@Test
public void testIntersectsRangeDisjointRanges() {
    RoaringNavigableMap64 twoRuns = new RoaringNavigableMap64();
    twoRuns.addRange(new Range(0, 5));
    twoRuns.addRange(new Range(20, 25));

    // The gap between the two runs holds no values.
    boolean inGap = twoRuns.intersectsRange(6, 19);
    assertThat(inGap).isFalse();

    // Touches only the first run.
    boolean firstOnly = twoRuns.intersectsRange(3, 8);
    assertThat(firstOnly).isTrue();

    // Touches only the second run.
    boolean secondOnly = twoRuns.intersectsRange(18, 22);
    assertThat(secondOnly).isTrue();

    // Stretches across the gap and touches both runs.
    boolean both = twoRuns.intersectsRange(4, 21);
    assertThat(both).isTrue();
}

/** Exercises {@code intersectsRange} on a bitmap whose only value is 42. */
@Test
public void testIntersectsRangeWithSingleElement() {
    RoaringNavigableMap64 onlyFortyTwo = new RoaringNavigableMap64();
    onlyFortyTwo.add(42);

    // Point query on the value itself, then a window around it.
    boolean exactPoint = onlyFortyTwo.intersectsRange(42, 42);
    assertThat(exactPoint).isTrue();
    boolean surrounding = onlyFortyTwo.intersectsRange(40, 44);
    assertThat(surrounding).isTrue();

    // Ranges ending just before, or starting just after, the value.
    boolean below = onlyFortyTwo.intersectsRange(0, 41);
    assertThat(below).isFalse();
    boolean above = onlyFortyTwo.intersectsRange(43, 100);
    assertThat(above).isFalse();
}

/** Covers queries whose lower bound is 0, on a bitmap holding 0 and 1. */
@Test
public void testIntersectsRangeStartAtZero() {
    RoaringNavigableMap64 zeroAndOne = new RoaringNavigableMap64();
    zeroAndOne.add(0);
    zeroAndOne.add(1);

    boolean zeroOnly = zeroAndOne.intersectsRange(0, 0);
    assertThat(zeroOnly).isTrue();

    boolean zeroToOne = zeroAndOne.intersectsRange(0, 1);
    assertThat(zeroToOne).isTrue();

    // Everything from 2 upwards is absent.
    boolean pastStored = zeroAndOne.intersectsRange(2, 5);
    assertThat(pastStored).isFalse();
}

/** An inverted range (min greater than max) must be rejected by {@code intersectsRange}. */
@Test
public void testIntersectsRangeInvalidArgument() {
    RoaringNavigableMap64 target = new RoaringNavigableMap64();

    // 10 > 5, so the call is expected to fail with a message that names rangeMin.
    assertThatThrownBy(
                    () -> {
                        target.intersectsRange(10, 5);
                    })
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("rangeMin");
}

/** Exercises {@code toArrayInRange} against a bitmap holding the single run 10..20. */
@Test
public void testToArrayInRangeBasic() {
    RoaringNavigableMap64 tenToTwenty = new RoaringNavigableMap64();
    tenToTwenty.addRange(new Range(10, 20));

    // Query equal to the stored run: all 11 values come back.
    long[] exact = tenToTwenty.toArrayInRange(10, 20);
    assertThat(exact).hasSize(11);

    // Query strictly inside the stored run.
    long[] inner = tenToTwenty.toArrayInRange(12, 15);
    assertThat(inner).containsExactly(12L, 13L, 14L, 15L);

    // Query wider than the stored run still yields only the 11 stored values.
    long[] wider = tenToTwenty.toArrayInRange(0, 100);
    assertThat(wider).hasSize(11);

    // Queries entirely below and entirely above the run.
    long[] below = tenToTwenty.toArrayInRange(0, 9);
    assertThat(below).isEmpty();
    long[] above = tenToTwenty.toArrayInRange(21, 30);
    assertThat(above).isEmpty();

    // Single-point queries on each boundary of the run.
    long[] lowerEdge = tenToTwenty.toArrayInRange(10, 10);
    assertThat(lowerEdge).containsExactly(10L);
    long[] upperEdge = tenToTwenty.toArrayInRange(20, 20);
    assertThat(upperEdge).containsExactly(20L);
}

/** Exercises {@code toArrayInRange} on a sparse bitmap holding 5, 10 and 100. */
@Test
public void testToArrayInRangeDisjointBitmap() {
    RoaringNavigableMap64 sparse = new RoaringNavigableMap64();
    sparse.add(5);
    sparse.add(10);
    sparse.add(100);

    // Bounds sit exactly on two stored values.
    long[] fiveToTen = sparse.toArrayInRange(5, 10);
    assertThat(fiveToTen).containsExactly(5L, 10L);

    // Bounds exclude 5 and 100, leaving only the middle value.
    long[] middleOnly = sparse.toArrayInRange(6, 99);
    assertThat(middleOnly).containsExactly(10L);

    // Bounds enclose every stored value.
    long[] everything = sparse.toArrayInRange(0, 1000);
    assertThat(everything).containsExactly(5L, 10L, 100L);
}

/** A bitmap with no values yields an empty array for any range. */
@Test
public void testToArrayInRangeEmpty() {
    RoaringNavigableMap64 empty = new RoaringNavigableMap64();

    long[] values = empty.toArrayInRange(0, 100);

    assertThat(values).isEmpty();
}

/** Covers {@code toArrayInRange} queries whose lower bound is 0, on a bitmap holding 0, 1, 2. */
@Test
public void testToArrayInRangeStartAtZero() {
    RoaringNavigableMap64 zeroToTwo = new RoaringNavigableMap64();
    zeroToTwo.add(0);
    zeroToTwo.add(1);
    zeroToTwo.add(2);

    // The upper bound cuts off the stored value 2.
    long[] firstTwo = zeroToTwo.toArrayInRange(0, 1);
    assertThat(firstTwo).containsExactly(0L, 1L);

    // Point query on 0 itself.
    long[] zeroOnly = zeroToTwo.toArrayInRange(0, 0);
    assertThat(zeroOnly).containsExactly(0L);
}

/** An inverted range (min greater than max) must be rejected by {@code toArrayInRange}. */
@Test
public void testToArrayInRangeInvalidArgument() {
    RoaringNavigableMap64 target = new RoaringNavigableMap64();

    // 10 > 5, so the call is expected to fail with a message that names rangeMin.
    assertThatThrownBy(
                    () -> {
                        target.toArrayInRange(10, 5);
                    })
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("rangeMin");
}
}
35 changes: 35 additions & 0 deletions paimon-lumina/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
## Paimon Lumina

This module integrates [Lumina](https://github.com/alibaba/paimon-cpp/tree/main/third_party/lumina)
as a vector index for Apache Paimon's global index framework.

The Lumina vector search library is derived from an internal repository maintained by
the Alibaba Storage Service Team. It is accessed via JNI through the `lumina-jni` artifact.

### Supported Index Types

| Index Type | Description |
|------------|-------------|
| **DISKANN** | DiskANN graph-based index (default) |

### Supported Vector Metrics

| Metric | Description |
|--------|-------------|
| **L2** | Euclidean distance (default) |
| **COSINE** | Cosine distance |
| **INNER_PRODUCT** | Dot product |

### Configuration Options

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `vector.dim` | int | 128 | Vector dimension |
| `vector.metric` | enum | L2 | Distance metric |
| `vector.index-type` | enum | DISKANN | Index type |
| `vector.encoding-type` | string | rawf32 | Encoding type (rawf32, sq8, pq) |
| `vector.size-per-index` | int | 2,000,000 | Max vectors per index file |
| `vector.training-size` | int | 500,000 | Vectors used for pretraining |
| `vector.search-factor` | int | 10 | Multiplier for search limit when filtering |
| `vector.diskann.search-list-size` | int | 100 | DiskANN search list size |
| `vector.pretrain-sample-ratio` | double | 1.0 | Pretrain sample ratio |
101 changes: 101 additions & 0 deletions paimon-lumina/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.4-SNAPSHOT</version>
</parent>

<artifactId>paimon-lumina</artifactId>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just one paimon-lumina is OK, no need to have index and e2e.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please create a README.md to this, explain what is lumina.

<name>Paimon : Lumina Index</name>

<repositories>
<repository>
<id>lumina</id>
<url>https://lumina-binary.oss-cn-shanghai.aliyuncs.com/mvn-repo/</url>
</repository>
</repositories>
Comment on lines +34 to +39
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This module adds a custom Maven repository (https://lumina-binary.oss-cn-shanghai.aliyuncs.com/mvn-repo/). This has build reproducibility and supply-chain implications (and may violate ASF/release expectations if artifacts aren't in Maven Central / ASF repos). If possible, depend on artifacts published to Maven Central (or an ASF-managed repo), or gate this repository behind an explicit Maven profile so default builds don’t rely on an extra remote repository.

Suggested change
<repositories>
<repository>
<id>lumina</id>
<url>https://lumina-binary.oss-cn-shanghai.aliyuncs.com/mvn-repo/</url>
</repository>
</repositories>
<profiles>
<profile>
<id>lumina-repo</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<repositories>
<repository>
<id>lumina</id>
<url>https://lumina-binary.oss-cn-shanghai.aliyuncs.com/mvn-repo/</url>
</repository>
</repositories>
</profile>
</profiles>

Copilot uses AI. Check for mistakes.

<dependencies>
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-common</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.aliyun.lumina</groupId>
<artifactId>lumina-jni</artifactId>
<version>0.1.0</version>
</dependency>

<!-- test dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>${junit5.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-format</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-test-utils</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
Loading
Loading