Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] File index interface and file format. #3068

Merged
merged 21 commits into from
Mar 27, 2024
Prev Previous commit
Next Next commit
fix comment
  • Loading branch information
yejunhao committed Mar 26, 2024
commit 5eb3a465a1c4a9bf228f24b2c17c239a025e1a83
Original file line number Diff line number Diff line change
Expand Up @@ -18,63 +18,108 @@

package org.apache.paimon.fileindex;

import org.apache.paimon.predicate.CompoundPredicate;
import org.apache.paimon.predicate.FieldRef;
import org.apache.paimon.predicate.FunctionVisitor;
import org.apache.paimon.predicate.LeafPredicate;
import org.apache.paimon.types.DataType;

import java.util.List;

/**
* Secondary index filter interface. A return value of true means the file must be searched; false
* means the file can be skipped.
*/
public interface FileIndex {
public interface FileIndex extends FunctionVisitor<Boolean> {

void add(Object key);

default boolean testStartsWith(Object key) {
byte[] serializedBytes();

/**
 * Default answer for a leaf predicate: the predicate is not inspected and the result is always
 * true, which by this interface's contract means the file has to be searched.
 *
 * @param predicate the leaf predicate (unused by this default)
 * @return always true
 */
@Override
default Boolean visit(LeafPredicate predicate) {
    return Boolean.TRUE;
}

/**
 * Default answer for a compound predicate: the predicate is not inspected and the result is
 * always true, which by this interface's contract means the file has to be searched.
 *
 * @param predicate the compound predicate (unused by this default)
 * @return always true
 */
@Override
default Boolean visit(CompoundPredicate predicate) {
    return Boolean.TRUE;
}

default boolean testLessThan(Object key) {
/**
 * Default for an IS NOT NULL test: no filtering is attempted, so the file is reported as one
 * that must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @return always true
 */
@Override
default Boolean visitIsNotNull(FieldRef fieldRef) {
    return Boolean.TRUE;
}

default boolean testGreaterOrEqual(Object key) {
/**
 * Default for an IS NULL test: no filtering is attempted, so the file is reported as one that
 * must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @return always true
 */
@Override
default Boolean visitIsNull(FieldRef fieldRef) {
    return Boolean.TRUE;
}

default boolean testNotContains(Object key) {
/**
 * Default for a starts-with test: no filtering is attempted, so the file is reported as one
 * that must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the prefix literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitStartsWith(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

default boolean testLessOrEqual(Object key) {
/**
 * Default for a less-than test: no filtering is attempted, so the file is reported as one that
 * must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the comparison literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitLessThan(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

default boolean testContains(Object key) {
/**
 * Default for a greater-or-equal test: no filtering is attempted, so the file is reported as
 * one that must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the comparison literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitGreaterOrEqual(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

default boolean testGreaterThan(Object key) {
/**
 * Default for a not-equal test: no filtering is attempted, so the file is reported as one that
 * must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the comparison literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitNotEqual(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

default boolean testIn(Object[] keys) {
for (Object key : keys) {
if (testContains(key)) {
/**
 * Default for a less-or-equal test: no filtering is attempted, so the file is reported as one
 * that must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the comparison literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitLessOrEqual(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

/**
 * Default for an equality test: no filtering is attempted, so the file is reported as one that
 * must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the comparison literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitEqual(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

/**
 * Default for a greater-than test: no filtering is attempted, so the file is reported as one
 * that must be searched.
 *
 * @param fieldRef the field being tested (unused by this default)
 * @param literal the comparison literal (unused by this default)
 * @return always true
 */
@Override
default Boolean visitGreaterThan(FieldRef fieldRef, Object literal) {
    return Boolean.TRUE;
}

/**
 * Tests an IN predicate by delegating each literal to {@link #visitEqual}.
 *
 * <p>Literals are checked in list order and evaluation stops at the first one whose equality
 * test returns true. An empty list yields false.
 *
 * @param fieldRef the field the predicate is applied to
 * @param literals the candidate values
 * @return true if {@code visitEqual} returns true for at least one literal, otherwise false
 */
@Override
default Boolean visitIn(FieldRef fieldRef, List<Object> literals) {
    return literals.stream().anyMatch(candidate -> visitEqual(fieldRef, candidate));
}

default boolean testNotIn(Object[] keys) {
for (Object key : keys) {
if (testNotContains(key)) {
/**
 * Tests a NOT IN predicate by combining the per-literal {@link #visitNotEqual} results.
 *
 * <p>A row satisfies NOT IN only if it differs from every literal, so the per-literal results
 * are combined with AND: as soon as one literal's not-equal test returns false the file can be
 * skipped. An empty literal list excludes nothing, so the file must be searched.
 *
 * @param fieldRef the field the predicate is applied to
 * @param literals the values the field must not be equal to
 * @return false if {@code visitNotEqual} returns false for any literal, otherwise true
 */
@Override
default Boolean visitNotIn(FieldRef fieldRef, List<Object> literals) {
    for (Object key : literals) {
        if (!visitNotEqual(fieldRef, key)) {
            // One excluded value already rules out every row, so no row can pass NOT IN.
            return false;
        }
    }
    return true;
}

byte[] serializedBytes();
/**
 * Default for an AND combination: the child results are ignored and the file is reported as
 * one that must be searched.
 *
 * @param children results of the child predicates (unused by this default)
 * @return always true
 */
@Override
default Boolean visitAnd(List<Boolean> children) {
    return Boolean.TRUE;
}

/**
 * Default for an OR combination: the child results are ignored and the file is reported as one
 * that must be searched.
 *
 * @param children results of the child predicates (unused by this default)
 * @return always true
 */
@Override
default Boolean visitOr(List<Boolean> children) {
    return Boolean.TRUE;
}

FileIndex recoverFrom(byte[] bytes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,20 @@

import org.apache.paimon.predicate.CompoundPredicate;
import org.apache.paimon.predicate.FieldRef;
import org.apache.paimon.predicate.FunctionVisitor;
import org.apache.paimon.predicate.LeafPredicate;
import org.apache.paimon.predicate.Or;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.predicate.PredicateVisitor;

import java.util.List;

/** Predicate test. */
public class FileIndexPredicateTester implements PredicateVisitor<Boolean> {
leaves12138 marked this conversation as resolved.
Show resolved Hide resolved

private final String columnName;
private final PredicateFunctionChecker predicateFunctionChecker;
private final FileIndex fileIndex;

public FileIndexPredicateTester(String columnName, FileIndex fileIndex) {
this.columnName = columnName;
this.predicateFunctionChecker = new PredicateFunctionChecker(fileIndex);
this.fileIndex = fileIndex;
}

@Override
Expand All @@ -45,7 +42,7 @@ public Boolean visit(LeafPredicate predicate) {
return predicate
.function()
.visit(
predicateFunctionChecker,
fileIndex,
new FieldRef(
predicate.index(), predicate.fieldName(), predicate.type()),
predicate.literals());
Expand Down Expand Up @@ -73,86 +70,4 @@ public Boolean visit(CompoundPredicate predicate) {
return true;
}
}

/**
 * Adapter that forwards each {@link FunctionVisitor} callback to the matching {@code test*}
 * method of the wrapped {@link FileIndex}. The {@link FieldRef} argument is not used; only the
 * literal(s) are passed on.
 */
private static final class PredicateFunctionChecker implements FunctionVisitor<Boolean> {

    private final FileIndex fileIndex;

    /**
     * @param fileIndex the index every callback is delegated to
     */
    public PredicateFunctionChecker(FileIndex fileIndex) {
        this.fileIndex = fileIndex;
    }

    /** IS NOT NULL is forwarded as "not contains null". */
    @Override
    public Boolean visitIsNotNull(FieldRef fieldRef) {
        return fileIndex.testNotContains(null);
    }

    /** IS NULL is forwarded as "contains null". */
    @Override
    public Boolean visitIsNull(FieldRef fieldRef) {
        return fileIndex.testContains(null);
    }

    @Override
    public Boolean visitStartsWith(FieldRef fieldRef, Object literal) {
        return fileIndex.testStartsWith(literal);
    }

    @Override
    public Boolean visitLessThan(FieldRef fieldRef, Object literal) {
        return fileIndex.testLessThan(literal);
    }

    @Override
    public Boolean visitGreaterOrEqual(FieldRef fieldRef, Object literal) {
        return fileIndex.testGreaterOrEqual(literal);
    }

    /** Not-equal is forwarded as "not contains". */
    @Override
    public Boolean visitNotEqual(FieldRef fieldRef, Object literal) {
        return fileIndex.testNotContains(literal);
    }

    @Override
    public Boolean visitLessOrEqual(FieldRef fieldRef, Object literal) {
        return fileIndex.testLessOrEqual(literal);
    }

    /** Equal is forwarded as "contains". */
    @Override
    public Boolean visitEqual(FieldRef fieldRef, Object literal) {
        return fileIndex.testContains(literal);
    }

    @Override
    public Boolean visitGreaterThan(FieldRef fieldRef, Object literal) {
        return fileIndex.testGreaterThan(literal);
    }

    /**
     * Forwards an IN predicate to {@code testIn}.
     *
     * <p>The literals are copied into a plain {@code Object[]}. A {@code byte[][]} backing
     * array would reject any literal that is not a {@code byte[]} with an
     * {@link ArrayStoreException}.
     */
    @Override
    public Boolean visitIn(FieldRef fieldRef, List<Object> literals) {
        return fileIndex.testIn(literals.toArray());
    }

    /**
     * Forwards a NOT IN predicate to {@code testNotIn}.
     *
     * <p>The literals are copied into a plain {@code Object[]}. A {@code byte[][]} backing
     * array would reject any literal that is not a {@code byte[]} with an
     * {@link ArrayStoreException}.
     */
    @Override
    public Boolean visitNotIn(FieldRef fieldRef, List<Object> literals) {
        return fileIndex.testNotIn(literals.toArray());
    }

    /** Child results are ignored; always returns true. */
    @Override
    public Boolean visitAnd(List<Boolean> children) {
        return true;
    }

    /** Child results are ignored; always returns true. */
    @Override
    public Boolean visitOr(List<Boolean> children) {
        return true;
    }
}
}