Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Geoparquet filtering and simplification #895

Merged
merged 25 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f618666
Add s3 transfer manager dependency
bchapuis Sep 30, 2024
f640c29
Improve spliterator concurrency
bchapuis Sep 30, 2024
21816d7
Improve the geoparquet reader
bchapuis Sep 30, 2024
2f53920
Improve the geoparquet reader
bchapuis Oct 5, 2024
c49861a
Make the spliterator an internal class
bchapuis Oct 5, 2024
07944ac
Remove unused writer class and use low level ParquetFileReader API
bchapuis Oct 7, 2024
79147b0
Merge GeoParquetGroup interface and implementation
bchapuis Oct 8, 2024
15b96e1
Remove the nanotime type
bchapuis Oct 8, 2024
443c90c
Distinguish types with single and repeated values
bchapuis Oct 8, 2024
21ea189
Remove the wrappers
bchapuis Oct 8, 2024
61f792c
Improve api
bchapuis Oct 8, 2024
26b41b1
Pass properties instead of parent object
bchapuis Oct 9, 2024
721bdb5
Add filtering capabilities
bchapuis Oct 9, 2024
49aebb8
Improve the creation of the filter predicate
bchapuis Oct 10, 2024
848dc35
Refactor the geoparquet reader
bchapuis Oct 10, 2024
19da919
Add some javadoc
bchapuis Oct 10, 2024
930cc0f
Move the config in a dedicated class
bchapuis Oct 10, 2024
8de3f0f
Move disabled tests in dedicated class
bchapuis Oct 11, 2024
ffbc74c
Fix issues in group
bchapuis Oct 12, 2024
461d30a
Add benchmarking module
bchapuis Oct 12, 2024
2d0eacf
Skip geoparquet files based on their bbox
bchapuis Oct 12, 2024
1325256
Format code
bchapuis Oct 12, 2024
1f9cc17
Fix sonar issue
bchapuis Oct 12, 2024
985ef2c
Suppress warnings in benchmarks
bchapuis Oct 13, 2024
cdb9f6a
Improve documentation and remove println
bchapuis Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor the geoparquet reader
  • Loading branch information
bchapuis committed Oct 11, 2024
commit 848dc359821a29cfdf4ed47f2c420e2238e3ab0d
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,21 @@
import org.apache.baremaps.data.storage.DataColumn.Cardinality;
import org.apache.baremaps.data.storage.DataColumn.Type;
import org.apache.baremaps.geoparquet.GeoParquetGroup;
import org.apache.baremaps.geoparquet.GeoParquetGroup.Field;
import org.apache.baremaps.geoparquet.GeoParquetGroup.GroupField;
import org.apache.baremaps.geoparquet.GeoParquetGroup.Schema;
import org.apache.baremaps.geoparquet.GeoParquetSchema;
import org.apache.baremaps.geoparquet.GeoParquetSchema.Field;
import org.apache.baremaps.geoparquet.GeoParquetSchema.GroupField;
import org.apache.parquet.io.api.Binary;

public class GeoParquetTypeConversion {

/** Private no-op constructor: this class is not meant to be instantiated from outside. */
private GeoParquetTypeConversion() {}

/**
* Builds a {@link DataSchema} for the given table name from a GeoParquet schema.
*
* @param table the table name passed to the resulting schema
* @param schema the GeoParquet schema whose fields are converted to data columns
* @return a new {@code DataSchemaImpl} created from the table name and the converted columns
*/
public static DataSchema asSchema(String table, Schema schema) {
public static DataSchema asSchema(String table, GeoParquetSchema schema) {
List<DataColumn> columns = asDataColumns(schema);
return new DataSchemaImpl(table, columns);
}

private static List<DataColumn> asDataColumns(Schema field) {
private static List<DataColumn> asDataColumns(GeoParquetSchema field) {
return field.fields().stream()
.map(GeoParquetTypeConversion::asDataColumn)
.toList();
Expand Down Expand Up @@ -67,7 +67,7 @@ private static DataColumn asDataColumn(Field field) {
}

public static List<Object> asRowValues(GeoParquetGroup group) {
Schema schema = group.getGeoParquetSchema();
GeoParquetSchema schema = group.getGeoParquetSchema();
List<Field> fields = schema.fields();
List<Object> values = new ArrayList<>();
for (int i = 0; i < fields.size(); i++) {
Expand All @@ -79,7 +79,7 @@ public static List<Object> asRowValues(GeoParquetGroup group) {

public static Map<String, Object> asNested(GeoParquetGroup group) {
Map<String, Object> nested = new HashMap<>();
Schema schema = group.getGeoParquetSchema();
GeoParquetSchema schema = group.getGeoParquetSchema();
List<Field> fields = schema.fields();
for (int i = 0; i < fields.size(); i++) {
if (group.getValues(i).isEmpty()) {
Expand All @@ -93,7 +93,7 @@ public static Map<String, Object> asNested(GeoParquetGroup group) {
}

public static Object asValue(Field field, GeoParquetGroup group, int i) {
if (field.cardinality() == GeoParquetGroup.Cardinality.REPEATED) {
if (field.cardinality() == GeoParquetSchema.Cardinality.REPEATED) {
return switch (field.type()) {
case BINARY -> group.getBinaryValues(i).stream().map(Binary::getBytes).toList();
case BOOLEAN -> group.getBooleanValues(i);
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,26 @@

package org.apache.baremaps.geoparquet;

/**
* Exception thrown when an error occurs during the processing of GeoParquet files.
*/
public class GeoParquetException extends RuntimeException {

/**
* Constructs a new GeoParquetException with the specified detail message.
*
* @param message the detail message, forwarded unchanged to {@link RuntimeException}
*/
public GeoParquetException(String message) {
super(message);
}

/**
* Constructs a new GeoParquetException with the specified detail message and cause.
*
* @param message the detail message, forwarded unchanged to {@link RuntimeException}
* @param cause the underlying throwable that triggered this exception, forwarded unchanged to
* {@link RuntimeException}
*/
public GeoParquetException(String message, Throwable cause) {
super(message, cause);
}
Expand Down
Loading