Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Geoparquet filtering and simplification #895

Merged
merged 25 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f618666
Add s3 transfer manager dependency
bchapuis Sep 30, 2024
f640c29
Improve spliterator concurrency
bchapuis Sep 30, 2024
21816d7
Improve the geoparquet reader
bchapuis Sep 30, 2024
2f53920
Improve the geoparquet reader
bchapuis Oct 5, 2024
c49861a
Make the spliterator an internal class
bchapuis Oct 5, 2024
07944ac
Remove unused writer class and use low level ParquetFileReader API
bchapuis Oct 7, 2024
79147b0
Merge GeoParquetGroup interface and implementation
bchapuis Oct 8, 2024
15b96e1
Remove the nanotime type
bchapuis Oct 8, 2024
443c90c
Distinguish types with single and repeated values
bchapuis Oct 8, 2024
21ea189
Remove the wrappers
bchapuis Oct 8, 2024
61f792c
Improve api
bchapuis Oct 8, 2024
26b41b1
Pass properties instead of parent object
bchapuis Oct 9, 2024
721bdb5
Add filtering capabilities
bchapuis Oct 9, 2024
49aebb8
Improve the creation of the filter predicate
bchapuis Oct 10, 2024
848dc35
Refactor the geoparquet reader
bchapuis Oct 10, 2024
19da919
Add some javadoc
bchapuis Oct 10, 2024
930cc0f
Move the config in a dedicated class
bchapuis Oct 10, 2024
8de3f0f
Move disabled tests in dedicated class
bchapuis Oct 11, 2024
ffbc74c
Fix issues in group
bchapuis Oct 12, 2024
461d30a
Add benchmarking module
bchapuis Oct 12, 2024
2d0eacf
Skip geoparquet files based on their bbox
bchapuis Oct 12, 2024
1325256
Format code
bchapuis Oct 12, 2024
1f9cc17
Fix sonar issue
bchapuis Oct 12, 2024
985ef2c
Suppress warnings in benchmarks
bchapuis Oct 13, 2024
cdb9f6a
Improve documentation and remove println
bchapuis Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix issues in group
  • Loading branch information
bchapuis committed Oct 12, 2024
commit ffbc74caa55697019aa45284a91eadaf5783cc71
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,6 @@
return (List<Object>) data[fieldIndex];
}

/**
 * Returns the value stored at the given field index, cast to a nested group.
 *
 * @param fieldIndex the position of the field in the data array
 * @return the stored value as a {@code GeoParquetGroup}
 * @throws ClassCastException if the stored value is not a {@code GeoParquetGroup}
 */
private GeoParquetGroup getGroup(int fieldIndex) {
  Object stored = data[fieldIndex];
  return (GeoParquetGroup) stored;
}

private void addValue(int fieldIndex, Object value) {
Object currentValue = data[fieldIndex];
if (currentValue instanceof List<?>) {
Expand Down Expand Up @@ -225,48 +221,6 @@
return new GeoParquetException(msg);
}

/**
 * Returns the string representation of this group, rendered without any leading indentation.
 *
 * @return the string produced by {@link #toString(String)} with an empty indent
 */
@Override
public String toString() {
  return toString("");
}

/**
 * Renders this group as text, prefixing every emitted line with the given indent.
 *
 * @param indent the prefix placed in front of each field name
 * @return the rendered text
 */
public String toString(String indent) {
  var text = new StringBuilder();
  appendToString(text, indent);
  return text.toString();
}

/**
 * Appends one line per stored value to the builder, recursing into nested groups with one extra
 * space of indentation. Fields whose slot in the data array is {@code null} are omitted.
 *
 * @param builder the builder receiving the text
 * @param indent the prefix placed in front of each field name
 */
private void appendToString(StringBuilder builder, String indent) {
  int position = 0;
  for (org.apache.parquet.schema.Type field : parquetSchema.getFields()) {
    String label = field.getName();
    Object stored = data[position++];
    if (stored == null) {
      continue;
    }

    // A single value is rendered exactly like a one-element list of values.
    List<?> entries = stored instanceof List<?> list ? list : List.of(stored);
    for (Object entry : entries) {
      builder.append(indent).append(label);
      if (entry == null) {
        builder.append(": NULL\n");
      } else if (entry instanceof GeoParquetGroup nested) {
        builder.append('\n');
        nested.appendToString(builder, indent + " ");
      } else {
        builder.append(": ").append(entry).append('\n');
      }
    }
  }
}

/**
 * Returns the GeoParquet schema held by this group.
 *
 * @return the value of the {@code geoParquetSchema} field
 */
public GeoParquetSchema getGeoParquetSchema() {
  return this.geoParquetSchema;
}
Expand All @@ -280,30 +234,10 @@
}

// Getter methods for different data types
public String getString(int fieldIndex, int index) {
public String getStringValue(int fieldIndex, int index) {
return getBinaryValue(fieldIndex, index).toStringUsingUTF8();
}

/**
 * Returns the value at the given field and position as an {@code int}.
 *
 * @param fieldIndex the index of the field
 * @param index the position of the value within the field
 * @return the unboxed integer value
 * @throws ClassCastException if the stored value is not an {@code Integer}
 */
public int getInteger(int fieldIndex, int index) {
  Integer boxed = (Integer) getValue(fieldIndex, index);
  return boxed;
}

/**
 * Returns the value at the given field and position as a {@code long}.
 *
 * @param fieldIndex the index of the field
 * @param index the position of the value within the field
 * @return the unboxed long value
 * @throws ClassCastException if the stored value is not a {@code Long}
 */
public long getLong(int fieldIndex, int index) {
  Long boxed = (Long) getValue(fieldIndex, index);
  return boxed;
}

/**
 * Returns the value at the given field and position as a {@code double}.
 *
 * @param fieldIndex the index of the field
 * @param index the position of the value within the field
 * @return the unboxed double value
 * @throws ClassCastException if the stored value is not a {@code Double}
 */
public double getDouble(int fieldIndex, int index) {
  Double boxed = (Double) getValue(fieldIndex, index);
  return boxed;
}

/**
 * Returns the value at the given field and position as a {@code float}.
 *
 * @param fieldIndex the index of the field
 * @param index the position of the value within the field
 * @return the unboxed float value
 * @throws ClassCastException if the stored value is not a {@code Float}
 */
public float getFloat(int fieldIndex, int index) {
  Float boxed = (Float) getValue(fieldIndex, index);
  return boxed;
}

/**
 * Returns the value at the given field and position as a {@code boolean}.
 *
 * @param fieldIndex the index of the field
 * @param index the position of the value within the field
 * @return the unboxed boolean value
 * @throws ClassCastException if the stored value is not a {@code Boolean}
 */
public boolean getBoolean(int fieldIndex, int index) {
  Boolean boxed = (Boolean) getValue(fieldIndex, index);
  return boxed;
}

/**
 * Returns the value at the given field and position as a {@code Binary}.
 *
 * @param fieldIndex the index of the field
 * @param index the position of the value within the field
 * @return the stored value cast to {@code Binary}
 * @throws ClassCastException if the stored value is not a {@code Binary}
 */
public Binary getBinaryValue(int fieldIndex, int index) {
  Object raw = getValue(fieldIndex, index);
  return (Binary) raw;
}
Expand Down Expand Up @@ -343,7 +277,7 @@
}

public String getStringValue(int fieldIndex) {
return getString(fieldIndex, 0);
return getStringValue(fieldIndex, 0);
}

public Geometry getGeometryValue(int fieldIndex) {
Expand Down Expand Up @@ -514,4 +448,55 @@
return getEnvelopeValues(parquetSchema.getFieldIndex(fieldName));
}

public String toString() {

Check notice

Code scanning / CodeQL

Missing Override annotation Note

This method overrides
Object.toString
; it is advisable to add an Override annotation.
return toString("");
}

/**
 * Renders every non-null field of this group as text, using the given indent as line prefix.
 *
 * @param indent the prefix placed in front of each field name
 * @return the rendered text
 */
private String toString(String indent) {
  var text = new StringBuilder();
  final int total = parquetSchema.getFields().size();

  for (int position = 0; position < total; position++) {
    Object stored = data[position];
    if (stored == null) {
      // Fields without a value are left out of the output.
      continue;
    }
    appendFieldToString(text, indent, parquetSchema.getFieldName(position), stored);
  }

  return text.toString();
}

/**
 * Appends a field to the builder: once per element when the field value is a list, otherwise
 * once for the value itself.
 *
 * @param builder the builder receiving the text
 * @param indent the prefix placed in front of the field name
 * @param fieldName the name of the field
 * @param fieldValue the field value, either a list of values or a single value
 */
private void appendFieldToString(StringBuilder builder, String indent, String fieldName,
    Object fieldValue) {
  if (!(fieldValue instanceof List<?> entries)) {
    appendValueToString(builder, indent, fieldName, fieldValue);
    return;
  }
  for (Object entry : entries) {
    appendValueToString(builder, indent, fieldName, entry);
  }
}

/**
 * Appends a single value of a field to the builder. Nested groups are rendered on the following
 * lines with one extra space of indentation; a {@code null} value is rendered as {@code NULL}.
 *
 * @param builder the builder receiving the text
 * @param indent the prefix placed in front of the field name
 * @param fieldName the name of the field
 * @param value the value to render, possibly {@code null}
 */
private void appendValueToString(StringBuilder builder, String indent, String fieldName,
    Object value) {
  builder.append(indent).append(fieldName);

  if (value instanceof GeoParquetGroup nested) {
    builder.append("\n").append(nested.toString(indent + " "));
    return;
  }

  String suffix = (value == null) ? ": NULL\n" : ": " + getValueAsString(value) + "\n";
  builder.append(suffix);
}

/**
 * Converts a value to text: {@code Binary} values are decoded as UTF-8, any other value uses its
 * own {@code toString()}.
 *
 * @param value the value to convert; must not be {@code null}
 * @return the textual form of the value
 */
private String getValueAsString(Object value) {
  return (value instanceof Binary binary) ? binary.toStringUsingUTF8() : value.toString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public record Column(
@JsonProperty("crs") JsonNode crs,
@JsonProperty("orientation") String orientation,
@JsonProperty("edges") String edges,
@JsonProperty("bbox") Double[] bbox) {
@JsonProperty("bbox") List<Double> bbox) {
}

/**
Expand Down Expand Up @@ -94,13 +94,12 @@ public int getSrid(String column) {
String code = idNode.get("code").asText();

// Determine SRID based on authority and code
switch (authority) {
case "OGC":
return getOgcSrid(code); // Handle OGC specific SRIDs
case "EPSG":
return getEpsgCode(code); // Handle EPSG SRIDs
default:
return 4326; // Default SRID if authority is unrecognized
if (authority.equals("EPSG")) {
return getEpsgCode(code);
} else if (authority.equals("OGC")) {
return getOgcSrid(code);
} else {
return 4326; // Default SRID if authority is unrecognized
}
}

Expand All @@ -111,11 +110,10 @@ public int getSrid(String column) {
* @return the SRID, or 0 if the code is unrecognized
*/
private int getOgcSrid(String code) {
switch (code) {
case "CRS84":
return 4326;
default:
return 0; // Unrecognized OGC code
if ("CRS84".equals(code)) {
return 4326;
} else {
return 0; // Unrecognized OGC code
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ public enum Cardinality {
* Sealed interfaces were introduced in Java 17 and can be used with pattern matching since Java
* 21.
*/
sealed
public interface Field {
public sealed
interface Field {
String name();

Type type();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class GeoParquetReaderTest {
void read() {
URI geoParquet = TestFiles.GEOPARQUET.toUri();
GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
assertEquals(5, geoParquetReader.read().count());
assertEquals(5, geoParquetReader.read().peek(System.out::println).count());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.junit.jupiter.api.Test;
import org.locationtech.jts.geom.Envelope;

public class OvertureMapsTest {
class OvertureMapsTest {

@Disabled("Requires access to the Internet")
@Test
Expand Down
Loading