Skip to content

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.hypertrace.core.documentstore.expression.impl;

import com.google.common.base.Preconditions;
import lombok.EqualsAndHashCode;
import lombok.Value;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;

Expand All @@ -20,6 +21,7 @@
* </code> the rhs of the join condition "latest.item" can be expressed as: <code>
* AliasedIdentifierExpression.builder().name("item").alias("alias1").build() </code>
*/
@EqualsAndHashCode(callSuper = true)
@Value
public class AliasedIdentifierExpression extends IdentifierExpression {
String contextAlias;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.hypertrace.core.documentstore.expression.impl;

import java.util.Optional;
import lombok.EqualsAndHashCode;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;

Expand All @@ -14,28 +13,66 @@
@EqualsAndHashCode(callSuper = true)
public class ArrayIdentifierExpression extends IdentifierExpression {

private final ArrayType arrayType;
private final DataType arrayElementType;

public ArrayIdentifierExpression(String name) {
this(name, null);
ArrayIdentifierExpression(String name) {
this(name, DataType.UNSPECIFIED);
}

public ArrayIdentifierExpression(String name, ArrayType arrayType) {
ArrayIdentifierExpression(String name, DataType arrayElementType) {
super(name);
this.arrayType = arrayType;
this.arrayElementType = arrayElementType;
}

/**
 * Creates an array identifier expression for the given name without stating an element type.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression built from {@code name} only
 */
public static ArrayIdentifierExpression of(String name) {
  final ArrayIdentifierExpression expression = new ArrayIdentifierExpression(name);
  return expression;
}

public static ArrayIdentifierExpression of(String name, ArrayType arrayType) {
return new ArrayIdentifierExpression(name, arrayType);
static ArrayIdentifierExpression of(String name, DataType arrayElementType) {
return new ArrayIdentifierExpression(name, arrayElementType);
}

/** Returns the array type if specified, empty otherwise */
public Optional<ArrayType> getArrayType() {
return Optional.ofNullable(arrayType);
public static ArrayIdentifierExpression ofStrings(final String name) {
return of(name, DataType.STRING);
}

/**
 * Creates an array identifier expression whose elements are typed as {@link DataType#INTEGER}.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression with INTEGER elements
 */
public static ArrayIdentifierExpression ofInts(final String name) {
  return ArrayIdentifierExpression.of(name, DataType.INTEGER);
}

/**
 * Creates an array identifier expression whose elements are typed as {@link DataType#LONG}.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression with LONG elements
 */
public static ArrayIdentifierExpression ofLongs(final String name) {
  return ArrayIdentifierExpression.of(name, DataType.LONG);
}

/**
 * Creates an array identifier expression whose elements are typed as {@link DataType#FLOAT}.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression with FLOAT elements
 */
public static ArrayIdentifierExpression ofFloats(final String name) {
  return ArrayIdentifierExpression.of(name, DataType.FLOAT);
}

/**
 * Creates an array identifier expression whose elements are typed as {@link DataType#DOUBLE}.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression with DOUBLE elements
 */
public static ArrayIdentifierExpression ofDoubles(final String name) {
  return ArrayIdentifierExpression.of(name, DataType.DOUBLE);
}

/**
 * Creates an array identifier expression whose elements are typed as {@link DataType#BOOLEAN}.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression with BOOLEAN elements
 */
public static ArrayIdentifierExpression ofBooleans(final String name) {
  return ArrayIdentifierExpression.of(name, DataType.BOOLEAN);
}

public static ArrayIdentifierExpression ofTimestampsTz(final String name) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class is supposed to be agnostic of the underlying database. Using Postgres-specific types here breaks the abstraction.
I'm especially concerned since we have both an ofTimestamp() and an ofTimestampsTz().
This might create tight coupling.

If necessary, we should have abstract type definitions for document-store, which can later be mapped to the database-specific types during query translation.

If we want to make it ideal, we should keep PostgresDataType at the module/library level.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@suresh-prakash Sure, I've added generic types to remove this coupling. We now have PG-specific extractors that do this mapping.

return of(name, DataType.TIMESTAMPTZ);
}

/**
 * Creates an array identifier expression whose elements are typed as {@link DataType#DATE}.
 *
 * @param name the identifier/column name of the array field
 * @return a new ArrayIdentifierExpression with DATE elements
 */
public static ArrayIdentifierExpression ofDates(final String name) {
  return ArrayIdentifierExpression.of(name, DataType.DATE);
}

/**
 * Gets the {@link DataType} of this array's elements.
 *
 * <p>Database-specific type extractors read this value to produce the matching type casts.
 *
 * @return the element type; {@link DataType#UNSPECIFIED} when no type was explicitly provided
 */
public DataType getElementDataType() {
  return this.arrayElementType;
}

/**
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.hypertrace.core.documentstore.expression.impl;

/**
* Database-agnostic data types for explicit type annotation in queries.
*
* <p>This enum provides type metadata for {@link IdentifierExpression} and {@link
* ArrayIdentifierExpression} fields in flat collections, enabling type-safe query generation
* without runtime type inference.
*
* <p>These types are mapped to database-specific types at query parsing time. For example, when
* generating PostgreSQL queries, {@code STRING} maps to {@code text}, {@code INTEGER} maps to
* {@code integer}, etc.
*
* @see ArrayIdentifierExpression
* @see IdentifierExpression
*/
public enum DataType {
UNSPECIFIED,
STRING,
INTEGER,
LONG,
FLOAT,
DOUBLE,
BOOLEAN,
// timestamp with time-zone information. For example: 2004-10-19 10:23:54+02.
// For more info, see: https://www.postgresql.org/docs/current/datatype-datetime.html
TIMESTAMPTZ,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment on this enum? It is not evident what the TZ suffix represents.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added doc.

DATE
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
* Expression representing either an identifier/column name
*
* <p>Example: IdentifierExpression.of("col1");
*
* <p>For flat relational collections, you can optionally provide a {@link DataType} to enable
* type-safe query generation without runtime type inference:
*
* <p>Example: IdentifierExpression.ofInt("price");
*/
@Value
@NonFinal
Expand All @@ -25,12 +30,59 @@ public class IdentifierExpression
implements GroupTypeExpression, SelectTypeExpression, SortTypeExpression {

String name;
// Type information of this identifier, used for flat collections. It is optional in order to
// maintain backward compatibility.
DataType dataType;

/**
 * Creates an identifier that carries no explicit type information.
 *
 * @param name the identifier/column name
 */
IdentifierExpression(String name) {
  // No type was supplied, so record the UNSPECIFIED marker.
  this.dataType = DataType.UNSPECIFIED;
  this.name = name;
}

/**
 * Creates an identifier expression for the given name without type information.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression for {@code name}
 * @throws IllegalArgumentException if {@code name} is null or blank
 */
public static IdentifierExpression of(final String name) {
  final boolean hasText = name != null && !name.isBlank();
  Preconditions.checkArgument(hasText, "name is null or blank");
  return new IdentifierExpression(name);
}

/**
 * Creates an identifier expression for the given name, annotated with the given data type.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @param dataType the data type to attach to the identifier
 * @return a new IdentifierExpression for {@code name} carrying {@code dataType}
 * @throws IllegalArgumentException if {@code name} is null or blank
 */
static IdentifierExpression of(final String name, final DataType dataType) {
  final boolean hasText = name != null && !name.isBlank();
  Preconditions.checkArgument(hasText, "name is null or blank");
  return new IdentifierExpression(name, dataType);
}

/**
 * Creates an identifier expression typed as {@link DataType#STRING}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as STRING
 */
public static IdentifierExpression ofString(final String name) {
  return IdentifierExpression.of(name, DataType.STRING);
}

/**
 * Creates an identifier expression typed as {@link DataType#INTEGER}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as INTEGER
 */
public static IdentifierExpression ofInt(final String name) {
  return IdentifierExpression.of(name, DataType.INTEGER);
}

/**
 * Creates an identifier expression typed as {@link DataType#LONG}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as LONG
 */
public static IdentifierExpression ofLong(final String name) {
  return IdentifierExpression.of(name, DataType.LONG);
}

/**
 * Creates an identifier expression typed as {@link DataType#FLOAT}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as FLOAT
 */
public static IdentifierExpression ofFloat(final String name) {
  return IdentifierExpression.of(name, DataType.FLOAT);
}

/**
 * Creates an identifier expression typed as {@link DataType#DOUBLE}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as DOUBLE
 */
public static IdentifierExpression ofDouble(final String name) {
  return IdentifierExpression.of(name, DataType.DOUBLE);
}

/**
 * Creates an identifier expression typed as {@link DataType#BOOLEAN}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as BOOLEAN
 */
public static IdentifierExpression ofBoolean(final String name) {
  return IdentifierExpression.of(name, DataType.BOOLEAN);
}

// Timestamp with time-zone information. For example: 2004-10-19 10:23:54+02. For more info, see:
// https://www.postgresql.org/docs/current/datatype-datetime.html
public static IdentifierExpression ofTimestampTz(final String name) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either a documentation comment would be helpful, or it would be better to drop the Tz suffix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added doc for this.

return of(name, DataType.TIMESTAMPTZ);
}

/**
 * Creates an identifier expression typed as {@link DataType#DATE}.
 *
 * @param name the identifier/column name; must be neither null nor blank
 * @return a new IdentifierExpression typed as DATE
 */
public static IdentifierExpression ofDate(final String name) {
  return IdentifierExpression.of(name, DataType.DATE);
}

@Override
public <T> T accept(final GroupTypeExpressionVisitor visitor) {
return visitor.visit(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ public enum JsonFieldType {
NUMBER_ARRAY,
BOOLEAN_ARRAY,
OBJECT_ARRAY,
OBJECT
OBJECT,
UNSPECIFIED
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class JsonIdentifierExpression extends IdentifierExpression {

String columnName; // e.g., "customAttr" (the top-level JSONB column)
List<String> jsonPath; // e.g., ["myAttribute", "nestedField"]
JsonFieldType fieldType; // Optional: PRIMITIVE or ARRAY for optimization
JsonFieldType fieldType;

public static JsonIdentifierExpression of(final String columnName) {
throw new IllegalArgumentException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.ArrayType;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression.DocumentConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
Expand All @@ -21,9 +20,8 @@
* <p>Returns:
*
* <ul>
* <li>The PostgreSQL array type string (e.g., "text[]", "integer[]") if {@link ArrayType} is
* specified
* <li>{@code null} if {@link ArrayIdentifierExpression} is used without an explicit type
* <li>The PostgreSQL array type string (e.g., "text[]", "integer[]")
* <li>{@code null} if {@link ArrayIdentifierExpression} has UNSPECIFIED type
* </ul>
*/
public class PostgresArrayTypeExtractor implements SelectTypeExpressionVisitor {
Expand All @@ -32,7 +30,8 @@ public PostgresArrayTypeExtractor() {}

@Override
public String visit(ArrayIdentifierExpression expression) {
return expression.getArrayType().map(ArrayType::getPostgresType).orElse(null);
PostgresDataType pgType = PostgresDataType.fromDataType(expression.getElementDataType());
return pgType == PostgresDataType.UNKNOWN ? null : pgType.getArraySqlType();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field;

import java.util.Objects;
import org.hypertrace.core.documentstore.expression.impl.DataType;

/**
 * PostgreSQL-specific data types with their SQL type strings.
 *
 * <p>This enum maps generic {@link DataType} values to PostgreSQL-specific type strings used in SQL
 * queries for type casting. Use {@link #fromDataType(DataType)} to translate a generic type, then
 * {@link #getSqlType()} or {@link #getArraySqlType()} to obtain the string to emit.
 */
public enum PostgresDataType {
  TEXT("text"),
  INTEGER("integer"),
  BIGINT("bigint"),
  REAL("real"),
  DOUBLE_PRECISION("double precision"),
  BOOLEAN("boolean"),
  TIMESTAMPTZ("timestamptz"),
  DATE("date"),
  /** Returned by {@link #fromDataType(DataType)} for {@link DataType#UNSPECIFIED}. */
  UNKNOWN("unknown");

  private final String sqlType;

  PostgresDataType(String sqlType) {
    this.sqlType = sqlType;
  }

  /**
   * Returns the scalar SQL type string of this type, e.g. {@code "text"}.
   *
   * @return the SQL type string
   */
  public String getSqlType() {
    return sqlType;
  }

  /**
   * Returns the SQL array type string of this type, i.e. the scalar type string followed by
   * {@code "[]"}, e.g. {@code "text[]"}.
   *
   * @return the SQL array type string
   */
  public String getArraySqlType() {
    return sqlType + "[]";
  }

  /**
   * Maps a generic DataType to its PostgreSQL equivalent.
   *
   * @param dataType the generic data type; must not be null
   * @return the corresponding PostgresDataType, or {@link #UNKNOWN} if the data type is {@link
   *     DataType#UNSPECIFIED}; never null
   * @throws NullPointerException if {@code dataType} is null
   * @throws IllegalArgumentException if the DataType has no PostgreSQL mapping
   */
  public static PostgresDataType fromDataType(DataType dataType) {
    // Fail with a descriptive message instead of the bare NPE a switch on null would raise.
    Objects.requireNonNull(dataType, "dataType must not be null");
    switch (dataType) {
      case UNSPECIFIED:
        return UNKNOWN;
      case STRING:
        return TEXT;
      case INTEGER:
        return INTEGER;
      case LONG:
        return BIGINT;
      case FLOAT:
        return REAL;
      case DOUBLE:
        return DOUBLE_PRECISION;
      case BOOLEAN:
        return BOOLEAN;
      case TIMESTAMPTZ:
        return TIMESTAMPTZ;
      case DATE:
        return DATE;
      default:
        // Guards against DataType constants added later without a mapping here.
        throw new IllegalArgumentException("Unknown DataType: " + dataType);
    }
  }
}
Loading
Loading