Skip to content

Commit

Permalink
[flights] Remove cast causing Clickhouse error (#1341)
Browse files Browse the repository at this point in the history
Casting to plain `Float` is dangerous, as the column can be
`Nullable(Float)`, in which case ClickHouse raises an error (it cannot
convert a NULL value to the non-Nullable `Float` type). The cast is also
unnecessary, as we stopped storing geo columns as Strings.

Gets rid of
```
Error processing request: clickhouse: iterating over rows failed:  code: 349, message:
Cannot convert NULL value to non-Nullable type: while executing
'FUNCTION CAST(__table1.geoip_location_lat :: 1, 'Float'_String : 9) -> CAST(__table1.geoip_location_lat, 'Float'_String) Float32 : 12'	
```
  • Loading branch information
trzysiek authored Mar 4, 2025
1 parent a65abfd commit 3b79092
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,10 @@ func (cw *ClickhouseQueryTranslator) parseGeotileGrid(aggregation *pancakeAggreg
// That's bucket (group by) formula for geotile_grid
// zoom/x/y
// SELECT precisionZoom as zoom,
// FLOOR(((toFloat64("Location::lon") + 180.0) / 360.0) * POWER(2, zoom)) AS x_tile,
// FLOOR((("Location::lon" + 180.0) / 360.0) * POWER(2, zoom)) AS x_tile,
// FLOOR(
// (
// 1 - LOG(TAN(RADIANS(toFloat64("Location::lat"))) + (1 / COS(RADIANS(toFloat64("Location::lat"))))) / PI()
// 1 - LOG(TAN(RADIANS("Location::lat")) + (1 / COS(RADIANS("Location::lat")))) / PI()
// ) / 2.0 * POWER(2, zoom)
// ) AS y_tile, count()
// FROM
Expand All @@ -350,15 +350,13 @@ func (cw *ClickhouseQueryTranslator) parseGeotileGrid(aggregation *pancakeAggreg
lon := model.NewGeoLon(fieldName)
lat := model.NewGeoLat(fieldName)

toFloatFunLon := model.NewFunction("toFloat64", lon)
var infixX model.Expr
infixX = model.NewParenExpr(model.NewInfixExpr(toFloatFunLon, "+", model.NewLiteral(180.0)))
infixX = model.NewParenExpr(model.NewInfixExpr(lon, "+", model.NewLiteral(180.0)))
infixX = model.NewParenExpr(model.NewInfixExpr(infixX, "/", model.NewLiteral(360.0)))
infixX = model.NewInfixExpr(infixX, "*",
model.NewFunction("POWER", model.NewLiteral(2), zoomLiteral))
xTile := model.NewFunction("FLOOR", infixX)
toFloatFunLat := model.NewFunction("toFloat64", lat)
radians := model.NewFunction("RADIANS", toFloatFunLat)
radians := model.NewFunction("RADIANS", lat)
tan := model.NewFunction("TAN", radians)
cos := model.NewFunction("COS", radians)
Log := model.NewFunction("LOG", model.NewInfixExpr(tan, "+",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,8 @@ func generateMetricSelectedColumns(ctx context.Context, metricsAggr metricsAggre
// TODO we have create columns according to the schema
latColumn := model.NewGeoLat(colName)
lonColumn := model.NewGeoLon(colName)
castLat := model.NewFunction("CAST", latColumn, model.NewLiteral(fmt.Sprintf("'%s'", "Float")))
castLon := model.NewFunction("CAST", lonColumn, model.NewLiteral(fmt.Sprintf("'%s'", "Float")))
result = append(result, model.NewFunction("avgOrNull", castLat))
result = append(result, model.NewFunction("avgOrNull", castLon))
result = append(result, model.NewFunction("avgOrNull", latColumn))
result = append(result, model.NewFunction("avgOrNull", lonColumn))
result = append(result, model.NewCountFunc())
}
default:
Expand Down
16 changes: 8 additions & 8 deletions platform/testdata/aggregation_requests_2.go
Original file line number Diff line number Diff line change
Expand Up @@ -4559,18 +4559,18 @@ var AggregationTests2 = []AggregationTestCase{
},
ExpectedPancakeSQL: `
SELECT CAST(8.000000 AS Float32) AS "aggr__my_buckets__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 8))
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
AS "aggr__my_buckets__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
8)) AS "aggr__my_buckets__key_2", count(*) AS "aggr__my_buckets__count"
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
AS "aggr__my_buckets__key_2", count(*) AS "aggr__my_buckets__count"
FROM __quesma_table_name
GROUP BY CAST(8.000000 AS Float32) AS "aggr__my_buckets__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 8))
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
AS "aggr__my_buckets__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
8)) AS "aggr__my_buckets__key_2"
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(
RADIANS(__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
AS "aggr__my_buckets__key_2"
LIMIT 10`,
},
{ // [69]
Expand Down
19 changes: 9 additions & 10 deletions platform/testdata/kibana_sample_data_ecommerce.go
Original file line number Diff line number Diff line change
Expand Up @@ -2468,14 +2468,14 @@ var KibanaSampleDataEcommerce = []AggregationTestCase{
},
ExpectedPancakeSQL: `
SELECT CAST(5.000000 AS Float32) AS "aggr__gridSplit__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("geoip.location"))+180)/360)*POWER(2, 5))
FLOOR(((__quesma_geo_lon("geoip.location")+180)/360)*POWER(2, 5))
AS "aggr__gridSplit__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))))/PI())/2*POWER(2,
5)) AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
avgOrNull(CAST(__quesma_geo_lat("geoip_location"), 'Float')) AS
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geoip.location")))+(1/COS(RADIANS(
__quesma_geo_lat("geoip.location")))))/PI())/2*POWER(2, 5))
AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
avgOrNull(__quesma_geo_lat("geoip_location")) AS
"metric__gridSplit__gridCentroid_col_0",
avgOrNull(CAST(__quesma_geo_lon("geoip_location"), 'Float')) AS
avgOrNull(__quesma_geo_lon("geoip_location")) AS
"metric__gridSplit__gridCentroid_col_1",
count(*) AS "metric__gridSplit__gridCentroid_col_2",
sumOrNull("taxful_total_price") AS
Expand All @@ -2484,11 +2484,10 @@ var KibanaSampleDataEcommerce = []AggregationTestCase{
WHERE ("geoip.location" IS NOT NULL AND ("order_date">=fromUnixTimestamp64Milli(
1740143222223) AND "order_date"<=fromUnixTimestamp64Milli(1740748022223)))
GROUP BY CAST(5.000000 AS Float32) AS "aggr__gridSplit__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("geoip.location"))+180)/360)*POWER(2, 5))
FLOOR(((__quesma_geo_lon("geoip.location")+180)/360)*POWER(2, 5))
AS "aggr__gridSplit__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))))/PI())/2*POWER(2,
5)) AS "aggr__gridSplit__key_2"`,
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geoip.location")))+(1/COS(RADIANS(
__quesma_geo_lat("geoip.location")))))/PI())/2*POWER(2, 5)) AS "aggr__gridSplit__key_2"`,
},
{ // [13]
TestName: "Orders by Country (request 2/3)",
Expand Down
19 changes: 9 additions & 10 deletions platform/testdata/kibana_sample_data_flights.go
Original file line number Diff line number Diff line change
Expand Up @@ -2500,14 +2500,14 @@ var KibanaSampleDataFlights = []AggregationTestCase{
},
ExpectedPancakeSQL: `
SELECT CAST(7.000000 AS Float32) AS "aggr__gridSplit__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 7))
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 7))
AS "aggr__gridSplit__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
7)) AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
avgOrNull(CAST(__quesma_geo_lat("originlocation"), 'Float')) AS
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 7))
AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
avgOrNull(__quesma_geo_lat("originlocation")) AS
"metric__gridSplit__gridCentroid_col_0",
avgOrNull(CAST(__quesma_geo_lon("originlocation"), 'Float')) AS
avgOrNull(__quesma_geo_lon("originlocation")) AS
"metric__gridSplit__gridCentroid_col_1",
count(*) AS "metric__gridSplit__gridCentroid_col_2",
sumOrNull("FlightDelayMin") AS
Expand All @@ -2516,11 +2516,10 @@ var KibanaSampleDataFlights = []AggregationTestCase{
WHERE ("OriginLocation" IS NOT NULL AND ("timestamp">=fromUnixTimestamp64Milli(
1740230608853) AND "timestamp"<=fromUnixTimestamp64Milli(1740835408853)))
GROUP BY CAST(7.000000 AS Float32) AS "aggr__gridSplit__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 7))
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 7))
AS "aggr__gridSplit__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
7)) AS "aggr__gridSplit__key_2"`,
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 7)) AS "aggr__gridSplit__key_2"`,
},
{ // [13]
TestName: "Delay Buckets",
Expand Down
19 changes: 9 additions & 10 deletions platform/testdata/kibana_sample_data_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1884,26 +1884,25 @@ var KibanaSampleDataLogs = []AggregationTestCase{
},
ExpectedPancakeSQL: `
SELECT CAST(6.000000 AS Float32) AS "aggr__gridSplit__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("geo.coordinates"))+180)/360)*POWER(2, 6))
FLOOR(((__quesma_geo_lon("geo.coordinates")+180)/360)*POWER(2, 6))
AS "aggr__gridSplit__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))))/PI())/2*POWER(2
, 6)) AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
avgOrNull(CAST(__quesma_geo_lat("geo_coordinates"), 'Float')) AS
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geo.coordinates")))+(1/COS(RADIANS(
__quesma_geo_lat("geo.coordinates")))))/PI())/2*POWER(2, 6))
AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
avgOrNull(__quesma_geo_lat("geo_coordinates")) AS
"metric__gridSplit__gridCentroid_col_0",
avgOrNull(CAST(__quesma_geo_lon("geo_coordinates"), 'Float')) AS
avgOrNull(__quesma_geo_lon("geo_coordinates")) AS
"metric__gridSplit__gridCentroid_col_1",
count(*) AS "metric__gridSplit__gridCentroid_col_2",
sumOrNull("bytes") AS "metric__gridSplit__sum_of_bytes_col_0"
FROM __quesma_table_name
WHERE ("geo.coordinates" IS NOT NULL AND ("timestamp">=fromUnixTimestamp64Milli(
1740178800000) AND "timestamp"<=fromUnixTimestamp64Milli(1740831278103)))
GROUP BY CAST(6.000000 AS Float32) AS "aggr__gridSplit__key_0",
FLOOR(((toFloat64(__quesma_geo_lon("geo.coordinates"))+180)/360)*POWER(2, 6))
FLOOR(((__quesma_geo_lon("geo.coordinates")+180)/360)*POWER(2, 6))
AS "aggr__gridSplit__key_1",
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))+(1/
COS(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))))/PI())/2*POWER(2
, 6)) AS "aggr__gridSplit__key_2"`,
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geo.coordinates")))+(1/COS(
RADIANS(__quesma_geo_lat("geo.coordinates")))))/PI())/2*POWER(2, 6)) AS "aggr__gridSplit__key_2"`,
},
{ // [9]
TestName: "Total Requests and Bytes (2/2 request)",
Expand Down

0 comments on commit 3b79092

Please sign in to comment.