From 67fcf12104fc348f06541788b93087c3bc663c1a Mon Sep 17 00:00:00 2001 From: bowen xiao Date: Fri, 16 Aug 2024 12:42:04 -0700 Subject: [PATCH] refactor custom string query for pinot_query_validator (#6230) * Bug fix: isCron return error * Refactor attr string query * change unit test * upgrade pinot to 1.3.0 * upgrade pinot version to latest --- common/persistence/pinot/pinot_visibility_store_test.go | 8 ++++---- common/pinot/pinotQueryValidator.go | 4 ++-- common/pinot/pinotQueryValidator_test.go | 8 ++++---- docker/buildkite/docker-compose-local-pinot.yml | 6 +++--- docker/buildkite/docker-compose-pinot.yml | 6 +++--- docker/docker-compose-pinot.yml | 6 +++--- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/common/persistence/pinot/pinot_visibility_store_test.go b/common/persistence/pinot/pinot_visibility_store_test.go index b6b0e506fed..1e82f1a5505 100644 --- a/common/persistence/pinot/pinot_visibility_store_test.go +++ b/common/persistence/pinot/pinot_visibility_store_test.go @@ -1210,7 +1210,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String and or order by*')) +AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String and or order by.*'')') Order BY StartTime DESC LIMIT 0, 10 `, testTableName), @@ -1228,7 +1228,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND ((JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*')) or (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'and*'))) +AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*Or.*'')') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*and.*'')')) Order by StartTime DESC LIMIT 0, 10 `, testTableName), @@ -1246,7 +1246,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND WorkflowID = 'wid' and ((JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*')) or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10)) +AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10)) Order BY StartTime DESC LIMIT 0, 10 `, testTableName), @@ -1300,7 +1300,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String field is for text*')) +AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String field is for text.*'')') Order by DomainID Desc LIMIT 11, 10 `, testTableName), diff --git a/common/pinot/pinotQueryValidator.go b/common/pinot/pinotQueryValidator.go index dc04cbcd82a..003eda7555d 100644 --- a/common/pinot/pinotQueryValidator.go +++ b/common/pinot/pinotQueryValidator.go @@ -466,8 +466,8 @@ func processCustomString(comparisonExpr *sqlparser.ComparisonExpr, colNameStr st "AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '^$'))", colNameStr, colNameStr) } - return fmt.Sprintf("(JSON_MATCH(Attr, '\"$.%s\" is not null') "+ - "AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '%s*'))", colNameStr, colNameStr, colValStr) + return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+ + "AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''.*%s.*'')')", colNameStr, colNameStr, colValStr) } func trimTimeFieldValueFromNanoToMilliSeconds(original *sqlparser.SQLVal) (*sqlparser.SQLVal, error) { diff --git a/common/pinot/pinotQueryValidator_test.go b/common/pinot/pinotQueryValidator_test.go index 1f6a5c464f3..2e28e517458 100644 --- a/common/pinot/pinotQueryValidator_test.go +++ b/common/pinot/pinotQueryValidator_test.go @@ -55,7 +55,7 @@ func TestValidateQuery(t *testing.T) { err: "right comparison is invalid: &{ wid { }}"}, "Case3-1: query with custom field": { query: "CustomStringField = 'custom'", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom*'))", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')", }, "Case3-2: query with custom field value is empty": { query: "CustomStringField = ''", @@ -63,7 +63,7 @@ func TestValidateQuery(t *testing.T) { }, "Case4: custom field query with or in string": { query: "CustomStringField='Or'", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*'))", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*Or.*'')')", }, "Case5: custom keyword field query": { query: "CustomKeywordField = 'custom'", @@ -71,7 +71,7 @@ func TestValidateQuery(t *testing.T) { }, "Case6-1: complex query I: with parenthesis": { query: "(CustomStringField = 'custom and custom2 or custom3 order by') or CustomIntField between 1 and 10", - validated: "((JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*')) or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", + validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", }, "Case6-2: complex query II: with only system keys": { query: "DomainID = 'd-id' and (RunID = 'run-id' or WorkflowID = 'wid')", @@ -83,7 +83,7 @@ func TestValidateQuery(t *testing.T) { }, "Case6-4: complex query IV": { query: "WorkflowID = 'wid' and (CustomStringField = 'custom and custom2 or custom3 order by' or CustomIntField between 1 and 10)", - validated: "WorkflowID = 'wid' and ((JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*')) or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", + validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", }, "Case6-5: complex query with partial match": { query: "RunID like '123' or WorkflowID like '123'", diff --git a/docker/buildkite/docker-compose-local-pinot.yml b/docker/buildkite/docker-compose-local-pinot.yml index 0a835c87d5b..f0ccf4f242f 100644 --- a/docker/buildkite/docker-compose-local-pinot.yml +++ b/docker/buildkite/docker-compose-local-pinot.yml @@ -82,7 +82,7 @@ services: - zookeeper pinot-controller: - image: apachepinot/pinot:0.12.1 + image: apachepinot/pinot:1.3.0 command: "StartController -zkAddress zookeeper:2181 -controllerPort 9001" container_name: pinot-controller restart: unless-stopped @@ -97,7 +97,7 @@ services: aliases: - pinot-controller pinot-broker: - image: apachepinot/pinot:0.12.1 + image: apachepinot/pinot:1.3.0 command: "StartBroker -zkAddress zookeeper:2181" restart: unless-stopped container_name: "pinot-broker" @@ -112,7 +112,7 @@ services: aliases: - pinot-broker pinot-server: - image: apachepinot/pinot:0.12.1 + image: apachepinot/pinot:1.3.0 command: "StartServer -zkAddress zookeeper:2181" restart: unless-stopped container_name: "pinot-server" diff --git a/docker/buildkite/docker-compose-pinot.yml b/docker/buildkite/docker-compose-pinot.yml index 3c24e5b023d..354e5dd3667 100644 --- a/docker/buildkite/docker-compose-pinot.yml +++ b/docker/buildkite/docker-compose-pinot.yml @@ -82,7 +82,7 @@ services: - zookeeper pinot-controller: - image: apachepinot/pinot:1.1.0 + image: apachepinot/pinot:latest command: "StartController -zkAddress zookeeper:2181 -controllerPort 9001" container_name: pinot-controller restart: unless-stopped @@ -97,7 +97,7 @@ services: aliases: - pinot-controller pinot-broker: - image: apachepinot/pinot:1.1.0 + image: apachepinot/pinot:latest command: "StartBroker -zkAddress zookeeper:2181" restart: unless-stopped container_name: "pinot-broker" @@ -112,7 +112,7 @@ services: aliases: - pinot-broker pinot-server: - image: apachepinot/pinot:1.1.0 + image: apachepinot/pinot:latest command: "StartServer -zkAddress zookeeper:2181" restart: unless-stopped container_name: "pinot-server" diff --git a/docker/docker-compose-pinot.yml b/docker/docker-compose-pinot.yml index ea696b9e148..6ec71c7f7d6 100644 --- a/docker/docker-compose-pinot.yml +++ b/docker/docker-compose-pinot.yml @@ -40,7 +40,7 @@ services: ZOOKEEPER_CLIENT_PORT: 2181 ZOOKEEPER_TICK_TIME: 2000 pinot-controller: - image: apachepinot/pinot:1.1.0 + image: apachepinot/pinot:1.3.0 command: "StartController -zkAddress zookeeper:2181" container_name: pinot-controller restart: unless-stopped @@ -51,7 +51,7 @@ services: depends_on: - zookeeper pinot-broker: - image: apachepinot/pinot:1.1.0 + image: apachepinot/pinot:1.3.0 command: "StartBroker -zkAddress zookeeper:2181" restart: unless-stopped container_name: "pinot-broker" @@ -62,7 +62,7 @@ services: depends_on: - pinot-controller pinot-server: - image: apachepinot/pinot:1.1.0 + image: apachepinot/pinot:1.3.0 command: "StartServer -zkAddress zookeeper:2181" restart: unless-stopped container_name: "pinot-server"