Add tests of lexer dialects (sqlparse, sqlfluff)
This PR adds tests of our ports of dialects from sqlparse and sqlfluff.

The test files are the SQL queries extracted from the test suites of
sqlparse and sqlfluff. The expected outputs (tokens) are collected by
running those queries through the original sqlparse and sqlfluff lexers.

This gives us very good test coverage (>10k SQL queries, including many
edge-case/"strange"/"tricky" queries) and lets us be almost certain that
our implementation behaves identically to the original sqlparse/sqlfluff
implementations.

The test files are stored in a separate repo (added to Quesma as a Git
submodule), because they are large (~10MB).
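
For illustration, each collected test case pairs one query with the reference
lexer's token stream, stored as alternating token-type/token-value lines. A
minimal sketch of one entry (the token name follows sqlparse's naming
conventions; illustrative only, not copied from the test files):

SELECT
<end_of_query/>
Token.Keyword.DML
SELECT
<end_of_token/>
<end_of_tokens/>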
avelanarius committed Feb 28, 2025
1 parent 199bcea commit dbf833f
Showing 5 changed files with 149 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "platform/parsers/sql/testdata"]
path = platform/parsers/sql/testdata
url = https://github.com/avelanarius/quesma-testdata-wip.git
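
Because the test data lives in a submodule, a fresh checkout needs it
initialized before the new tests can find their input files. For example,
with standard git/go commands (paths as in this PR):

git submodule update --init platform/parsers/sql/testdata
go test ./platform/parsers/sql/lexer/...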
40 changes: 39 additions & 1 deletion platform/parsers/sql/lexer/dialect_sqlparse/rule_test.go
@@ -4,9 +4,11 @@
package dialect_sqlparse

import (
-	"github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/core"
	"testing"

+	"github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/core"
+	"github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/testutils"
+
	"github.com/stretchr/testify/assert"
)

@@ -38,6 +40,42 @@ func TestSimpleSelect(t *testing.T) {
	assert.Equal(t, "tabela", tokens[6].RawValue)
}

func TestSqlparsedTestcases(t *testing.T) {
	testfiles := []string{
		"../../testdata/testdata/dialect_sqlparse/parsed-sqlparse-testcases.txt",
		"../../testdata/testdata/dialect_sqlparse/parsed-sqlfluff-all-testcases.txt",
	}

	for _, testfile := range testfiles {
		t.Run(testfile, func(t *testing.T) {
			testcases := testutils.LoadParsedTestcases(testfile)
			for _, testcase := range testcases {
				t.Run(testcase.Query, func(t *testing.T) {
					tokens := core.Lex(testcase.Query, SqlparseRules)
					assert.Equal(t, len(testcase.ExpectedTokens), len(tokens))

					// Compare pairwise up to the shorter of the two streams,
					// so a length mismatch still yields positional diagnostics.
					commonLength := min(len(testcase.ExpectedTokens), len(tokens))

					for i := 0; i < commonLength; i++ {
						assert.Equalf(t, testcase.ExpectedTokens[i].TokenType, tokens[i].Type.Name, "Token type at position %d", i)
						assert.Equalf(t, testcase.ExpectedTokens[i].TokenValue, tokens[i].RawValue, "Token value at position %d", i)
					}

					// On failure, log the full token-by-token comparison to make
					// the first mismatch easy to locate.
					if t.Failed() {
						for i := 0; i < commonLength; i++ {
							if testcase.ExpectedTokens[i].TokenType != tokens[i].Type.Name || testcase.ExpectedTokens[i].TokenValue != tokens[i].RawValue {
								t.Logf("Mismatch token at position %d: %s(%s). Got: %s(%s)", i, testcase.ExpectedTokens[i].TokenType, testcase.ExpectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
							} else {
								t.Logf("Expected token at position %d: %s(%s). Got: %s(%s)", i, testcase.ExpectedTokens[i].TokenType, testcase.ExpectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
							}
						}
					}
				})
			}
		})
	}
}

func FuzzLex(f *testing.F) {
	f.Add("SELECT * FROM tabela")
	f.Add("SELECT id, name, email FROM customers WHERE age > 21")
42 changes: 41 additions & 1 deletion platform/parsers/sql/lexer/dialects_sqlfluff/ansi/rule_test.go
@@ -4,11 +4,51 @@
package ansi

import (
+	"testing"
+
	"github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/core"
+	"github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/testutils"
	"github.com/stretchr/testify/assert"
-	"testing"
)

func TestSqlfluffAnsiTestcases(t *testing.T) {
	testfiles := []string{
		"../../../testdata/testdata/dialects_sqlfluff/parsed-sqlfluff-ansi-testcases.txt",
		"../../../testdata/testdata/dialects_sqlfluff/parsed-sqlparse-testcases.txt",
	}

	for _, testfile := range testfiles {
		t.Run(testfile, func(t *testing.T) {
			testcases := testutils.LoadParsedTestcases(testfile)
			for _, testcase := range testcases {
				t.Run(testcase.Query, func(t *testing.T) {
					// Remove the last expected token, which is an EOF token.
					expectedTokens := testcase.ExpectedTokens[:len(testcase.ExpectedTokens)-1]

					tokens := core.Lex(testcase.Query, SqlfluffAnsiRules)
					assert.Equal(t, len(expectedTokens), len(tokens))

					// Compare pairwise up to the shorter of the two streams.
					commonLength := min(len(expectedTokens), len(tokens))

					for i := 0; i < commonLength; i++ {
						assert.Equalf(t, expectedTokens[i].TokenType, tokens[i].Type.Name, "Token type at position %d", i)
						assert.Equalf(t, expectedTokens[i].TokenValue, tokens[i].RawValue, "Token value at position %d", i)
					}

					// On failure, log the full token-by-token comparison.
					if t.Failed() {
						for i := 0; i < commonLength; i++ {
							if expectedTokens[i].TokenType != tokens[i].Type.Name || expectedTokens[i].TokenValue != tokens[i].RawValue {
								t.Logf("Mismatch token at position %d: %s(%s). Got: %s(%s)", i, expectedTokens[i].TokenType, expectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
							} else {
								t.Logf("Expected token at position %d: %s(%s). Got: %s(%s)", i, expectedTokens[i].TokenType, expectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
							}
						}
					}
				})
			}
		})
	}
}

func FuzzLex(f *testing.F) {
	f.Add("SELECT * FROM tabela")
	f.Add("SELECT id, name, email FROM customers WHERE age > 21")
65 changes: 65 additions & 0 deletions platform/parsers/sql/lexer/testutils/testdata_loader.go
@@ -0,0 +1,65 @@
// Copyright Quesma, licensed under the Elastic License 2.0.
// SPDX-License-Identifier: Elastic-2.0

package testutils

import (
	"bytes"
	"os"
)

type ParsedTestcase struct {
	Query          string
	ExpectedTokens []ExpectedToken
}

type ExpectedToken struct {
	TokenType  string
	TokenValue string
}

// LoadParsedTestcases loads a list of test queries and their expected tokens
// (extracted from existing parsers).
// The structure of the file is as follows:
//
// [QUERY1]
// <end_of_query/>
// [TOKEN_TYPE_1]
// [TOKEN_VALUE_1]
// <end_of_token/>
// [TOKEN_TYPE_2]
// [TOKEN_VALUE_2]
// <end_of_token/>
// ...
// <end_of_tokens/>
// [QUERY2]
// ...
func LoadParsedTestcases(filename string) []ParsedTestcase {
	contents, err := os.ReadFile(filename)
	if err != nil {
		panic(err) // test helper: fail loudly if the submodule data is missing
	}

	// Each testcase ends with an <end_of_tokens/> sentinel line, so the
	// final split element is an empty trailing chunk; drop it.
	testcases := bytes.Split(contents, []byte("\n<end_of_tokens/>\n"))
	testcases = testcases[:len(testcases)-1]

	var parsedTestcases []ParsedTestcase
	for _, testcase := range testcases {
		endOfQuerySplit := bytes.Split(testcase, []byte("\n<end_of_query/>\n"))

		query := string(endOfQuerySplit[0])

		tokens := bytes.Split(endOfQuerySplit[1], []byte("\n<end_of_token/>\n"))
		tokens = tokens[:len(tokens)-1]

		var expectedTokens []ExpectedToken
		for _, tokenDescription := range tokens {
			// The first line is the token type; everything after the first
			// newline is the raw token value (which may itself span lines).
			tokenDescriptionSplit := bytes.SplitN(tokenDescription, []byte("\n"), 2)
			tokenType := string(tokenDescriptionSplit[0])
			tokenValue := string(tokenDescriptionSplit[1])
			expectedTokens = append(expectedTokens, ExpectedToken{tokenType, tokenValue})
		}

		parsedTestcases = append(parsedTestcases, ParsedTestcase{query, expectedTokens})
	}
	return parsedTestcases
}
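
A minimal usage sketch of the loader (illustrative, not part of the diff; the
path assumes the submodule is initialized and the process runs from the
repository root):

package main

import (
	"fmt"

	"github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/testutils"
)

func main() {
	// Load the sqlparse-derived testcases and print the first one.
	cases := testutils.LoadParsedTestcases("platform/parsers/sql/testdata/testdata/dialect_sqlparse/parsed-sqlparse-testcases.txt")
	fmt.Printf("loaded %d testcases\n", len(cases))

	tc := cases[0]
	fmt.Printf("query: %q\n", tc.Query)
	for _, tok := range tc.ExpectedTokens {
		fmt.Printf("  %s(%q)\n", tok.TokenType, tok.TokenValue)
	}
}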
1 change: 1 addition & 0 deletions platform/parsers/sql/testdata
Submodule testdata added at e7995a
