-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add tests of lexer dialects (sqlparse, sqlfluff)
This PR adds tests of our ports of dialects from sqlparse and sqlfluff. The test files are the SQL queries extracted from the tests of sqlparse and sqlfluff. The expected outputs (tokens) are collected by running the queries through the original lexers of sqlparse and sqlfluff. By doing this, we get really good test coverage (>10k SQL queries, including many edge-case/"strange"/"tricky" SQL queries) and we can be almost certain that our implementation behaves identically to the original sqlparse/sqlfluff implementations. The test files are stored in a separate repo (added to Quesma as a Git submodule), because they are large (~10MB).
- Loading branch information
1 parent
199bcea
commit dbf833f
Showing
5 changed files
with
149 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "platform/parsers/sql/testdata"] | ||
path = platform/parsers/sql/testdata | ||
url = https://github.com/avelanarius/quesma-testdata-wip.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// Copyright Quesma, licensed under the Elastic License 2.0. | ||
// SPDX-License-Identifier: Elastic-2.0 | ||
|
||
package testutils | ||
|
||
import (
	"bytes"
	"fmt"
	"os"
)
|
||
type ParsedTestcase struct { | ||
Query string | ||
ExpectedTokens []ExpectedToken | ||
} | ||
|
||
// ExpectedToken is one expected lexer token: its type name and its text.
type ExpectedToken struct {
	// TokenType is the token type, stored as a string.
	TokenType string
	// TokenValue is the token text; it may span several lines.
	TokenValue string
}
|
||
// Loads a list of test queries and their expected tokens (extracted from existing parsers). | ||
// The structure of the file is as follows: | ||
// | ||
// [QUERY1] | ||
// <end_of_query/> | ||
// [TOKEN_TYPE_1] | ||
// [TOKEN_VALUE_1] | ||
// <end_of_token/> | ||
// [TOKEN_TYPE_2] | ||
// [TOKEN_VALUE_2] | ||
// <end_of_token/> | ||
// ... | ||
// <end_of_tokens/> | ||
// [QUERY2] | ||
// ... | ||
func LoadParsedTestcases(filename string) []ParsedTestcase { | ||
contents, err := os.ReadFile(filename) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
testcases := bytes.Split(contents, []byte("\n<end_of_tokens/>\n")) | ||
testcases = testcases[:len(testcases)-1] | ||
|
||
var parsedTestcases []ParsedTestcase | ||
for _, testcase := range testcases { | ||
endOfQuerySplit := bytes.Split(testcase, []byte("\n<end_of_query/>\n")) | ||
|
||
query := string(endOfQuerySplit[0]) | ||
|
||
tokens := bytes.Split(endOfQuerySplit[1], []byte("\n<end_of_token/>\n")) | ||
tokens = tokens[:len(tokens)-1] | ||
|
||
var expectedTokens []ExpectedToken | ||
for _, tokenDescription := range tokens { | ||
tokenDescriptionSplit := bytes.SplitN(tokenDescription, []byte("\n"), 2) | ||
tokenType := string(tokenDescriptionSplit[0]) | ||
tokenValue := string(tokenDescriptionSplit[1]) | ||
expectedTokens = append(expectedTokens, ExpectedToken{tokenType, tokenValue}) | ||
} | ||
|
||
parsedTestcases = append(parsedTestcases, ParsedTestcase{query, expectedTokens}) | ||
} | ||
return parsedTestcases | ||
} |