Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Need a help with this bnf for sql expressions #2

Open
mingodad opened this issue Aug 2, 2021 · 2 comments
Open

Need a help with this bnf for sql expressions #2

mingodad opened this issue Aug 2, 2021 · 2 comments

Comments

@mingodad
Copy link

mingodad commented Aug 2, 2021

Hello !

I'm testing this tool with a subset of sqlite expressions and it hits maximum recursion too often could someone give some help to improve it ?

I noticed that for terminals it's best to add space inside the strings than to use --separator.

I'm testing it here https://baturin.org/tools/bnfgen/ with:

--max-reductions = 1000000
--max-nonproductive-reductions = 10000
<start> ::= "select ( " <expr> " ) as expr;" ;

<expr>	::=
	<term>
	| "(" <expr> ")"
	| <id>
	#| JOIN_KW
	#| <nm> "." <nm>
	#| <nm> "." <nm> "." <nm>
	#| VARIABLE
	| <expr> " collate " <ids>
	| " CAST" "(" <expr> " as " <typetoken> ")"
	| <id> "(" <distinct>{0,1} <exprlist> ")"
	| <id> "(" "*" ")"
	#| <id> "(" distinct{0,1} <exprlist> ")" filter_over
	#| <id> "(" "*" ")" filter_over
	| "(" <nexprlist> "," <expr> ")"
	| <expr> <binary_op> <expr>
	#| <expr> likeop <expr>
	#| <expr> likeop <expr> " ESCAPE " <expr>
	| <expr> <suffix_op>
	| <unary_op> <expr>
	| <expr> <between_op> <expr> " AND " <expr>
	| <expr> <in_op> "(" <exprlist> ")"
	| "(" <select> ")"
	| <expr> <in_op> "(" <select> ")"
	#| <expr> <in_op> nm dbnm paren_exprlist
	| " EXISTS" "(" <select> ")"
	| " CASE " <expr>{0,1} <case_exprlist> <case_else>{0,1} " END "
	#| " RAISE" "(" "IGNORE" ")"
	#| " RAISE" "(" raisetype "," nm ")"
	;

<exprlist>	::=
	<nexprlist>
	;

<nexprlist>	::=
	<nexprlist> "," <expr>
	| <expr>
	;

<paren_exprlist>	::=
	#/* empty */
	"(" <exprlist>{0,1} ")"
	;

<term>	::=
	" NULL "
	| <number>
	#| BLOB
	| <STRING>
	#| CTIME_KW
	;

<binary_op> ::=
	<logical_op>
	| <comparison_op>
	| <equality_op>
	| <bitwise_op>
	| <add_op>
	| <mul_op>
	| " || " #CONCAT
	| " IS "
	| " IS NOT "
	;

<comparison_op> ::=
	" < " | " > " | " >= " | " <= "
	;

<equality_op> ::=
	" = " | " <> "
	;

<bitwise_op> ::=
	" & " | " | " | " << " | " >> "
	;

<add_op> ::=
	" + " | " - "
	;

<mul_op> ::=
	" * " | " / " | " % "
	;

<logical_op> ::=
	" AND " | " OR "
	;

<unary_op> ::=
	" NOT "
	| " !"
	| " +"
	| " -"
	;

<suffix_op> ::=
	" NOT NULL "
	| " ISNULL "
	| " NOTNULL "
	;

<STRING> ::= "'str'";

<id> ::=
	"ab"
	| "cd"
	| "ef"
	| "gh"
	;

<nm> ::= <id>
	| <STRING>
	#| JOIN_KW
	;

<ids> ::=
	<id> | <STRING>
	;

<distinct>	::=
	" DISTINCT "
	| " ALL "
	;

<in_op> ::=
	" IN "
	| " NOT IN "
	;

<between_op> ::=
	" BETWEEN "
	| " NOT BETWEEN "
	;

<typetoken>	::=	#/* empty */
	<typename>
	| <typename> "(" <signed> ")"
	| <typename> "(" <signed> "," <signed> ")"
	;

<typename>	::=
	<ids>
	| <typename> <ids>
	;

<signed>	::=
	<plus_num>
	| <minus_num>
	;

<plus_num> ::=
	" +"{0,1} <number>
	;

<minus_num> ::=
	" -" <number>
	;

<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;

<integer> ::= <nzdigit> <digit>{1, 3};
<float> ::= <nzdigit>{1, 3} "." <digit>{1, 3};
<number> ::= <integer> | <float>;

<select> ::=
	" select 1 as one "
	;

<case_exprlist>	::=
	<case_exprlist> " WHEN " <expr> " THEN " <expr>
	| " WHEN " <expr> " THEN " <expr>
	;

<case_else>	::=
	" ELSE " <expr>
	;

Cheers !

@mingodad
Copy link
Author

mingodad commented Aug 2, 2021

Here is a basic expression that gives good results most of the time:

<start> ::= <expr> ;

<expr> ::=
	<expr> <operations> <expr>
	| "(" <expr> ")"
	| <number>
	;

<operations> ::=
	"+" | "-" | "*" | "/" | "%"
	;

<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;

<integer> ::= <nzdigit> <digit>{1, 3};
<float> ::= <nzdigit>{1, 3} "." <digit>{1, 3};
<number> ::= <integer> | <float>;

@mingodad
Copy link
Author

mingodad commented Aug 3, 2021

Here is a simple C program for the simple expr grammar to help understand one possible simple implementation of it:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
<start> ::= <expr> ;

<expr> ::=
	<expr> <operations> <expr>
	| "(" <expr> ")"
	| <number>
	;

<operations> ::=
	"+" | "-" | "*" | "/" | "%"
	;

<nzdigit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
<digit> ::= "0" | <nzdigit> ;

<integer> ::= <nzdigit> <digit>{0, 3};
<float> ::= <integer> "." <integer>;
<number> ::= <integer> | <float>;
*/

static int xrandom(int low, int up) {
	double r = (double)rand() * (1.0 / ((double)RAND_MAX + 1.0));
	r *= (double)(up - low) + 1.0;
	return ((int)r + low);
}

static int max_xcount = 2048;
static int xcount = 0;
static void xoutput(const char *str) {
	while(*str) {
		putc(*str, stdout);
		++xcount;
		++str;
	}
}


void expr_nzdigit() {
	switch(xrandom(1,9)) {
		case 1: xoutput("1"); break;
		case 2: xoutput("2"); break;
		case 3: xoutput("3"); break;
		case 4: xoutput("4"); break;
		case 5: xoutput("5"); break;
		case 6: xoutput("6"); break;
		case 7: xoutput("7"); break;
		case 8: xoutput("8"); break;
		case 9: xoutput("9"); break;
	}
}

void expr_digit() {
	switch(xrandom(0,9)) {
		case 0: xoutput("0"); break;
		case 1: xoutput("1"); break;
		case 2: xoutput("2"); break;
		case 3: xoutput("3"); break;
		case 4: xoutput("4"); break;
		case 5: xoutput("5"); break;
		case 6: xoutput("6"); break;
		case 7: xoutput("7"); break;
		case 8: xoutput("8"); break;
		case 9: xoutput("9"); break;
	}
}

void expr_integer() {
	expr_nzdigit();
	int imax = xrandom(0,3);
	for(int i=0; i < imax; ++i) expr_digit();
}

void expr_float() {
	expr_integer();
	xoutput(".");
	expr_integer();
}

void expr_number() {
	switch(xrandom(0,1)) {
		case 0: expr_integer(); break;
		case 1: expr_float(); break;
	}
}

void expr_operations() {
	switch(xrandom(0,4)) {
		case 0: xoutput("+"); break;
		case 1: xoutput("-"); break;
		case 2: xoutput("*"); break;
		case 3: xoutput("/"); break;
		case 4: xoutput("%"); break;
	}
}

void expr_expr() {
	if(xcount > max_xcount) return;
	switch(xrandom(0,2)) {
		case 0: expr_expr(); expr_operations(); expr_expr(); break;
		case 1: xoutput("("); expr_expr(); xoutput(")"); break;
		case 2: expr_number(); break;
	}
}

void expr_start() {
	xcount = 0;
	srand(clock());
	expr_expr();
	xoutput("\n");
	if(xcount > max_xcount)
		printf("Output exceeds limit of %d\n", max_xcount);
}

/*
Accpets one integer parameter to limit the output
*/
int main(int argc, char *argv[]) {
	if(argc > 1) {
		max_xcount = atoi(argv[1]);
	}
	expr_start();
	return 0;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant