@@ -258,7 +258,9 @@ describe('html', () => {
258258} )
259259
260260describe ( 'getSegments' , ( ) => {
261- it ( 'numbers and operators' , ( ) => {
261+ // Test that we can parse all forms of numbers.
262+ // See https://dev.mysql.com/doc/refman/8.0/en/number-literals.html for the syntax (at least for MySQL).
263+ it ( 'numbers' , ( ) => {
262264 expect ( getSegments ( '34 - -.5 + +0.5 * 1.23E45 / 4E-3' ) )
263265 . toStrictEqual ( [
264266 { name : 'number' , content : '34' } ,
@@ -281,6 +283,219 @@ describe('getSegments', () => {
281283 ] )
282284 } )
283285
286+ // Test that we can parse the non-logical operators, i.e. +, -, <>, etc. but not AND, OR, etc.
287+ //
288+ // All the non-logical operators are parsed into "special" segments, although the converse isn't true,
289+ // because ",", ";", ":", and "." are also parsed as "special" segments.
290+ // The logical operators like AND and BETWEEN are parsed as "keyword" segments.
291+ //
292+ // In particular, this describe() block tests that:
293+ //
294+ // * All non-logical operators listed at https://www.w3schools.com/sql/sql_operators.asp etc. are recognized.
295+ // * Multi-character operators like >= are parsed as a single segment, even though > and = are both operators too.
296+ // * Minus and dot are treated as part of a number when they are next to a digit, e.g. "x > -5" or even "x>-5".
297+ // * Minus is treated as a binary operator when there are spaces around it, e.g. "x - 5".
298+ //
299+ // Conversely, it avoids testing strings like "x-5" because our regex-lexer architecture isn't
300+ // sophisticated enough to realize the minus must be a binary operator.
describe('non-logical operators', () => {
  // Small builders so each expectation reads as a token stream instead of a
  // wall of object literals. Every builder returns a plain { name, content }
  // object, which is what toStrictEqual compares against.
  const segmentOf = (name) => (content) => ({ name, content })
  const identifier = segmentOf('identifier')
  const keyword = segmentOf('keyword')
  const number = segmentOf('number')
  const special = segmentOf('special')
  const space = segmentOf('whitespace')(' ')

  // Runs getSegments() over each [sql, expectedSegments] pair, in order.
  const expectAll = (cases) => {
    for (const [sql, expected] of cases) {
      expect(getSegments(sql)).toStrictEqual(expected)
    }
  }

  it('arithmetic', () => {
    // "- -.2" checks a binary minus followed by a signed number with a leading dot;
    // "/.56" checks that a dot directly after an operator still starts a number.
    expectAll([
      ['a + 1 - -.2 * 34 /.56 % 7', [
        identifier('a'), space,
        special('+'), space,
        number('1'), space,
        special('-'), space,
        number('-.2'), space,
        special('*'), space,
        number('34'), space,
        special('/'),
        number('.56'), space,
        special('%'), space,
        number('7')
      ]]
    ])
  })

  it('bitwise', () => {
    // The tail "^b>>c<<d" has no whitespace, so >> and << must each come back
    // as a single segment rather than two > or two < segments.
    expectAll([
      ['a & 8 | 9 ^b>>c<<d', [
        identifier('a'), space,
        special('&'), space,
        number('8'), space,
        special('|'), space,
        number('9'), space,
        special('^'),
        identifier('b'),
        special('>>'),
        identifier('c'),
        special('<<'),
        identifier('d')
      ]]
    ])
  })

  it('single character comparison', () => {
    expectAll([
      ['a = b', [identifier('a'), space, special('='), space, identifier('b')]],
      ['a > b', [identifier('a'), space, special('>'), space, identifier('b')]],
      ['a<b', [identifier('a'), special('<'), identifier('b')]]
    ])
  })

  it('multi character comparison', () => {
    expectAll([
      // A minus or dot directly after the operator belongs to the number.
      ['a>=-5', [identifier('a'), special('>='), number('-5')]],
      ['a <= b', [identifier('a'), space, special('<='), space, identifier('b')]],
      ['a!=.5', [identifier('a'), special('!='), number('.5')]],
      ['a!<b', [identifier('a'), special('!<'), identifier('b')]],
      ['a!>b', [identifier('a'), special('!>'), identifier('b')]],
      // "<>" (not equal) must stay one segment even though "<" and ">" are
      // operators on their own; it was previously not exercised by any test.
      ['a<>b', [identifier('a'), special('<>'), identifier('b')]]
    ])
  })

  it('compound operators', () => {
    // NOTE(review): "^-=" and "|*=" are spelled as in the operator list this
    // suite was written against; T-SQL appears to document these operators as
    // "^=" and "|=". Confirm which spellings the lexer is meant to accept.
    const comma = special(',')
    expectAll([
      ['UPDATE STUDENTS SET MARKS+=10,A-=5,B*=6,C/=7,D%=8,E&=F,G^-=H,I|*=J WHERE MARKS<85;', [
        keyword('UPDATE'), space,
        identifier('STUDENTS'), space,
        keyword('SET'), space,
        identifier('MARKS'), special('+='), number('10'), comma,
        identifier('A'), special('-='), number('5'), comma,
        identifier('B'), special('*='), number('6'), comma,
        identifier('C'), special('/='), number('7'), comma,
        identifier('D'), special('%='), number('8'), comma,
        identifier('E'), special('&='), identifier('F'), comma,
        identifier('G'), special('^-='), identifier('H'), comma,
        identifier('I'), special('|*='), identifier('J'), space,
        keyword('WHERE'), space,
        identifier('MARKS'), special('<'), number('85'),
        special(';')
      ]]
    ])
  })
})
460+
it('other special characters', () => {
  // Expands compact [name, content] pairs into the { name, content } objects
  // that the expectations compare against.
  const asSegments = (pairs) => pairs.map(([name, content]) => ({ name, content }))

  // ".", "," and ";" are not operators but are still expected as "special" segments.
  const qualifiedSelect = getSegments('select foo.a, foo.b from foo;')
  expect(qualifiedSelect).toStrictEqual(asSegments([
    ['keyword', 'select'],
    ['whitespace', ' '],
    ['identifier', 'foo'],
    ['special', '.'],
    ['identifier', 'a'],
    ['special', ','],
    ['whitespace', ' '],
    ['identifier', 'foo'],
    ['special', '.'],
    ['identifier', 'b'],
    ['whitespace', ' '],
    ['keyword', 'from'],
    ['whitespace', ' '],
    ['identifier', 'foo'],
    ['special', ';']
  ]))

  // ":" in front of a bind parameter is a "special" segment. "INSERT INTO" is
  // expected as one keyword segment and "MyTable" as a "function" segment.
  const parameterizedInsert = getSegments('INSERT INTO MyTable (ID) VALUES (:myId)')
  expect(parameterizedInsert).toStrictEqual(asSegments([
    ['keyword', 'INSERT INTO'],
    ['whitespace', ' '],
    ['function', 'MyTable'],
    ['whitespace', ' '],
    ['bracket', '('],
    ['identifier', 'ID'],
    ['bracket', ')'],
    ['whitespace', ' '],
    ['keyword', 'VALUES'],
    ['whitespace', ' '],
    ['bracket', '('],
    ['special', ':'],
    ['identifier', 'myId'],
    ['bracket', ')']
  ]))
})
498+
284499 it ( 'complex query' , ( ) => {
285500 expect ( getSegments ( "SELECT COUNT(id), `id`, `username` FROM `users` WHERE `email` = 'test@example.com' AND `foo` = 'BAR' OR 1=1" ) )
286501 . toStrictEqual ( [
0 commit comments