Skip to content

Commit 2c6577d

Browse files
mathilde.oustlant authored and andialbrecht committed
Added HQL Keywords
1 parent 0008b97 commit 2c6577d

File tree

1 file changed

+97
-2
lines changed

1 file changed

+97
-2
lines changed

sqlparse/keywords.py

Lines changed: 97 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ def is_keyword(value):
1616
return (KEYWORDS_COMMON.get(val)
1717
or KEYWORDS_ORACLE.get(val)
1818
or KEYWORDS_PLPGSQL.get(val)
19+
or KEYWORDS_HQL.get(val)
1920
or KEYWORDS.get(val, tokens.Name)), value
2021

2122

@@ -63,8 +64,8 @@ def is_keyword(value):
6364
(r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func
6465
(r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
6566
(r'-?\d*(\.\d+)?E-?\d+', tokens.Number.Float),
66-
(r'-?(\d+(\.\d*)|\.\d+)', tokens.Number.Float),
67-
(r'-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
67+
(r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float),
68+
(r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
6869
(r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
6970
# not a real string literal in ANSI SQL:
7071
(r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
@@ -82,6 +83,7 @@ def is_keyword(value):
8283
(r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
8384
(r'GROUP\s+BY\b', tokens.Keyword),
8485
(r'ORDER\s+BY\b', tokens.Keyword),
86+
(r'(LATERAL\s+VIEW\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword),
8587

8688
(r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword),
8789

@@ -634,26 +636,39 @@ def is_keyword(value):
634636
'DATE': tokens.Name.Builtin,
635637
'DEC': tokens.Name.Builtin,
636638
'DECIMAL': tokens.Name.Builtin,
639+
'FILE_TYPE': tokens.Name.Builtin,
637640
'FLOAT': tokens.Name.Builtin,
638641
'INT': tokens.Name.Builtin,
639642
'INT8': tokens.Name.Builtin,
640643
'INTEGER': tokens.Name.Builtin,
641644
'INTERVAL': tokens.Name.Builtin,
642645
'LONG': tokens.Name.Builtin,
646+
'NATURALN': tokens.Name.Builtin,
647+
'NVARCHAR': tokens.Name.Builtin,
643648
'NUMBER': tokens.Name.Builtin,
644649
'NUMERIC': tokens.Name.Builtin,
650+
'PLS_INTEGER': tokens.Name.Builtin,
651+
'POSITIVE': tokens.Name.Builtin,
652+
'POSITIVEN': tokens.Name.Builtin,
645653
'REAL': tokens.Name.Builtin,
646654
'ROWID': tokens.Name.Builtin,
647655
'ROWLABEL': tokens.Name.Builtin,
648656
'ROWNUM': tokens.Name.Builtin,
649657
'SERIAL': tokens.Name.Builtin,
650658
'SERIAL8': tokens.Name.Builtin,
651659
'SIGNED': tokens.Name.Builtin,
660+
'SIGNTYPE': tokens.Name.Builtin,
661+
'SIMPLE_DOUBLE': tokens.Name.Builtin,
662+
'SIMPLE_FLOAT': tokens.Name.Builtin,
663+
'SIMPLE_INTEGER': tokens.Name.Builtin,
652664
'SMALLINT': tokens.Name.Builtin,
665+
'SYS_REFCURSOR': tokens.Name.Builtin,
653666
'SYSDATE': tokens.Name,
654667
'TEXT': tokens.Name.Builtin,
655668
'TINYINT': tokens.Name.Builtin,
656669
'UNSIGNED': tokens.Name.Builtin,
670+
'UROWID': tokens.Name.Builtin,
671+
'UTL_FILE': tokens.Name.Builtin,
657672
'VARCHAR': tokens.Name.Builtin,
658673
'VARCHAR2': tokens.Name.Builtin,
659674
'VARYING': tokens.Name.Builtin,
@@ -854,3 +869,83 @@ def is_keyword(value):
854869
'IN': tokens.Keyword,
855870
'LOOP': tokens.Keyword,
856871
}
872+
873+
# Hive Syntax
874+
KEYWORDS_HQL = {
875+
'EXPLODE': tokens.Keyword,
876+
'DIRECTORY': tokens.Keyword,
877+
'DISTRIBUTE': tokens.Keyword,
878+
'INCLUDE': tokens.Keyword,
879+
'LOCATE': tokens.Keyword,
880+
'OVERWRITE': tokens.Keyword,
881+
'POSEXPLODE': tokens.Keyword,
882+
883+
'ARRAY_CONTAINS': tokens.Keyword,
884+
'CMP': tokens.Keyword,
885+
'COLLECT_LIST': tokens.Keyword,
886+
'CONCAT': tokens.Keyword,
887+
'CONDITION': tokens.Keyword,
888+
'DATE_ADD': tokens.Keyword,
889+
'DATE_SUB': tokens.Keyword,
890+
'DECODE': tokens.Keyword,
891+
'DBMS_OUTPUT': tokens.Keyword,
892+
'ELEMENTS': tokens.Keyword,
893+
'EXCHANGE': tokens.Keyword,
894+
'EXTENDED': tokens.Keyword,
895+
'FLOOR': tokens.Keyword,
896+
'FOLLOWING': tokens.Keyword,
897+
'FROM_UNIXTIME': tokens.Keyword,
898+
'FTP': tokens.Keyword,
899+
'HOUR': tokens.Keyword,
900+
'INLINE': tokens.Keyword,
901+
'INSTR': tokens.Keyword,
902+
'LEN': tokens.Keyword,
903+
'MAXELEMENT': tokens.Keyword,
904+
'MAXINDEX': tokens.Keyword,
905+
'MAX_PART_DATE': tokens.Keyword,
906+
'MAX_PART_INT': tokens.Keyword,
907+
'MAX_PART_STRING': tokens.Keyword,
908+
'MINELEMENT': tokens.Keyword,
909+
'MININDEX': tokens.Keyword,
910+
'MIN_PART_DATE': tokens.Keyword,
911+
'MIN_PART_INT': tokens.Keyword,
912+
'MIN_PART_STRING': tokens.Keyword,
913+
'NOW': tokens.Keyword,
914+
'NVL': tokens.Keyword,
915+
'NVL2': tokens.Keyword,
916+
'PARSE_URL_TUPLE': tokens.Keyword,
917+
'PART_LOC': tokens.Keyword,
918+
'PART_COUNT': tokens.Keyword,
919+
'PART_COUNT_BY': tokens.Keyword,
920+
'PRINT': tokens.Keyword,
921+
'PUT_LINE': tokens.Keyword,
922+
'RANGE': tokens.Keyword,
923+
'REDUCE': tokens.Keyword,
924+
'REGEXP_REPLACE': tokens.Keyword,
925+
'RESIGNAL': tokens.Keyword,
926+
'RTRIM': tokens.Keyword,
927+
'SIGN': tokens.Keyword,
928+
'SIGNAL': tokens.Keyword,
929+
'SIN': tokens.Keyword,
930+
'SPLIT': tokens.Keyword,
931+
'SQRT': tokens.Keyword,
932+
'STACK': tokens.Keyword,
933+
'STR': tokens.Keyword,
934+
'SUBSTR': tokens.Keyword,
935+
'SUMMARY': tokens.Keyword,
936+
'TBLPROPERTIES': tokens.Keyword,
937+
'TIMESTAMP_ISO': tokens.Keyword,
938+
'TO_CHAR': tokens.Keyword,
939+
'TO_DATE': tokens.Keyword,
940+
'TO_TIMESTAMP': tokens.Keyword,
941+
'TRUNC': tokens.Keyword,
942+
'UNBOUNDED': tokens.Keyword,
943+
'UNIQUEJOIN': tokens.Keyword,
944+
'UNIX_TIMESTAMP': tokens.Keyword,
945+
'UTC_TIMESTAMP': tokens.Keyword,
946+
'VIEWS': tokens.Keyword,
947+
948+
'EXIT': tokens.Keyword,
949+
'BREAK': tokens.Keyword,
950+
'LEAVE': tokens.Keyword,
951+
}

0 commit comments

Comments
 (0)