diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index b7392d2..7b678c4 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -15,14 +15,14 @@ func TestTokenize(t *testing.T) { }{ { YAML: `null - `, + `, Tokens: token.Tokens{ { Type: token.NullType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "null", - Origin: "null\n\t\t", + Origin: "null\n ", }, }, }, @@ -100,7 +100,7 @@ func TestTokenize(t *testing.T) { }, { YAML: `{} - `, + `, Tokens: token.Tokens{ { Type: token.MappingStartType, @@ -119,8 +119,7 @@ func TestTokenize(t *testing.T) { }, }, { - YAML: `v: hi - `, + YAML: `v: hi`, Tokens: token.Tokens{ { Type: token.StringType, @@ -141,13 +140,39 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "hi", - Origin: " hi\n", + Origin: " hi", }, }, }, { - YAML: `v: "true" - `, + YAML: `v: a`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "v", + Origin: "v", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "a", + //nolint: gci + Origin: " a", + }, + }, + }, + { + YAML: `v: "true"`, Tokens: token.Tokens{ { Type: token.StringType, @@ -173,8 +198,7 @@ func TestTokenize(t *testing.T) { }, }, { - YAML: `v: "false" - `, + YAML: `v: "false"`, Tokens: token.Tokens{ { Type: token.StringType, @@ -200,8 +224,7 @@ func TestTokenize(t *testing.T) { }, }, { - YAML: `v: true - `, + YAML: `v: true`, Tokens: token.Tokens{ { Type: token.StringType, @@ -222,13 +245,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "true", - Origin: " true\n", + Origin: " true", }, }, }, { - YAML: `v: false - `, + YAML: `v: false`, Tokens: token.Tokens{ { Type: token.StringType, @@ -249,13 +271,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "false", - Origin: " false\n", + Origin: " false", }, }, }, { - YAML: `v: 10 - `, + YAML: `v: 10`, Tokens: token.Tokens{ { Type: token.StringType, @@ -276,13 +297,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "10", - Origin: " 10\n", + Origin: " 10", }, }, }, { - YAML: `v: -10 - `, + YAML: `v: -10`, Tokens: token.Tokens{ { Type: token.StringType, @@ -303,13 +323,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "-10", - Origin: " -10\n", + Origin: " -10", }, }, }, { - YAML: `v: 42 - `, + YAML: `v: 42`, Tokens: token.Tokens{ { Type: token.StringType, @@ -330,13 +349,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "42", - Origin: " 42\n", + Origin: " 42", }, }, }, { - YAML: `v: 4294967296 - `, + YAML: `v: 4294967296`, Tokens: token.Tokens{ { Type: token.StringType, @@ -357,13 +375,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "4294967296", - Origin: " 4294967296\n", + Origin: " 4294967296", }, }, }, { - YAML: `v: "10" - `, + YAML: `v: "10"`, Tokens: token.Tokens{ { Type: token.StringType, @@ -389,8 +406,7 @@ func TestTokenize(t *testing.T) { }, }, { - YAML: `v: 0.1 - `, + YAML: `v: 0.1`, Tokens: token.Tokens{ { Type: token.StringType, @@ -411,13 +427,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "0.1", - Origin: " 0.1\n", + Origin: " 0.1", }, }, }, { - YAML: `v: 0.99 - `, + YAML: `v: 0.99`, Tokens: token.Tokens{ { Type: token.StringType, @@ -438,13 +453,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "0.99", - Origin: " 0.99\n", + Origin: " 0.99", }, }, }, { - YAML: `v: -0.1 - `, + YAML: `v: -0.1`, Tokens: token.Tokens{ { Type: token.StringType, @@ -465,13 +479,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "-0.1", - Origin: " -0.1\n", + Origin: " -0.1", }, }, }, { - YAML: `v: .inf - `, + YAML: `v: .inf`, Tokens: token.Tokens{ { Type: token.StringType, @@ -492,13 +505,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: ".inf", - Origin: " .inf\n", + Origin: " .inf", }, }, }, { - YAML: `v: -.inf - `, + YAML: `v: -.inf`, Tokens: token.Tokens{ { Type: token.StringType, @@ -519,13 +531,12 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "-.inf", - Origin: " -.inf\n", + Origin: " -.inf", }, }, }, { - YAML: `v: .nan - `, + YAML: `v: .nan`, Tokens: token.Tokens{ { Type: token.StringType, @@ -546,7 +557,7 @@ func TestTokenize(t *testing.T) { CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: ".nan", - Origin: " .nan\n", + Origin: " .nan", }, }, }, @@ -586,8 +597,7 @@ a: }, }, { - YAML: `v: null - `, + YAML: `v: null`, Tokens: token.Tokens{ { Type: token.StringType, @@ -608,13 +618,12 @@ a: CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "null", - Origin: " null\n", + Origin: " null", }, }, }, { - YAML: `v: "" - `, + YAML: `v: ""`, Tokens: token.Tokens{ { Type: token.StringType, @@ -897,8 +906,7 @@ a: }, }, { - YAML: `a: '-' - `, + YAML: `a: '-'`, Tokens: token.Tokens{ { Type: token.StringType, @@ -924,15 +932,14 @@ a: }, }, { - YAML: `123 - `, + YAML: `123`, Tokens: token.Tokens{ { Type: token.IntegerType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "123", - Origin: "123\n\t\t", + Origin: "123", }, }, }, @@ -964,8 +971,7 @@ a: }, }, { - YAML: `a: null - `, + YAML: `a: null`, Tokens: token.Tokens{ { Type: token.StringType, @@ -986,13 +992,12 @@ a: CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "null", - Origin: " null\n", + Origin: " null", }, }, }, { - YAML: `a: {x: 1} - `, + YAML: `a: {x: 1}`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1046,8 +1051,7 @@ a: }, }, { - YAML: `a: [1, 2] - `, + YAML: `a: [1, 2]`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1101,16 +1105,17 @@ a: }, }, { - YAML: `t2: 2018-01-09T10:40:47Z - t4: 2098-01-09T10:40:47Z - `, + YAML: ` +t2: 2018-01-09T10:40:47Z +t4: 2098-01-09T10:40:47Z +`, Tokens: token.Tokens{ { Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "t2", - Origin: "t2", + Origin: "\nt2", }, { Type: token.MappingValueType, @@ -1130,8 +1135,8 @@ a: Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\tt4", - Origin: "\t\tt4", + Value: "t4", + Origin: "t4", }, { Type: token.MappingValueType, @@ -1145,13 +1150,12 @@ a: CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "2098-01-09T10:40:47Z", - Origin: " 2098-01-09T10:40:47Z\n", + Origin: " 2098-01-09T10:40:47Z", }, }, }, { - YAML: `a: {b: c, d: e} - `, + YAML: `a: {b: c, d: e}`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1233,8 +1237,7 @@ a: }, }, { - YAML: `a: 3s - `, + YAML: `a: 3s`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1255,13 +1258,12 @@ a: CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "3s", - Origin: " 3s\n", + Origin: " 3s", }, }, }, { - YAML: `a: - `, + YAML: `a: `, Tokens: token.Tokens{ { Type: token.StringType, @@ -1282,13 +1284,12 @@ a: CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "", - Origin: " \n", + Origin: " ", }, }, }, { - YAML: `a: "1:1" - `, + YAML: `a: "1:1"`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1314,8 +1315,7 @@ a: }, }, { - YAML: `a: "\0" - `, + YAML: `a: "\0"`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1341,8 +1341,7 @@ a: }, }, { - YAML: `a: !!binary gIGC - `, + YAML: `a: !!binary gIGC`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1370,7 +1369,7 @@ a: CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "gIGC", - Origin: "gIGC\n", + Origin: "gIGC", }, }, }, @@ -1419,20 +1418,21 @@ a: !!binary | }, }, { - YAML: `b: 2 - a: 1 - d: 4 - c: 3 - sub: - e: 5 - `, + YAML: ` +b: 2 +a: 1 +d: 4 +c: 3 +sub: + e: 5 +`, Tokens: token.Tokens{ { Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "b", - Origin: "b", + Origin: "\nb", }, { Type: token.MappingValueType, @@ -1452,8 +1452,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\ta", - Origin: "\t\ta", + Value: "a", + Origin: "a", }, { Type: token.MappingValueType, @@ -1473,8 +1473,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\td", - Origin: "\t\td", + Value: "d", + Origin: "d", }, { Type: token.MappingValueType, @@ -1494,8 +1494,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\tc", - Origin: "\t\tc", + Value: "c", + Origin: "c", }, { Type: token.MappingValueType, @@ -1515,8 +1515,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\tsub", - Origin: "\t\tsub", + Value: "sub", + Origin: "sub", }, { Type: token.MappingValueType, @@ -1529,8 +1529,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\t e", - Origin: "\n\t\t e", + Value: "e", + Origin: "\n e", }, { Type: token.MappingValueType, @@ -1544,13 +1544,12 @@ a: !!binary | CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "5", - Origin: " 5\n", + Origin: " 5", }, }, }, { - YAML: `a: 1.2.3.4 - `, + YAML: `a: 1.2.3.4`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1571,13 +1570,12 @@ a: !!binary | CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "1.2.3.4", - Origin: " 1.2.3.4\n", + Origin: " 1.2.3.4", }, }, }, { - YAML: `a: "2015-02-24T18:19:39Z" - `, + YAML: `a: "2015-02-24T18:19:39Z"`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1603,8 +1601,7 @@ a: !!binary | }, }, { - YAML: `a: 'b: c' - `, + YAML: `a: 'b: c'`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1630,8 +1627,7 @@ a: !!binary | }, }, { - YAML: `a: 'Hello #comment' - `, + YAML: `a: 'Hello #comment'`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1657,8 +1653,7 @@ a: !!binary | }, }, { - YAML: `a: 100.5 - `, + YAML: `a: 100.5`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1679,13 +1674,12 @@ a: !!binary | CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "100.5", - Origin: " 100.5\n", + Origin: " 100.5", }, }, }, { - YAML: `a: bogus - `, + YAML: `a: bogus`, Tokens: token.Tokens{ { Type: token.StringType, @@ -1706,7 +1700,7 @@ a: !!binary | CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "bogus", - Origin: " bogus\n", + Origin: " bogus", }, }, }, @@ -1763,15 +1757,16 @@ a: !!binary | }, }, { - YAML: `a: "double quoted" - b: "value map"`, + YAML: ` +a: "double quoted" +b: "value map"`, Tokens: token.Tokens{ { Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "a", - Origin: "a", + Origin: "\na", }, { Type: token.MappingValueType, @@ -1791,8 +1786,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\tb", - Origin: "\n\t\tb", + Value: "b", + Origin: "\nb", }, { Type: token.MappingValueType, @@ -1811,15 +1806,16 @@ a: !!binary | }, }, { - YAML: `a: 'single quoted' - b: 'value map'`, + YAML: ` +a: 'single quoted' +b: 'value map'`, Tokens: token.Tokens{ { Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, Value: "a", - Origin: "a", + Origin: "\na", }, { Type: token.MappingValueType, @@ -1839,8 +1835,8 @@ a: !!binary | Type: token.StringType, CharacterType: token.CharacterTypeMiscellaneous, Indicator: token.NotIndicator, - Value: "\t\tb", - Origin: "\n\t\tb", + Value: "b", + Origin: "\nb", }, { Type: token.MappingValueType, @@ -3176,6 +3172,19 @@ a: |invalid`, name: "use reserved character `", src: "key: [`val]", }, + { + name: "use tab character as indent", + //nolint: gci + src: " a: b", + }, + { + name: "use tab character as indent in literal", + src: ` +a: | + b + c +`, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { diff --git a/parser/parser_test.go b/parser/parser_test.go index 6bb8568..189b970 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -883,7 +883,7 @@ a: - f: g h: i # comment 4 - `, +`, ` - b: c d: e diff --git a/path_test.go b/path_test.go index 8a4d6b5..ddca8bc 100644 --- a/path_test.go +++ b/path_test.go @@ -644,7 +644,7 @@ doc: - value1 - value2 other: value3 - ` +` path, err := yaml.PathString("$.doc.map[0]") if err != nil { log.Fatal(err) @@ -662,7 +662,6 @@ doc: // ^ // 8 | - value2 // 9 | other: value3 - // 10 | } func ExamplePath_PathString() { diff --git a/scanner/context.go b/scanner/context.go index 7c5517d..015a66c 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -182,7 +182,7 @@ func (c *Context) addToken(tk *token.Token) { } func (c *Context) addBuf(r rune) { - if len(c.buf) == 0 && r == ' ' { + if len(c.buf) == 0 && (r == ' ' || r == '\t') { return } c.buf = append(c.buf, r) diff --git a/scanner/scanner.go b/scanner/scanner.go index f1d2774..50ae83b 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -183,6 +183,11 @@ func (s *Scanner) updateIndent(ctx *Context, c rune) { s.indentNum++ return } + if s.isFirstCharAtLine && c == '\t' { + // found tab indent. + // In this case, scanTab returns error. + return + } if !s.isFirstCharAtLine { s.indentState = IndentStateKeep return @@ -593,6 +598,13 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error { } else if s.isFirstCharAtLine && c == ' ' { ctx.addDocumentIndent(s.column) s.progressColumn(ctx, 1) + } else if s.isFirstCharAtLine && c == '\t' { + err := ErrInvalidToken( + "found a tab character where an indentation space is expected", + token.Invalid(string(ctx.obuf), s.pos()), + ) + s.progressColumn(ctx, 1) + return err } else { ctx.updateDocumentLineIndentColumn(s.column) if ctx.docFirstLineIndentColumn > 0 { @@ -735,7 +747,7 @@ func (s *Scanner) scanFlowEntry(ctx *Context, c rune) bool { func (s *Scanner) scanMapDelim(ctx *Context) bool { nc := ctx.nextChar() - if s.startedFlowMapNum <= 0 && nc != ' ' && !s.isNewLineChar(nc) && !ctx.isNextEOS() { + if s.startedFlowMapNum <= 0 && nc != ' ' && nc != '\t' && !s.isNewLineChar(nc) && !ctx.isNextEOS() { return false } @@ -1009,10 +1021,22 @@ func (s *Scanner) scanReservedChar(ctx *Context, c rune) error { return err } +func (s *Scanner) scanTab(ctx *Context, c rune) error { + if !s.isFirstCharAtLine { + return nil + } + + ctx.addBuf(c) + ctx.addOriginBuf(c) + err := ErrInvalidToken("found character '\t' that cannot start any token", token.Invalid(string(ctx.obuf), s.pos())) + s.progressColumn(ctx, 1) + ctx.clear() + return err +} + func (s *Scanner) scan(ctx *Context) error { for ctx.next() { c := ctx.currentChar() - // First, change the IndentState. // If the target character is the first character in a line, IndentState is Up/Down/Equal state. // The second and subsequent letters are Keep. @@ -1140,6 +1164,10 @@ func (s *Scanner) scan(ctx *Context) error { if err := s.scanReservedChar(ctx, c); err != nil { return err } + case '\t': + if err := s.scanTab(ctx, c); err != nil { + return err + } } ctx.addBuf(c) ctx.addOriginBuf(c)