sqlc-dev
diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/db/db.go b/internal/endtoend/testdata/sqlite_unicode_comment/db/db.go
Lines changed: 31 additions & 0 deletions
diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/db/models.go b/internal/endtoend/testdata/sqlite_unicode_comment/db/models.go
Lines changed: 10 additions & 0 deletions
diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/db/query.sql.go b/internal/endtoend/testdata/sqlite_unicode_comment/db/query.sql.go
Lines changed: 37 additions & 0 deletions
diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/query.sql b/internal/endtoend/testdata/sqlite_unicode_comment/query.sql
Lines changed: 7 additions & 0 deletions
diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/schema.sql b/internal/endtoend/testdata/sqlite_unicode_comment/schema.sql
Lines changed: 1 addition & 0 deletions
diff --git a/internal/endtoend/testdata/sqlite_unicode_comment/sqlc.json b/internal/endtoend/testdata/sqlite_unicode_comment/sqlc.json
Lines changed: 16 additions & 0 deletions
diff --git a/internal/engine/sqlite/parse.go b/internal/engine/sqlite/parse.go
Lines changed: 27 additions & 4 deletions
@@ -0,0 +1,7 @@
+-- name: GetItem :one
+SELECT id, name FROM items WHERE id = ?;
+
+-- section — divider
+
+-- name: UpdateItem :exec
+UPDATE items SET name = ? WHERE id = ?;
@@ -0,0 +1 @@
+CREATE TABLE items (id INTEGER PRIMARY KEY, name TEXT NOT NULL);
@@ -0,0 +1,16 @@
+{
+    "version": "2",
+    "sql": [
+        {
+            "engine": "sqlite",
+            "queries": "query.sql",
+            "schema": "schema.sql",
+            "gen": {
+                "go": {
+                    "package": "db",
+                    "out": "db"
+                }
+            }
+        }
+    ]
+}
@@ -4,6 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"unicode/utf8"
 
 	"github.com/antlr4-go/antlr/v4"
 	"github.com/sqlc-dev/sqlc/internal/engine/sqlite/parser"
@@ -42,7 +43,8 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) {
 	if err != nil {
 		return nil, err
 	}
-	input := antlr.NewInputStream(string(blob))
+	src := string(blob)
+	input := antlr.NewInputStream(src)
 	lexer := parser.NewSQLiteLexer(input)
 	stream := antlr.NewCommonTokenStream(lexer, 0)
 	pp := parser.NewSQLiteParser(stream)
@@ -57,6 +59,13 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) {
 	if !ok {
 		return nil, fmt.Errorf("expected ParserContext; got %T\n", tree)
 	}
+
+	// ANTLR's InputStream operates on characters (runes), so token
+	// positions are character indices.  source.Pluck slices with byte
+	// offsets.  Build a lookup table so we can translate correctly when
+	// the input contains multi-byte UTF-8 characters (e.g. em-dash).
+	runeToByteOffset := buildRuneToByteOffsets(src)
+
 	var stmts []ast.Statement
 	for _, istmt := range pctx.AllSql_stmt_list() {
 		list, ok := istmt.(*parser.Sql_stmt_listContext)
@@ -72,12 +81,13 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) {
 				loc = stmt.GetStop().GetStop() + 2
 				continue
 			}
-			len := (stmt.GetStop().GetStop() + 1) - loc
+			byteLoc := runeToByteOffset[loc]
+			byteEnd := runeToByteOffset[stmt.GetStop().GetStop()+1]
 			stmts = append(stmts, ast.Statement{
 				Raw: &ast.RawStmt{
 					Stmt:         out,
-					StmtLocation: loc,
-					StmtLen:      len,
+					StmtLocation: byteLoc,
+					StmtLen:      byteEnd - byteLoc,
 				},
 			})
 			loc = stmt.GetStop().GetStop() + 2
@@ -86,6 +96,19 @@ func (p *Parser) Parse(r io.Reader) ([]ast.Statement, error) {
 	return stmts, nil
 }
 
+// buildRuneToByteOffsets builds a table translating rune indices in s to
+// byte offsets: element i is the byte position at which rune i starts, and
+// one extra trailing element equal to len(s) serves exclusive end lookups.
+func buildRuneToByteOffsets(s string) []int {
+	table := make([]int, 0, utf8.RuneCountInString(s)+1)
+	for pos := 0; pos < len(s); {
+		_, width := utf8.DecodeRuneInString(s[pos:])
+		table = append(table, pos)
+		pos += width
+	}
+	return append(table, len(s))
+}
+
 func (p *Parser) CommentSyntax() source.CommentSyntax {
 	return source.CommentSyntax{
 		Dash:      true,
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+CREATE TABLE items (id INTEGER PRIMARY KEY, name TEXT NOT NULL);`