Skip to content

Commit da6acc9

Browse files
committed
Clean up codec API and enable codec support by default. Remove dbToConn as it prevents Conn garbage collection.
1 parent e4bf5f6 commit da6acc9

File tree

8 files changed

+250
-220
lines changed

8 files changed

+250
-220
lines changed

go1/sqlite3/codec.go

Lines changed: 110 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
package sqlite3
66

7+
/*
8+
#include "sqlite3.h"
9+
#include "lib/codec.h"
10+
*/
711
import "C"
812

913
import (
@@ -12,34 +16,48 @@ import (
1216
"unsafe"
1317
)
1418

15-
// CodecFunc is a callback function invoked by SQLite when a key is specified
16-
// for an attached database. It returns the Codec implementation that will be
17-
// used to encode and decode all database and journal pages. Returning (nil, OK)
18-
// disables the codec. A result code other than OK indicates an error.
19-
type CodecFunc func(di DbInfo, key []byte) (ci Codec, rc int)
20-
21-
// DbInfo describes the database to which a codec is being attached.
22-
type DbInfo interface {
23-
Path() string // Full path to the database file
24-
Name() string // Database name as it is known to SQLite (e.g. "main")
25-
PageSize() int // Current page size
26-
Reserve() int // Current number of bytes reserved in each page
19+
// CodecFunc is a codec initialization function registered for a specific key
20+
// prefix via RegisterCodec. It is called when a key with a matching prefix is
21+
// specified for an attached database. It returns the Codec implementation that
22+
// should be used to encode and decode all database and journal pages. Returning
23+
// (nil, nil) disables the codec.
24+
type CodecFunc func(ctx *CodecCtx, key []byte) (Codec, *Error)
25+
26+
// CodecCtx describes the database to which a codec is being attached.
27+
type CodecCtx struct {
28+
Path string // Full path to the database file
29+
Name string // Database name as it is known to SQLite (e.g. "main")
30+
PageSize int // Current page size in bytes
31+
Reserve int // Current number of bytes reserved in each page
32+
Fixed bool // True if the PageSize and Reserve values cannot be changed
33+
}
34+
35+
// newCodecCtx converts the C CodecCtx struct into its Go representation.
36+
func newCodecCtx(ctx *C.CodecCtx) *CodecCtx {
37+
return &CodecCtx{
38+
Path: C.GoString(ctx.zPath),
39+
Name: C.GoString(ctx.zName),
40+
PageSize: int(ctx.nBuf),
41+
Reserve: int(ctx.nRes),
42+
Fixed: ctx.fixed != 0,
43+
}
2744
}
2845

2946
// Codec is the interface used to encode/decode database and journal pages as
3047
// they are written to and read from the disk.
3148
//
3249
// The op value passed to Encode and Decode methods identifies the operation
33-
// being performed. It is undocumented and changed meanings over time (the codec
34-
// API was introduced in 2004), but is believed to be a bitmask of the following
35-
// values:
50+
// being performed. It is undocumented and has changed meaning over time since the
51+
// codec API was first introduced in 2004. It is believed to be a bitmask of the
52+
// following values:
3653
//
3754
// 1 = journal page, not set for WAL, always set when decoding
3855
// 2 = disk I/O, always set
3956
// 4 = encode
4057
//
4158
// In the current implementation, op is always 3 when decoding, 6 when encoding
42-
// for the database file or the WAL, and 7 when encoding for the journal.
59+
// for the database file or the WAL, and 7 when encoding for the journal. Search
60+
// lib/sqlite3.c for "CODEC1" and "CODEC2" for more information.
4361
type Codec interface {
4462
// Reserve returns the number of bytes that should be reserved for the codec
4563
// at the end of each page. The upper limit is 255 (32 if the page size is
@@ -51,151 +69,107 @@ type Codec interface {
5169
// buffer.
5270
Resize(pageSize, reserve int)
5371

54-
// Encode returns an encoded copy of a page or nil to indicate an error. The
55-
// original page may be returned without making a copy, but it must never be
56-
// modified. The codec is allowed to reuse a single buffer of identical size
57-
// for encoding. Bytes 16 through 23 of page 1 cannot be encoded.
58-
Encode(page []byte, pageNum uint32, op int) []byte
72+
// Encode returns an encoded copy of a page. The data outside of the reserve
73+
// area in the original page must not be modified. The codec can either copy
74+
// this data into a buffer for encoding or return the original page without
75+
// making any changes. Bytes 16 through 23 of page 1 cannot be encoded. Any
76+
// non-nil error will be interpreted by SQLite as a NOMEM condition. This is
77+
// a limitation of the underlying C API.
78+
Encode(page []byte, pageNum uint32, op int) ([]byte, *Error)
5979

60-
// Decode decodes the page in-place. Bytes 16 through 23 of page 1 must be
61-
// left at their original values. It returns true on success and false on
62-
// error, which will be interpreted by SQLite as a NOMEM condition.
63-
Decode(page []byte, pageNum uint32, op int) bool
80+
// Decode decodes the page in-place, but it may use the encode buffer as
81+
// scratch space. Bytes 16 through 23 of page 1 must be left at their
82+
// original values. Any non-nil error will be interpreted by SQLite as a
83+
// NOMEM condition. This is a limitation of the underlying C API.
84+
Decode(page []byte, pageNum uint32, op int) *Error
6485

6586
// Key returns the original key that was used to initialize the codec. Some
6687
// implementations may be better off returning nil or a fake value. Search
6788
// lib/sqlite3.c for "sqlite3CodecGetKey" to see how the key is used.
6889
Key() []byte
6990

70-
// FastRekey returns true if the codec can change the database key by
71-
// updating just the first page.
72-
FastRekey() bool
73-
7491
// Free releases codec resources when the pager is destroyed or when the
7592
// codec attachment fails.
7693
Free()
7794
}
7895

79-
// Codec registry.
96+
// Codec registry and state reference maps.
8097
var (
81-
codecReg map[string]CodecFunc
82-
codecMu sync.Mutex
98+
codecReg map[string]CodecFunc
99+
codecState map[*codec]struct{}
100+
codecMu sync.Mutex
83101
)
84102

85-
// RegisterCodec associates CodecFunc f with the given key prefix. Connections
86-
// using the default codec handler will call f when a key is provided in the
87-
// format "<keyPrefix>:<...>".
88-
func RegisterCodec(keyPrefix string, f CodecFunc) {
103+
// RegisterCodec adds a new codec to the internal registry. Function f will be
104+
// called when a key in the format "<name>:<...>" is provided to an attached
105+
// database. Passing a nil f removes any codec registered under name.
106+
func RegisterCodec(name string, f CodecFunc) {
89107
codecMu.Lock()
90108
defer codecMu.Unlock()
91-
if codecReg != nil {
92-
codecReg[keyPrefix] = f
93-
} else {
94-
codecReg = map[string]CodecFunc{keyPrefix: f}
109+
if f == nil {
110+
delete(codecReg, name)
111+
return
95112
}
113+
if codecReg == nil {
114+
codecReg = make(map[string]CodecFunc, 8)
115+
}
116+
codecReg[name] = f
96117
}
97118

98-
// defaultCodecFunc is used by all new connections to select a codec constructor
99-
// from the registry based on the key prefix.
100-
func defaultCodecFunc(di DbInfo, key []byte) (ci Codec, rc int) {
119+
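// getCodec returns the CodecFunc registered for the prefix of key (the bytes before the first ':', or the whole key if it has no ':'), or nil if none is registered.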
120+
func getCodec(key []byte) CodecFunc {
101121
i := bytes.IndexByte(key, ':')
102122
if i == -1 {
103-
i = 0
104-
}
105-
if f := getCodecFunc(bstr(key[:i])); f != nil {
106-
return f(di, key)
123+
i = len(key)
107124
}
108-
return nil, ERROR
109-
}
110-
111-
// getCodecFunc returns the CodecFunc for the given key prefix.
112-
func getCodecFunc(keyPrefix string) CodecFunc {
113125
codecMu.Lock()
114126
defer codecMu.Unlock()
115-
if codecReg != nil {
116-
return codecReg[keyPrefix]
127+
if codecReg == nil {
128+
return nil
117129
}
118-
return nil
130+
return codecReg[bstr(key[:i])]
119131
}
120132

121-
// codecState keeps a reference to all active codec wrappers to prevent them
122-
// from being garbage collected.
123-
var codecState = make(map[*codec]struct{})
124-
125133
// codec is a wrapper around the actual Codec interface. It keeps track of the
126134
// current page size in order to convert page pointers into byte slices.
127135
type codec struct {
128136
Codec
129137
pageSize C.int
130138
}
131139

132-
// dbInfo is the default DbInfo implementation.
133-
type dbInfo struct {
134-
zPath *C.char
135-
zName *C.char
136-
path string
137-
name string
138-
pageSize int
139-
reserve int
140-
}
141-
142-
func (di *dbInfo) Path() string {
143-
if di.zPath != nil {
144-
di.path = C.GoString(di.zPath)
145-
di.zPath = nil
146-
}
147-
return di.path
148-
}
149-
150-
func (di *dbInfo) Name() string {
151-
if di.zName != nil {
152-
di.name = C.GoString(di.zName)
153-
di.zName = nil
154-
}
155-
return di.name
156-
}
157-
158-
func (di *dbInfo) PageSize() int { return di.pageSize }
159-
func (di *dbInfo) Reserve() int { return di.reserve }
160-
161140
//export go_codec_init
162-
func go_codec_init(db unsafe.Pointer, zPath, zName *C.char, nBuf, nRes C.int,
163-
pKey unsafe.Pointer, nKey C.int, pCodec *unsafe.Pointer, nNewRes *C.int,
164-
) C.int {
165-
c := dbToConn[db]
166-
if c == nil || c.db == nil || c.codec == nil {
167-
return OK
168-
}
169-
di := &dbInfo{
170-
zPath: zPath,
171-
zName: zName,
172-
pageSize: int(nBuf),
173-
reserve: int(nRes),
141+
func go_codec_init(ctx *C.CodecCtx, pCodec *unsafe.Pointer, pzErrMsg **C.char) C.int {
142+
cf := getCodec(goBytes(ctx.pKey, ctx.nKey))
143+
if cf == nil {
144+
*pzErrMsg = C.CString("codec not found")
145+
return ERROR
174146
}
175-
var key []byte
176-
if nKey > 0 {
177-
key = C.GoBytes(pKey, nKey)
147+
ci, err := cf(newCodecCtx(ctx), C.GoBytes(ctx.pKey, ctx.nKey))
148+
if err != nil && err.rc != OK {
149+
if ci != nil {
150+
ci.Free()
151+
}
152+
if err.msg != "" {
153+
*pzErrMsg = C.CString(err.msg)
154+
}
155+
return C.int(err.rc)
178156
}
179-
ci, rc := c.codec(di, key)
180157
if ci != nil {
181-
cs := &codec{ci, nBuf}
182-
codecState[cs] = struct{}{}
158+
cs := &codec{ci, ctx.nBuf}
183159
*pCodec = unsafe.Pointer(cs)
184-
*nNewRes = C.int(ci.Reserve())
160+
codecMu.Lock()
161+
defer codecMu.Unlock()
162+
if codecState == nil {
163+
codecState = make(map[*codec]struct{}, 8)
164+
}
165+
codecState[cs] = struct{}{}
185166
}
186-
*di = dbInfo{}
187-
return C.int(rc)
167+
return OK
188168
}
189169

190-
//export go_codec_exec
191-
func go_codec_exec(pCodec, pData unsafe.Pointer, pgno uint32, op C.int) unsafe.Pointer {
192-
cs := (*codec)(pCodec)
193-
if page := goBytes(pData, cs.pageSize); op&4 != 0 {
194-
return cBytes(cs.Encode(page, pgno, int(op)))
195-
} else if cs.Decode(page, pgno, int(op)) {
196-
return pData
197-
}
198-
return nil
170+
//export go_codec_reserve
171+
func go_codec_reserve(pCodec unsafe.Pointer) C.int {
172+
return C.int((*codec)(pCodec).Reserve())
199173
}
200174

201175
//export go_codec_resize
@@ -205,6 +179,22 @@ func go_codec_resize(pCodec unsafe.Pointer, nBuf, nRes C.int) {
205179
cs.Resize(int(nBuf), int(nRes))
206180
}
207181

182+
//export go_codec_exec
183+
func go_codec_exec(pCodec, pData unsafe.Pointer, pgno uint32, op C.int) unsafe.Pointer {
184+
cs := (*codec)(pCodec)
185+
page := goBytes(pData, cs.pageSize)
186+
var err *Error
187+
if op&4 == 0 {
188+
err = cs.Decode(page, pgno, int(op))
189+
} else {
190+
page, err = cs.Encode(page, pgno, int(op))
191+
}
192+
if err == nil {
193+
return cBytes(page)
194+
}
195+
return nil // Can't do anything with the error at the moment
196+
}
197+
208198
//export go_codec_get_key
209199
func go_codec_get_key(pCodec unsafe.Pointer, pKey *unsafe.Pointer, nKey *C.int) {
210200
if key := (*codec)(pCodec).Key(); len(key) > 0 {
@@ -216,7 +206,9 @@ func go_codec_get_key(pCodec unsafe.Pointer, pKey *unsafe.Pointer, nKey *C.int)
216206
//export go_codec_free
217207
func go_codec_free(pCodec unsafe.Pointer) {
218208
cs := (*codec)(pCodec)
209+
codecMu.Lock()
219210
delete(codecState, cs)
211+
codecMu.Unlock()
220212
cs.Free()
221213
cs.Codec = nil
222214
}

go1/sqlite3/doc.go

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ Database Names
134134
Methods that require a database name as one of the arguments (e.g. Conn.Path())
135135
expect the symbolic name by which the database is known to the connection, not a
136136
path to a file. Valid database names are "main", "temp", or a name specified
137-
after the AS keyword in an ATTACH statement.
137+
after the AS clause in an ATTACH statement.
138138
139139
Callbacks
140140
@@ -147,27 +147,30 @@ interactions with Conn, Stmt, and other related objects within the handler.
147147
148148
Codecs and Encryption
149149
150-
WARNING: Codec support is unstable and is disabled by default. Uncomment "#cgo
151-
CFLAGS: -DSQLITE_HAS_CODEC=1" in sqlite3.go and rebuild the package to enable
152-
it.
153-
154-
SQLite has an undocumented codec API to modify database and journal pages as
155-
they are written to and read from the disk. It operates between the pager and
156-
the VFS layers. The SQLite Encryption Extension (SEE) uses this API to encrypt
157-
database contents. Consider purchasing a SEE license if you require
150+
SQLite has an undocumented codec API, which operates between the pager and VFS
151+
layers, and is used by the SQLite Encryption Extension (SEE) to encrypt database
152+
and journal contents. Consider purchasing a SEE license if you require
158153
production-quality encryption support (http://www.hwaci.com/sw/sqlite/see.html).
159154
160-
This package exposes the codec API so that it can be implemented in Go using the
161-
native crypto/* packages. See the Codec interface and CodecFunc for more
162-
information. This package does not and will not perform any actual encryption
163-
itself to avoid introducing crypto dependencies.
155+
This package has an experimental API (read: unstable, may eat your data) for
156+
writing codecs in Go. The "codec" subpackage provides additional documentation
157+
and several existing codec implementations.
158+
159+
Codecs are registered via the RegisterCodec function for a specific key prefix.
160+
For example, the "aes-hmac" codec is initialized when a key in the format
161+
"aes-hmac:<...>" is provided to an attached database. The key format after the
162+
first colon is codec-specific. See CodecFunc for more information.
164163
165-
A codec cannot be used for memory or temporary databases. Once the database is
166-
created (after the first CREATE TABLE statement), the page size and the reserved
167-
space at the end of each page can no longer be changed. Using "PRAGMA
168-
page_size=N; VACUUM;" will not work. Online backups will fail unless the
164+
The codec API has several limitations. Codecs cannot be used for in-memory or
165+
temporary databases. Once a database is created, the page size and the amount of
166+
reserved space at the end of each page cannot be changed (i.e. "PRAGMA
167+
page_size=N; VACUUM;" will not work). Online backups will fail unless the
169168
destination database has the same page size and reserve values. Bytes 16 through
170-
23 of page 1 (the database header) cannot be altered, so it is technically
171-
possible to identify encrypted SQLite databases.
169+
23 of page 1 (the database header, see http://www.sqlite.org/fileformat2.html)
170+
cannot be altered, so it is always possible to identify encrypted SQLite
171+
databases.
172+
173+
The rekey function is currently not implemented. The key can only be changed via
174+
the backup API or by dumping and restoring the database contents.
172175
*/
173176
package sqlite3

0 commit comments

Comments
 (0)