diff --git a/hack/vendor.sh b/hack/vendor.sh index 81fe01e84ee..ba249342c69 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -44,7 +44,7 @@ clone git github.com/coreos/etcd v2.2.0 fix_rewritten_imports github.com/coreos/etcd clone git github.com/ugorji/go 5abd4e96a45c386928ed2ca2a7ef63e2533e18ec clone git github.com/hashicorp/consul v0.5.2 -clone git github.com/boltdb/bolt v1.1.0 +clone git github.com/boltdb/bolt v1.2.0 clone git github.com/miekg/dns 75e6e86cc601825c5dbcd4e0c209eab180997cd7 # get graph and distribution packages diff --git a/vendor/src/github.com/boltdb/bolt/Makefile b/vendor/src/github.com/boltdb/bolt/Makefile index cfbed514bbb..e035e63adcd 100644 --- a/vendor/src/github.com/boltdb/bolt/Makefile +++ b/vendor/src/github.com/boltdb/bolt/Makefile @@ -1,54 +1,18 @@ -TEST=. -BENCH=. -COVERPROFILE=/tmp/c.out BRANCH=`git rev-parse --abbrev-ref HEAD` COMMIT=`git rev-parse --short HEAD` GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" default: build -bench: - go test -v -test.run=NOTHINCONTAINSTHIS -test.bench=$(BENCH) - -# http://cloc.sourceforge.net/ -cloc: - @cloc --not-match-f='Makefile|_test.go' . - -cover: fmt - go test -coverprofile=$(COVERPROFILE) -test.run=$(TEST) $(COVERFLAG) . - go tool cover -html=$(COVERPROFILE) - rm $(COVERPROFILE) - -cpuprofile: fmt - @go test -c - @./bolt.test -test.v -test.run=$(TEST) -test.cpuprofile cpu.prof +race: + @go test -v -race -test.run="TestSimulate_(100op|1000op)" # go get github.com/kisielk/errcheck errcheck: - @echo "=== errcheck ===" - @errcheck github.com/boltdb/bolt + @errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt -fmt: - @go fmt ./... - -get: - @go get -d ./... - -build: get - @mkdir -p bin - @go build -ldflags=$(GOLDFLAGS) -a -o bin/bolt ./cmd/bolt - -test: fmt - @go get github.com/stretchr/testify/assert - @echo "=== TESTS ===" - @go test -v -cover -test.run=$(TEST) - @echo "" - @echo "" - @echo "=== CLI ===" - @go test -v -test.run=$(TEST) ./cmd/bolt - @echo "" - @echo "" - @echo "=== RACE DETECTOR ===" - @go test -v -race -test.run="TestSimulate_(100op|1000op)" +test: + @go test -v -cover . + @go test -v ./cmd/bolt -.PHONY: bench cloc cover cpuprofile fmt memprofile test +.PHONY: fmt test diff --git a/vendor/src/github.com/boltdb/bolt/README.md b/vendor/src/github.com/boltdb/bolt/README.md index 0a33ebc8a2d..66b19ace8aa 100644 --- a/vendor/src/github.com/boltdb/bolt/README.md +++ b/vendor/src/github.com/boltdb/bolt/README.md @@ -1,8 +1,8 @@ -Bolt [![Build Status](https://drone.io/github.com/boltdb/bolt/status.png)](https://drone.io/github.com/boltdb/bolt/latest) [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.png?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.png)](https://godoc.org/github.com/boltdb/bolt) ![Version](http://img.shields.io/badge/version-1.0-green.png) +Bolt [![Build Status](https://drone.io/github.com/boltdb/bolt/status.png)](https://drone.io/github.com/boltdb/bolt/latest) [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.svg?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.svg)](https://godoc.org/github.com/boltdb/bolt) ![Version](https://img.shields.io/badge/version-1.0-green.svg) ==== -Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] and -the [LMDB project][lmdb]. The goal of the project is to provide a simple, +Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] +[LMDB project][lmdb]. The goal of the project is to provide a simple, fast, and reliable database for projects that don't require a full database server such as Postgres or MySQL. @@ -13,7 +13,6 @@ and setting values. That's it. [hyc_symas]: https://twitter.com/hyc_symas [lmdb]: http://symas.com/mdb/ - ## Project Status Bolt is stable and the API is fixed. Full unit test coverage and randomized @@ -22,6 +21,36 @@ Bolt is currently in high-load production environments serving databases as large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed services every day. +## Table of Contents + +- [Getting Started](#getting-started) + - [Installing](#installing) + - [Opening a database](#opening-a-database) + - [Transactions](#transactions) + - [Read-write transactions](#read-write-transactions) + - [Read-only transactions](#read-only-transactions) + - [Batch read-write transactions](#batch-read-write-transactions) + - [Managing transactions manually](#managing-transactions-manually) + - [Using buckets](#using-buckets) + - [Using key/value pairs](#using-keyvalue-pairs) + - [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket) + - [Iterating over keys](#iterating-over-keys) + - [Prefix scans](#prefix-scans) + - [Range scans](#range-scans) + - [ForEach()](#foreach) + - [Nested buckets](#nested-buckets) + - [Database backups](#database-backups) + - [Statistics](#statistics) + - [Read-Only Mode](#read-only-mode) + - [Mobile Use (iOS/Android)](#mobile-use-iosandroid) +- [Resources](#resources) +- [Comparison with other databases](#comparison-with-other-databases) + - [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases) + - [LevelDB, RocksDB](#leveldb-rocksdb) + - [LMDB](#lmdb) +- [Caveats & Limitations](#caveats--limitations) +- [Reading the Source](#reading-the-source) +- [Other Projects Using Bolt](#other-projects-using-bolt) ## Getting Started @@ -180,8 +209,8 @@ and then safely close your transaction if an error is returned. This is the recommended way to use Bolt transactions. However, sometimes you may want to manually start and end your transactions. -You can use the `Tx.Begin()` function directly but _please_ be sure to close the -transaction. +You can use the `Tx.Begin()` function directly but **please** be sure to close +the transaction. ```go // Start a writable transaction. @@ -269,7 +298,7 @@ then you must use `copy()` to copy it to another byte slice. ### Autoincrementing integer for the bucket -By using the NextSequence() function, you can let Bolt determine a sequence +By using the `NextSequence()` function, you can let Bolt determine a sequence which can be used as the unique identifier for your key/value pairs. See the example below. @@ -309,7 +338,6 @@ type User struct { ID int ... } - ``` ### Iterating over keys @@ -320,7 +348,9 @@ iteration over these keys extremely fast. To iterate over keys we'll use a ```go db.View(func(tx *bolt.Tx) error { + // Assume bucket exists and has keys b := tx.Bucket([]byte("MyBucket")) + c := b.Cursor() for k, v := c.First(); k != nil; k, v = c.Next() { @@ -344,10 +374,15 @@ Next() Move to the next key. Prev() Move to the previous key. ``` -When you have iterated to the end of the cursor then `Next()` will return `nil`. -You must seek to a position using `First()`, `Last()`, or `Seek()` before -calling `Next()` or `Prev()`. If you do not seek to a position then these -functions will return `nil`. +Each of those functions has a return signature of `(key []byte, value []byte)`. +When you have iterated to the end of the cursor then `Next()` will return a +`nil` key. You must seek to a position using `First()`, `Last()`, or `Seek()` +before calling `Next()` or `Prev()`. If you do not seek to a position then +these functions will return a `nil` key. + +During iteration, if the key is non-`nil` but the value is `nil`, that means +the key refers to a bucket rather than a value. Use `Bucket.Bucket()` to +access the sub-bucket. #### Prefix scans @@ -356,6 +391,7 @@ To iterate over a key prefix, you can combine `Seek()` and `bytes.HasPrefix()`: ```go db.View(func(tx *bolt.Tx) error { + // Assume bucket exists and has keys c := tx.Bucket([]byte("MyBucket")).Cursor() prefix := []byte("1234") @@ -375,7 +411,7 @@ date range like this: ```go db.View(func(tx *bolt.Tx) error { - // Assume our events bucket has RFC3339 encoded time keys. + // Assume our events bucket exists and has RFC3339 encoded time keys. c := tx.Bucket([]byte("Events")).Cursor() // Our time range spans the 90's decade. @@ -399,7 +435,9 @@ all the keys in a bucket: ```go db.View(func(tx *bolt.Tx) error { + // Assume bucket exists and has keys b := tx.Bucket([]byte("MyBucket")) + b.ForEach(func(k, v []byte) error { fmt.Printf("key=%s, value=%s\n", k, v) return nil @@ -426,8 +464,11 @@ func (*Bucket) DeleteBucket(key []byte) error Bolt is a single file so it's easy to backup. You can use the `Tx.WriteTo()` function to write a consistent view of the database to a writer. If you call this from a read-only transaction, it will perform a hot backup and not block -your other database reads and writes. It will also use `O_DIRECT` when available -to prevent page cache trashing. +your other database reads and writes. + +By default, it will use a regular file handle which will utilize the operating +system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx) +documentation for information about optimizing for larger-than-RAM datasets. One common use case is to backup over HTTP so you can use tools like `cURL` to do database backups: @@ -509,6 +550,84 @@ if err != nil { } ``` +### Mobile Use (iOS/Android) + +Bolt is able to run on mobile devices by leveraging the binding feature of the +[gomobile](https://github.com/golang/mobile) tool. Create a struct that will +contain your database logic and a reference to a `*bolt.DB` with a initializing +contstructor that takes in a filepath where the database file will be stored. +Neither Android nor iOS require extra permissions or cleanup from using this method. + +```go +func NewBoltDB(filepath string) *BoltDB { + db, err := bolt.Open(filepath+"/demo.db", 0600, nil) + if err != nil { + log.Fatal(err) + } + + return &BoltDB{db} +} + +type BoltDB struct { + db *bolt.DB + ... +} + +func (b *BoltDB) Path() string { + return b.db.Path() +} + +func (b *BoltDB) Close() { + b.db.Close() +} +``` + +Database logic should be defined as methods on this wrapper struct. + +To initialize this struct from the native language (both platforms now sync +their local storage to the cloud. These snippets disable that functionality for the +database file): + +#### Android + +```java +String path; +if (android.os.Build.VERSION.SDK_INT >=android.os.Build.VERSION_CODES.LOLLIPOP){ + path = getNoBackupFilesDir().getAbsolutePath(); +} else{ + path = getFilesDir().getAbsolutePath(); +} +Boltmobiledemo.BoltDB boltDB = Boltmobiledemo.NewBoltDB(path) +``` + +#### iOS + +```objc +- (void)demo { + NSString* path = [NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, + NSUserDomainMask, + YES) objectAtIndex:0]; + GoBoltmobiledemoBoltDB * demo = GoBoltmobiledemoNewBoltDB(path); + [self addSkipBackupAttributeToItemAtPath:demo.path]; + //Some DB Logic would go here + [demo close]; +} + +- (BOOL)addSkipBackupAttributeToItemAtPath:(NSString *) filePathString +{ + NSURL* URL= [NSURL fileURLWithPath: filePathString]; + assert([[NSFileManager defaultManager] fileExistsAtPath: [URL path]]); + + NSError *error = nil; + BOOL success = [URL setResourceValue: [NSNumber numberWithBool: YES] + forKey: NSURLIsExcludedFromBackupKey error: &error]; + if(!success){ + NSLog(@"Error excluding %@ from backup %@", [URL lastPathComponent], error); + } + return success; +} + +``` ## Resources @@ -544,7 +663,7 @@ they are libraries bundled into the application, however, their underlying structure is a log-structured merge-tree (LSM tree). An LSM tree optimizes random writes by using a write ahead log and multi-tiered, sorted files called SSTables. Bolt uses a B+tree internally and only a single file. Both approaches -have trade offs. +have trade-offs. If you require a high random write throughput (>10,000 w/sec) or you need to use spinning disks then LevelDB could be a good choice. If your application is @@ -580,9 +699,8 @@ It's important to pick the right tool for the job and Bolt is no exception. Here are a few things to note when evaluating and using Bolt: * Bolt is good for read intensive workloads. Sequential write performance is - also fast but random writes can be slow. You can add a write-ahead log or - [transaction coalescer](https://github.com/boltdb/coalescer) in front of Bolt - to mitigate this issue. + also fast but random writes can be slow. You can use `DB.Batch()` or add a + write-ahead log to help mitigate this issue. * Bolt uses a B+tree internally so there can be a lot of random page access. SSDs provide a significant performance boost over spinning disks. @@ -618,7 +736,7 @@ Here are a few things to note when evaluating and using Bolt: * The data structures in the Bolt database are memory mapped so the data file will be endian specific. This means that you cannot copy a Bolt file from a - little endian machine to a big endian machine and have it work. For most + little endian machine to a big endian machine and have it work. For most users this is not a concern since most modern CPUs are little endian. * Because of the way pages are laid out on disk, Bolt cannot truncate data files @@ -633,6 +751,56 @@ Here are a few things to note when evaluating and using Bolt: [page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638 +## Reading the Source + +Bolt is a relatively small code base (<3KLOC) for an embedded, serializable, +transactional key/value database so it can be a good starting point for people +interested in how databases work. + +The best places to start are the main entry points into Bolt: + +- `Open()` - Initializes the reference to the database. It's responsible for + creating the database if it doesn't exist, obtaining an exclusive lock on the + file, reading the meta pages, & memory-mapping the file. + +- `DB.Begin()` - Starts a read-only or read-write transaction depending on the + value of the `writable` argument. This requires briefly obtaining the "meta" + lock to keep track of open transactions. Only one read-write transaction can + exist at a time so the "rwlock" is acquired during the life of a read-write + transaction. + +- `Bucket.Put()` - Writes a key/value pair into a bucket. After validating the + arguments, a cursor is used to traverse the B+tree to the page and position + where they key & value will be written. Once the position is found, the bucket + materializes the underlying page and the page's parent pages into memory as + "nodes". These nodes are where mutations occur during read-write transactions. + These changes get flushed to disk during commit. + +- `Bucket.Get()` - Retrieves a key/value pair from a bucket. This uses a cursor + to move to the page & position of a key/value pair. During a read-only + transaction, the key and value data is returned as a direct reference to the + underlying mmap file so there's no allocation overhead. For read-write + transactions, this data may reference the mmap file or one of the in-memory + node values. + +- `Cursor` - This object is simply for traversing the B+tree of on-disk pages + or in-memory nodes. It can seek to a specific key, move to the first or last + value, or it can move forward or backward. The cursor handles the movement up + and down the B+tree transparently to the end user. + +- `Tx.Commit()` - Converts the in-memory dirty nodes and the list of free pages + into pages to be written to disk. Writing to disk then occurs in two phases. + First, the dirty pages are written to disk and an `fsync()` occurs. Second, a + new meta page with an incremented transaction ID is written and another + `fsync()` occurs. This two phase write ensures that partially written data + pages are ignored in the event of a crash since the meta page pointing to them + is never written. Partially written meta pages are invalidated because they + are written with a checksum. + +If you have additional notes that could be helpful for others, please submit +them via pull request. + + ## Other Projects Using Bolt Below is a list of public, open source projects that use Bolt: @@ -643,21 +811,21 @@ Below is a list of public, open source projects that use Bolt: * [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics. * [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects. * [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday. -* [ChainStore](https://github.com/nulayer/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations. +* [ChainStore](https://github.com/pressly/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations. * [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite. * [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin". * [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka. * [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed. * [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt. -* [photosite/session](http://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site. +* [photosite/session](https://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site. * [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage. * [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters. * [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend. * [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend. * [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server. * [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database. -* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read. -* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics. +* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read. +* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics. * [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data. * [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system. * [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware. @@ -667,5 +835,10 @@ Below is a list of public, open source projects that use Bolt: backed by boltdb. * [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining simple tx and key scans. +* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. +* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service +* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service. +* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners. +* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. If you are using Bolt in a project please send a pull request to add it to the list. diff --git a/vendor/src/github.com/boltdb/bolt/appveyor.yml b/vendor/src/github.com/boltdb/bolt/appveyor.yml new file mode 100644 index 00000000000..6e26e941d68 --- /dev/null +++ b/vendor/src/github.com/boltdb/bolt/appveyor.yml @@ -0,0 +1,18 @@ +version: "{build}" + +os: Windows Server 2012 R2 + +clone_folder: c:\gopath\src\github.com\boltdb\bolt + +environment: + GOPATH: c:\gopath + +install: + - echo %PATH% + - echo %GOPATH% + - go version + - go env + - go get -v -t ./... + +build_script: + - go test -v ./... diff --git a/vendor/src/github.com/boltdb/bolt/batch.go b/vendor/src/github.com/boltdb/bolt/batch.go deleted file mode 100644 index 84acae6bbf0..00000000000 --- a/vendor/src/github.com/boltdb/bolt/batch.go +++ /dev/null @@ -1,138 +0,0 @@ -package bolt - -import ( - "errors" - "fmt" - "sync" - "time" -) - -// Batch calls fn as part of a batch. It behaves similar to Update, -// except: -// -// 1. concurrent Batch calls can be combined into a single Bolt -// transaction. -// -// 2. the function passed to Batch may be called multiple times, -// regardless of whether it returns error or not. -// -// This means that Batch function side effects must be idempotent and -// take permanent effect only after a successful return is seen in -// caller. -// -// The maximum batch size and delay can be adjusted with DB.MaxBatchSize -// and DB.MaxBatchDelay, respectively. -// -// Batch is only useful when there are multiple goroutines calling it. -func (db *DB) Batch(fn func(*Tx) error) error { - errCh := make(chan error, 1) - - db.batchMu.Lock() - if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) { - // There is no existing batch, or the existing batch is full; start a new one. - db.batch = &batch{ - db: db, - } - db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) - } - db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) - if len(db.batch.calls) >= db.MaxBatchSize { - // wake up batch, it's ready to run - go db.batch.trigger() - } - db.batchMu.Unlock() - - err := <-errCh - if err == trySolo { - err = db.Update(fn) - } - return err -} - -type call struct { - fn func(*Tx) error - err chan<- error -} - -type batch struct { - db *DB - timer *time.Timer - start sync.Once - calls []call -} - -// trigger runs the batch if it hasn't already been run. -func (b *batch) trigger() { - b.start.Do(b.run) -} - -// run performs the transactions in the batch and communicates results -// back to DB.Batch. -func (b *batch) run() { - b.db.batchMu.Lock() - b.timer.Stop() - // Make sure no new work is added to this batch, but don't break - // other batches. - if b.db.batch == b { - b.db.batch = nil - } - b.db.batchMu.Unlock() - -retry: - for len(b.calls) > 0 { - var failIdx = -1 - err := b.db.Update(func(tx *Tx) error { - for i, c := range b.calls { - if err := safelyCall(c.fn, tx); err != nil { - failIdx = i - return err - } - } - return nil - }) - - if failIdx >= 0 { - // take the failing transaction out of the batch. it's - // safe to shorten b.calls here because db.batch no longer - // points to us, and we hold the mutex anyway. - c := b.calls[failIdx] - b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] - // tell the submitter re-run it solo, continue with the rest of the batch - c.err <- trySolo - continue retry - } - - // pass success, or bolt internal errors, to all callers - for _, c := range b.calls { - if c.err != nil { - c.err <- err - } - } - break retry - } -} - -// trySolo is a special sentinel error value used for signaling that a -// transaction function should be re-run. It should never be seen by -// callers. -var trySolo = errors.New("batch function returned an error and should be re-run solo") - -type panicked struct { - reason interface{} -} - -func (p panicked) Error() string { - if err, ok := p.reason.(error); ok { - return err.Error() - } - return fmt.Sprintf("panic: %v", p.reason) -} - -func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { - defer func() { - if p := recover(); p != nil { - err = panicked{p} - } - }() - return fn(tx) -} diff --git a/vendor/src/github.com/boltdb/bolt/bolt_linux.go b/vendor/src/github.com/boltdb/bolt/bolt_linux.go index e9d1c907b63..2b676661409 100644 --- a/vendor/src/github.com/boltdb/bolt/bolt_linux.go +++ b/vendor/src/github.com/boltdb/bolt/bolt_linux.go @@ -4,8 +4,6 @@ import ( "syscall" ) -var odirect = syscall.O_DIRECT - // fdatasync flushes written data to a file descriptor. func fdatasync(db *DB) error { return syscall.Fdatasync(int(db.file.Fd())) diff --git a/vendor/src/github.com/boltdb/bolt/bolt_openbsd.go b/vendor/src/github.com/boltdb/bolt/bolt_openbsd.go index 7c1bef1a4f4..7058c3d734e 100644 --- a/vendor/src/github.com/boltdb/bolt/bolt_openbsd.go +++ b/vendor/src/github.com/boltdb/bolt/bolt_openbsd.go @@ -11,8 +11,6 @@ const ( msInvalidate // invalidate cached data ) -var odirect int - func msync(db *DB) error { _, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate) if errno != 0 { diff --git a/vendor/src/github.com/boltdb/bolt/bolt_ppc.go b/vendor/src/github.com/boltdb/bolt/bolt_ppc.go new file mode 100644 index 00000000000..645ddc3edc2 --- /dev/null +++ b/vendor/src/github.com/boltdb/bolt/bolt_ppc.go @@ -0,0 +1,9 @@ +// +build ppc + +package bolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0x7FFFFFFF // 2GB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0xFFFFFFF diff --git a/vendor/src/github.com/boltdb/bolt/bolt_ppc64.go b/vendor/src/github.com/boltdb/bolt/bolt_ppc64.go new file mode 100644 index 00000000000..2dc6be02e3e --- /dev/null +++ b/vendor/src/github.com/boltdb/bolt/bolt_ppc64.go @@ -0,0 +1,9 @@ +// +build ppc64 + +package bolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0xFFFFFFFFFFFF // 256TB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0x7FFFFFFF diff --git a/vendor/src/github.com/boltdb/bolt/bolt_unix.go b/vendor/src/github.com/boltdb/bolt/bolt_unix.go index 6eef6b22035..cad62dda1e3 100644 --- a/vendor/src/github.com/boltdb/bolt/bolt_unix.go +++ b/vendor/src/github.com/boltdb/bolt/bolt_unix.go @@ -11,7 +11,7 @@ import ( ) // flock acquires an advisory lock on a file descriptor. -func flock(f *os.File, exclusive bool, timeout time.Duration) error { +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { var t time.Time for { // If we're beyond our timeout then return an error. @@ -27,7 +27,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // Otherwise attempt to obtain an exclusive lock. - err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB) + err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) if err == nil { return nil } else if err != syscall.EWOULDBLOCK { @@ -40,25 +40,14 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // funlock releases an advisory lock on a file descriptor. -func funlock(f *os.File) error { - return syscall.Flock(int(f.Fd()), syscall.LOCK_UN) +func funlock(db *DB) error { + return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) } // mmap memory maps a DB's data file. func mmap(db *DB, sz int) error { - // Truncate and fsync to ensure file size metadata is flushed. - // https://github.com/boltdb/bolt/issues/284 - if !db.NoGrowSync && !db.readOnly { - if err := db.file.Truncate(int64(sz)); err != nil { - return fmt.Errorf("file resize error: %s", err) - } - if err := db.file.Sync(); err != nil { - return fmt.Errorf("file sync error: %s", err) - } - } - // Map the data file to memory. - b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED) + b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) if err != nil { return err } diff --git a/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go b/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go index f480ee76d1f..307bf2b3ee9 100644 --- a/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go +++ b/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go @@ -1,4 +1,3 @@ - package bolt import ( @@ -7,11 +6,12 @@ import ( "syscall" "time" "unsafe" + "golang.org/x/sys/unix" ) // flock acquires an advisory lock on a file descriptor. -func flock(f *os.File, exclusive bool, timeout time.Duration) error { +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { var t time.Time for { // If we're beyond our timeout then return an error. @@ -32,7 +32,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } else { lock.Type = syscall.F_RDLCK } - err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock) + err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock) if err == nil { return nil } else if err != syscall.EAGAIN { @@ -45,30 +45,19 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // funlock releases an advisory lock on a file descriptor. -func funlock(f *os.File) error { +func funlock(db *DB) error { var lock syscall.Flock_t lock.Start = 0 lock.Len = 0 lock.Type = syscall.F_UNLCK lock.Whence = 0 - return syscall.FcntlFlock(uintptr(f.Fd()), syscall.F_SETLK, &lock) + return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock) } // mmap memory maps a DB's data file. func mmap(db *DB, sz int) error { - // Truncate and fsync to ensure file size metadata is flushed. - // https://github.com/boltdb/bolt/issues/284 - if !db.NoGrowSync && !db.readOnly { - if err := db.file.Truncate(int64(sz)); err != nil { - return fmt.Errorf("file resize error: %s", err) - } - if err := db.file.Sync(); err != nil { - return fmt.Errorf("file sync error: %s", err) - } - } - // Map the data file to memory. - b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED) + b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) if err != nil { return err } diff --git a/vendor/src/github.com/boltdb/bolt/bolt_windows.go b/vendor/src/github.com/boltdb/bolt/bolt_windows.go index 8b782be5f9e..d538e6afd77 100644 --- a/vendor/src/github.com/boltdb/bolt/bolt_windows.go +++ b/vendor/src/github.com/boltdb/bolt/bolt_windows.go @@ -8,7 +8,39 @@ import ( "unsafe" ) -var odirect int +// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1 +var ( + modkernel32 = syscall.NewLazyDLL("kernel32.dll") + procLockFileEx = modkernel32.NewProc("LockFileEx") + procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") +) + +const ( + lockExt = ".lock" + + // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx + flagLockExclusive = 2 + flagLockFailImmediately = 1 + + // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx + errLockViolation syscall.Errno = 0x21 +) + +func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { + r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol))) + if r == 0 { + return err + } + return nil +} + +func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { + r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0) + if r == 0 { + return err + } + return nil +} // fdatasync flushes written data to a file descriptor. func fdatasync(db *DB) error { @@ -16,13 +48,49 @@ func fdatasync(db *DB) error { } // flock acquires an advisory lock on a file descriptor. -func flock(f *os.File, _ bool, _ time.Duration) error { - return nil +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { + // Create a separate lock file on windows because a process + // cannot share an exclusive lock on the same file. This is + // needed during Tx.WriteTo(). + f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode) + if err != nil { + return err + } + db.lockfile = f + + var t time.Time + for { + // If we're beyond our timeout then return an error. + // This can only occur after we've attempted a flock once. + if t.IsZero() { + t = time.Now() + } else if timeout > 0 && time.Since(t) > timeout { + return ErrTimeout + } + + var flag uint32 = flagLockFailImmediately + if exclusive { + flag |= flagLockExclusive + } + + err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{}) + if err == nil { + return nil + } else if err != errLockViolation { + return err + } + + // Wait for a bit and try again. + time.Sleep(50 * time.Millisecond) + } } // funlock releases an advisory lock on a file descriptor. -func funlock(f *os.File) error { - return nil +func funlock(db *DB) error { + err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{}) + db.lockfile.Close() + os.Remove(db.path+lockExt) + return err } // mmap memory maps a DB's data file. diff --git a/vendor/src/github.com/boltdb/bolt/boltsync_unix.go b/vendor/src/github.com/boltdb/bolt/boltsync_unix.go index 8db89776fe6..f50442523c3 100644 --- a/vendor/src/github.com/boltdb/bolt/boltsync_unix.go +++ b/vendor/src/github.com/boltdb/bolt/boltsync_unix.go @@ -2,8 +2,6 @@ package bolt -var odirect int - // fdatasync flushes written data to a file descriptor. func fdatasync(db *DB) error { return db.file.Sync() diff --git a/vendor/src/github.com/boltdb/bolt/bucket.go b/vendor/src/github.com/boltdb/bolt/bucket.go index 29252885f93..d2f8c524e42 100644 --- a/vendor/src/github.com/boltdb/bolt/bucket.go +++ b/vendor/src/github.com/boltdb/bolt/bucket.go @@ -11,7 +11,7 @@ const ( MaxKeySize = 32768 // MaxValueSize is the maximum length of a value, in bytes. - MaxValueSize = 4294967295 + MaxValueSize = (1 << 31) - 2 ) const ( @@ -273,6 +273,7 @@ func (b *Bucket) Get(key []byte) []byte { // Put sets the value for a key in the bucket. // If the key exist then its previous value will be overwritten. +// Supplied value must remain valid for the life of the transaction. // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large. func (b *Bucket) Put(key []byte, value []byte) error { if b.tx.db == nil { diff --git a/vendor/src/github.com/boltdb/bolt/cursor.go b/vendor/src/github.com/boltdb/bolt/cursor.go index 006c54889e8..1be9f35e3ef 100644 --- a/vendor/src/github.com/boltdb/bolt/cursor.go +++ b/vendor/src/github.com/boltdb/bolt/cursor.go @@ -34,6 +34,13 @@ func (c *Cursor) First() (key []byte, value []byte) { p, n := c.bucket.pageNode(c.bucket.root) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) c.first() + + // If we land on an empty page then move to the next value. + // https://github.com/boltdb/bolt/issues/450 + if c.stack[len(c.stack)-1].count() == 0 { + c.next() + } + k, v, flags := c.keyValue() if (flags & uint32(bucketLeafFlag)) != 0 { return k, nil @@ -209,28 +216,37 @@ func (c *Cursor) last() { // next moves to the next leaf element and returns the key and value. // If the cursor is at the last leaf element then it stays there and returns nil. func (c *Cursor) next() (key []byte, value []byte, flags uint32) { - // Attempt to move over one element until we're successful. - // Move up the stack as we hit the end of each page in our stack. - var i int - for i = len(c.stack) - 1; i >= 0; i-- { - elem := &c.stack[i] - if elem.index < elem.count()-1 { - elem.index++ - break + for { + // Attempt to move over one element until we're successful. + // Move up the stack as we hit the end of each page in our stack. + var i int + for i = len(c.stack) - 1; i >= 0; i-- { + elem := &c.stack[i] + if elem.index < elem.count()-1 { + elem.index++ + break + } } - } - // If we've hit the root page then stop and return. This will leave the - // cursor on the last element of the last page. - if i == -1 { - return nil, nil, 0 - } + // If we've hit the root page then stop and return. This will leave the + // cursor on the last element of the last page. + if i == -1 { + return nil, nil, 0 + } - // Otherwise start from where we left off in the stack and find the - // first element of the first leaf page. - c.stack = c.stack[:i+1] - c.first() - return c.keyValue() + // Otherwise start from where we left off in the stack and find the + // first element of the first leaf page. + c.stack = c.stack[:i+1] + c.first() + + // If this is an empty page then restart and move back up the stack. + // https://github.com/boltdb/bolt/issues/450 + if c.stack[len(c.stack)-1].count() == 0 { + continue + } + + return c.keyValue() + } } // search recursively performs a binary search against a given page/node until it finds a given key. diff --git a/vendor/src/github.com/boltdb/bolt/db.go b/vendor/src/github.com/boltdb/bolt/db.go index d39c4aa9cce..501d36aac24 100644 --- a/vendor/src/github.com/boltdb/bolt/db.go +++ b/vendor/src/github.com/boltdb/bolt/db.go @@ -1,8 +1,10 @@ package bolt import ( + "errors" "fmt" "hash/fnv" + "log" "os" "runtime" "runtime/debug" @@ -24,13 +26,14 @@ const magic uint32 = 0xED0CDAED // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when // syncing changes to a file. This is required as some operating systems, // such as OpenBSD, do not have a unified buffer cache (UBC) and writes -// must be synchronzied using the msync(2) syscall. +// must be synchronized using the msync(2) syscall. const IgnoreNoSync = runtime.GOOS == "openbsd" // Default values if not set in a DB instance. const ( DefaultMaxBatchSize int = 1000 DefaultMaxBatchDelay = 10 * time.Millisecond + DefaultAllocSize = 16 * 1024 * 1024 ) // DB represents a collection of buckets persisted to a file on disk. @@ -63,6 +66,10 @@ type DB struct { // https://github.com/boltdb/bolt/issues/284 NoGrowSync bool + // If you want to read the entire database fast, you can set MmapFlag to + // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. + MmapFlags int + // MaxBatchSize is the maximum size of a batch. Default value is // copied from DefaultMaxBatchSize in Open. // @@ -79,11 +86,18 @@ type DB struct { // Do not change concurrently with calls to Batch. MaxBatchDelay time.Duration + // AllocSize is the amount of space allocated when the database + // needs to create new pages. This is done to amortize the cost + // of truncate() and fsync() when growing the data file. + AllocSize int + path string file *os.File + lockfile *os.File // windows only dataref []byte // mmap'ed readonly, write throws SEGV data *[maxMapSize]byte datasz int + filesz int // current on disk file size meta0 *meta meta1 *meta pageSize int @@ -136,10 +150,12 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { options = DefaultOptions } db.NoGrowSync = options.NoGrowSync + db.MmapFlags = options.MmapFlags // Set default values for later DB operations. db.MaxBatchSize = DefaultMaxBatchSize db.MaxBatchDelay = DefaultMaxBatchDelay + db.AllocSize = DefaultAllocSize flag := os.O_RDWR if options.ReadOnly { @@ -162,7 +178,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { // if !options.ReadOnly. // The database file is locked using the shared lock (more than one process may // hold a lock at the same time) otherwise (options.ReadOnly is set). - if err := flock(db.file, !db.readOnly, options.Timeout); err != nil { + if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { _ = db.close() return nil, err } @@ -172,7 +188,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { // Initialize the database if it doesn't exist. if info, err := db.file.Stat(); err != nil { - return nil, fmt.Errorf("stat error: %s", err) + return nil, err } else if info.Size() == 0 { // Initialize new files with meta pages. if err := db.init(); err != nil { @@ -184,14 +200,14 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { if _, err := db.file.ReadAt(buf[:], 0); err == nil { m := db.pageInBuffer(buf[:], 0).meta() if err := m.validate(); err != nil { - return nil, fmt.Errorf("meta0 error: %s", err) + return nil, err } db.pageSize = int(m.pageSize) } } // Memory map the data file. - if err := db.mmap(0); err != nil { + if err := db.mmap(options.InitialMmapSize); err != nil { _ = db.close() return nil, err } @@ -248,10 +264,10 @@ func (db *DB) mmap(minsz int) error { // Validate the meta pages. if err := db.meta0.validate(); err != nil { - return fmt.Errorf("meta0 error: %s", err) + return err } if err := db.meta1.validate(); err != nil { - return fmt.Errorf("meta1 error: %s", err) + return err } return nil @@ -266,7 +282,7 @@ func (db *DB) munmap() error { } // mmapSize determines the appropriate size for the mmap given the current size -// of the database. The minimum size is 1MB and doubles until it reaches 1GB. +// of the database. The minimum size is 32KB and doubles until it reaches 1GB. // Returns an error if the new mmap size is greater than the max allowed. func (db *DB) mmapSize(size int) (int, error) { // Double the size from 32KB until 1GB. @@ -364,6 +380,10 @@ func (db *DB) Close() error { } func (db *DB) close() error { + if !db.opened { + return nil + } + db.opened = false db.freelist = nil @@ -382,7 +402,9 @@ func (db *DB) close() error { // No need to unlock read-only file. if !db.readOnly { // Unlock the file. - _ = funlock(db.file) + if err := funlock(db); err != nil { + log.Printf("bolt.Close(): funlock error: %s", err) + } } // Close the file descriptor. @@ -401,11 +423,15 @@ func (db *DB) close() error { // will cause the calls to block and be serialized until the current write // transaction finishes. // -// Transactions should not be depedent on one another. Opening a read +// Transactions should not be dependent on one another. Opening a read // transaction and a write transaction in the same goroutine can cause the // writer to deadlock because the database periodically needs to re-mmap itself // as it grows and it cannot do that while a read transaction is open. // +// If a long running read transaction (for example, a snapshot transaction) is +// needed, you might want to set DB.InitialMmapSize to a large enough value +// to avoid potential blocking of write transaction. +// // IMPORTANT: You must close read-only transactions after you are finished or // else the database will not reclaim old pages. func (db *DB) Begin(writable bool) (*Tx, error) { @@ -589,6 +615,136 @@ func (db *DB) View(fn func(*Tx) error) error { return nil } +// Batch calls fn as part of a batch. It behaves similar to Update, +// except: +// +// 1. concurrent Batch calls can be combined into a single Bolt +// transaction. +// +// 2. the function passed to Batch may be called multiple times, +// regardless of whether it returns error or not. +// +// This means that Batch function side effects must be idempotent and +// take permanent effect only after a successful return is seen in +// caller. +// +// The maximum batch size and delay can be adjusted with DB.MaxBatchSize +// and DB.MaxBatchDelay, respectively. +// +// Batch is only useful when there are multiple goroutines calling it. +func (db *DB) Batch(fn func(*Tx) error) error { + errCh := make(chan error, 1) + + db.batchMu.Lock() + if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) { + // There is no existing batch, or the existing batch is full; start a new one. + db.batch = &batch{ + db: db, + } + db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) + } + db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) + if len(db.batch.calls) >= db.MaxBatchSize { + // wake up batch, it's ready to run + go db.batch.trigger() + } + db.batchMu.Unlock() + + err := <-errCh + if err == trySolo { + err = db.Update(fn) + } + return err +} + +type call struct { + fn func(*Tx) error + err chan<- error +} + +type batch struct { + db *DB + timer *time.Timer + start sync.Once + calls []call +} + +// trigger runs the batch if it hasn't already been run. +func (b *batch) trigger() { + b.start.Do(b.run) +} + +// run performs the transactions in the batch and communicates results +// back to DB.Batch. +func (b *batch) run() { + b.db.batchMu.Lock() + b.timer.Stop() + // Make sure no new work is added to this batch, but don't break + // other batches. + if b.db.batch == b { + b.db.batch = nil + } + b.db.batchMu.Unlock() + +retry: + for len(b.calls) > 0 { + var failIdx = -1 + err := b.db.Update(func(tx *Tx) error { + for i, c := range b.calls { + if err := safelyCall(c.fn, tx); err != nil { + failIdx = i + return err + } + } + return nil + }) + + if failIdx >= 0 { + // take the failing transaction out of the batch. it's + // safe to shorten b.calls here because db.batch no longer + // points to us, and we hold the mutex anyway. + c := b.calls[failIdx] + b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] + // tell the submitter re-run it solo, continue with the rest of the batch + c.err <- trySolo + continue retry + } + + // pass success, or bolt internal errors, to all callers + for _, c := range b.calls { + if c.err != nil { + c.err <- err + } + } + break retry + } +} + +// trySolo is a special sentinel error value used for signaling that a +// transaction function should be re-run. It should never be seen by +// callers. +var trySolo = errors.New("batch function returned an error and should be re-run solo") + +type panicked struct { + reason interface{} +} + +func (p panicked) Error() string { + if err, ok := p.reason.(error); ok { + return err.Error() + } + return fmt.Sprintf("panic: %v", p.reason) +} + +func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { + defer func() { + if p := recover(); p != nil { + err = panicked{p} + } + }() + return fn(tx) +} + // Sync executes fdatasync() against the database file handle. // // This is not necessary under normal operation, however, if you use NoSync @@ -655,6 +811,38 @@ func (db *DB) allocate(count int) (*page, error) { return p, nil } +// grow grows the size of the database to the given sz. +func (db *DB) grow(sz int) error { + // Ignore if the new size is less than available file size. + if sz <= db.filesz { + return nil + } + + // If the data is smaller than the alloc size then only allocate what's needed. + // Once it goes over the allocation size then allocate in chunks. + if db.datasz < db.AllocSize { + sz = db.datasz + } else { + sz += db.AllocSize + } + + // Truncate and fsync to ensure file size metadata is flushed. + // https://github.com/boltdb/bolt/issues/284 + if !db.NoGrowSync && !db.readOnly { + if runtime.GOOS != "windows" { + if err := db.file.Truncate(int64(sz)); err != nil { + return fmt.Errorf("file resize error: %s", err) + } + } + if err := db.file.Sync(); err != nil { + return fmt.Errorf("file sync error: %s", err) + } + } + + db.filesz = sz + return nil +} + func (db *DB) IsReadOnly() bool { return db.readOnly } @@ -672,6 +860,19 @@ type Options struct { // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to // grab a shared lock (UNIX). ReadOnly bool + + // Sets the DB.MmapFlags flag before memory mapping the file. + MmapFlags int + + // InitialMmapSize is the initial mmap size of the database + // in bytes. Read transactions won't block write transaction + // if the InitialMmapSize is large enough to hold database mmap + // size. (See DB.Begin for more information) + // + // If <=0, the initial map size is 0. + // If initialMmapSize is smaller than the previous database size, + // it takes no effect. + InitialMmapSize int } // DefaultOptions represent the options used if nil options are passed into Open(). diff --git a/vendor/src/github.com/boltdb/bolt/node.go b/vendor/src/github.com/boltdb/bolt/node.go index c9fb21c7314..e9d64af81e3 100644 --- a/vendor/src/github.com/boltdb/bolt/node.go +++ b/vendor/src/github.com/boltdb/bolt/node.go @@ -463,43 +463,6 @@ func (n *node) rebalance() { target = n.prevSibling() } - // If target node has extra nodes then just move one over. - if target.numChildren() > target.minKeys() { - if useNextSibling { - // Reparent and move node. - if child, ok := n.bucket.nodes[target.inodes[0].pgid]; ok { - child.parent.removeChild(child) - child.parent = n - child.parent.children = append(child.parent.children, child) - } - n.inodes = append(n.inodes, target.inodes[0]) - target.inodes = target.inodes[1:] - - // Update target key on parent. - target.parent.put(target.key, target.inodes[0].key, nil, target.pgid, 0) - target.key = target.inodes[0].key - _assert(len(target.key) > 0, "rebalance(1): zero-length node key") - } else { - // Reparent and move node. - if child, ok := n.bucket.nodes[target.inodes[len(target.inodes)-1].pgid]; ok { - child.parent.removeChild(child) - child.parent = n - child.parent.children = append(child.parent.children, child) - } - n.inodes = append(n.inodes, inode{}) - copy(n.inodes[1:], n.inodes) - n.inodes[0] = target.inodes[len(target.inodes)-1] - target.inodes = target.inodes[:len(target.inodes)-1] - } - - // Update parent key for node. - n.parent.put(n.key, n.inodes[0].key, nil, n.pgid, 0) - n.key = n.inodes[0].key - _assert(len(n.key) > 0, "rebalance(2): zero-length node key") - - return - } - // If both this node and the target node are too small then merge them. if useNextSibling { // Reparent all child nodes being moved. diff --git a/vendor/src/github.com/boltdb/bolt/tx.go b/vendor/src/github.com/boltdb/bolt/tx.go index fe6c287f81a..b8510fdb87b 100644 --- a/vendor/src/github.com/boltdb/bolt/tx.go +++ b/vendor/src/github.com/boltdb/bolt/tx.go @@ -5,6 +5,7 @@ import ( "io" "os" "sort" + "strings" "time" "unsafe" ) @@ -29,6 +30,14 @@ type Tx struct { pages map[pgid]*page stats TxStats commitHandlers []func() + + // WriteFlag specifies the flag for write-related methods like WriteTo(). + // Tx opens the database file with the specified flag to copy the data. + // + // By default, the flag is unset, which works well for mostly in-memory + // workloads. For databases that are much larger than available RAM, + // set the flag to syscall.O_DIRECT to avoid trashing the page cache. + WriteFlag int } // init initializes the transaction. @@ -160,6 +169,8 @@ func (tx *Tx) Commit() error { // Free the old root bucket. tx.meta.root.root = tx.root.root + opgid := tx.meta.pgid + // Free the freelist and allocate new pages for it. This will overestimate // the size of the freelist but not underestimate the size (which would be bad). tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) @@ -174,6 +185,14 @@ func (tx *Tx) Commit() error { } tx.meta.freelist = p.id + // If the high water mark has moved up then attempt to grow the database. + if tx.meta.pgid > opgid { + if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { + tx.rollback() + return err + } + } + // Write dirty pages to disk. startTime = time.Now() if err := tx.write(); err != nil { @@ -184,8 +203,17 @@ func (tx *Tx) Commit() error { // If strict mode is enabled then perform a consistency check. // Only the first consistency error is reported in the panic. if tx.db.StrictMode { - if err, ok := <-tx.Check(); ok { - panic("check fail: " + err.Error()) + ch := tx.Check() + var errs []string + for { + err, ok := <-ch + if !ok { + break + } + errs = append(errs, err.Error()) + } + if len(errs) > 0 { + panic("check fail: " + strings.Join(errs, "\n")) } } @@ -263,7 +291,7 @@ func (tx *Tx) close() { } // Copy writes the entire database to a writer. -// This function exists for backwards compatibility. Use WriteTo() in +// This function exists for backwards compatibility. Use WriteTo() instead. func (tx *Tx) Copy(w io.Writer) error { _, err := tx.WriteTo(w) return err @@ -272,29 +300,47 @@ func (tx *Tx) Copy(w io.Writer) error { // WriteTo writes the entire database to a writer. // If err == nil then exactly tx.Size() bytes will be written into the writer. func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { - // Attempt to open reader directly. - var f *os.File - if f, err = os.OpenFile(tx.db.path, os.O_RDONLY|odirect, 0); err != nil { - // Fallback to a regular open if that doesn't work. - if f, err = os.OpenFile(tx.db.path, os.O_RDONLY, 0); err != nil { - return 0, err - } + // Attempt to open reader with WriteFlag + f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) + if err != nil { + return 0, err } + defer func() { _ = f.Close() }() - // Copy the meta pages. - tx.db.metalock.Lock() - n, err = io.CopyN(w, f, int64(tx.db.pageSize*2)) - tx.db.metalock.Unlock() + // Generate a meta page. We use the same page data for both meta pages. + buf := make([]byte, tx.db.pageSize) + page := (*page)(unsafe.Pointer(&buf[0])) + page.flags = metaPageFlag + *page.meta() = *tx.meta + + // Write meta 0. + page.id = 0 + page.meta().checksum = page.meta().sum64() + nn, err := w.Write(buf) + n += int64(nn) if err != nil { - _ = f.Close() - return n, fmt.Errorf("meta copy: %s", err) + return n, fmt.Errorf("meta 0 copy: %s", err) + } + + // Write meta 1 with a lower transaction id. + page.id = 1 + page.meta().txid -= 1 + page.meta().checksum = page.meta().sum64() + nn, err = w.Write(buf) + n += int64(nn) + if err != nil { + return n, fmt.Errorf("meta 1 copy: %s", err) + } + + // Move past the meta pages in the file. + if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil { + return n, fmt.Errorf("seek: %s", err) } // Copy data pages. wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2)) n += wn if err != nil { - _ = f.Close() return n, err } @@ -501,7 +547,7 @@ func (tx *Tx) writeMeta() error { } // page returns a reference to the page with a given id. -// If page has been written to then a temporary bufferred page is returned. +// If page has been written to then a temporary buffered page is returned. func (tx *Tx) page(id pgid) *page { // Check the dirty pages first. if tx.pages != nil {