Skip to content
This repository was archived by the owner on Mar 23, 2024. It is now read-only.

Commit e8daae8

Browse files
author
fbockaj
committed
Merge branch 'master' into privval_tests
2 parents 65aee95 + e82503d commit e8daae8

File tree

16 files changed

+258
-127
lines changed

16 files changed

+258
-127
lines changed

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ SignCTRL is a high availability solution for Tendermint that enables the creatio
1010
1111
## Why SignCTRL?
1212

13-
1) Built-in double-signing protection.
14-
2) Very lightweight due to not introducing any additional communication overhead for coordination work.
15-
3) No more sentry nodes are needed, as the validators in the set back each other up.
16-
4) A minimal setup requires only two nodes to be run.
13+
1) Built-in double-signing protection
14+
2) Very lightweight (no additional communication overhead for coordination work)
15+
3) No more sentry nodes are needed, as the validators in the set back each other up
16+
4) Minimal setup requires only two nodes
1717

1818
## Requirements
1919

@@ -25,7 +25,7 @@ SignCTRL is a high availability solution for Tendermint that enables the creatio
2525
Get the repository via
2626

2727
```shell
28-
$ git clone https://github.com/BlockscapeNetwork/signctrl && cd signctrl
28+
$ git clone https://github.com/BlockscapeNetwork/signctrl
2929
```
3030

3131
## Build & Install
@@ -45,7 +45,8 @@ $ make install
4545

4646
## Getting Started
4747

48-
To get started, please see the [Guides/Tutorials](docs/guides/README.md).
48+
To get started, please see the [Guides/Tutorials](docs/guides/README.md).<br>
49+
If you get stuck, see the [FAQ](docs/core/faq.md).
4950

5051
## Documentation
5152

cmd/start.go

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@ package cmd
22

33
import (
44
"fmt"
5-
"log"
65
"net/http"
76
"os"
87
"os/signal"
98
"strings"
109
"syscall"
11-
"time"
1210

1311
"github.com/BlockscapeNetwork/signctrl/config"
1412
"github.com/BlockscapeNetwork/signctrl/privval"
@@ -34,9 +32,9 @@ var (
3432
cfgDir := config.Dir()
3533

3634
// Set the logger and its minimum log level.
37-
logger := log.New(os.Stderr, "", 0)
35+
logger := types.NewSyncLogger(os.Stderr, "", 0)
3836
filter := &logutils.LevelFilter{
39-
Levels: config.LogLevels,
37+
Levels: types.LogLevels,
4038
MinLevel: logutils.LogLevel(cfg.Base.LogLevel),
4139
Writer: os.Stderr,
4240
}
@@ -64,7 +62,7 @@ var (
6462

6563
// Start the SignCTRL service.
6664
if err := pv.Start(); err != nil {
67-
fmt.Println(err)
65+
logger.Error(err.Error())
6866
if err := pv.Stop(); err != nil {
6967
fmt.Println(err)
7068
os.Exit(1)
@@ -78,21 +76,15 @@ var (
7876

7977
select {
8078
case <-pv.Quit(): // Used for self-induced shutdown
81-
pv.Logger.Println("[INFO] signctrl: Shutting SignCTRL down... \u23FB (quit)")
79+
pv.Logger.Info("Shutting SignCTRL down... \u23FB (quit)")
8280
case <-sigs: // The sigs channel is only used for OS interrupt signals
83-
pv.Logger.Println("[INFO] signctrl: Shutting SignCTRL down... \u23FB (user/os interrupt)")
84-
}
85-
86-
if err := pv.Stop(); err != nil {
87-
fmt.Println(err)
88-
os.Exit(1)
81+
pv.Logger.Info("Shutting SignCTRL down... \u23FB (user/os interrupt)")
82+
if err := pv.Stop(); err != nil {
83+
logger.Error(err.Error())
84+
os.Exit(1)
85+
}
8986
}
9087

91-
// TODO: The current logger is async which is why some of the last log messages before
92-
// shutdown aren't printed out. Make the logger sync. For now, wait a second for everything
93-
// to be printed out.
94-
time.Sleep(time.Second)
95-
9688
// Terminate the process gracefully with exit code 0.
9789
os.Exit(0)
9890
},

config/config.go

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212
"time"
1313

14+
"github.com/BlockscapeNetwork/signctrl/types"
1415
"github.com/hashicorp/logutils"
1516
"github.com/spf13/viper"
1617
)
@@ -20,11 +21,6 @@ const (
2021
File = "config.toml"
2122
)
2223

23-
var (
24-
// LogLevels defines the loglevels for SignCTRL logs.
25-
LogLevels = []logutils.LogLevel{"DEBUG", "INFO", "WARN", "ERR"}
26-
)
27-
2824
// Base defines the base configuration parameters for SignCTRL.
2925
type Base struct {
3026
// LogLevel determines the minimum log level for SignCTRL logs.
@@ -56,11 +52,36 @@ type Base struct {
5652
RetryDialAfter string `mapstructure:"retry_dial_after"`
5753
}
5854

55+
// validateAddress validates the configuration's addresses.
56+
func validateAddress(addr string, addrName string) error {
57+
protocol := regexp.MustCompile(`(tcp|unix)://`).FindString(addr)
58+
switch protocol {
59+
case "":
60+
return fmt.Errorf("%v is missing the protocol", addrName)
61+
62+
case "tcp://":
63+
host, _, err := net.SplitHostPort(strings.TrimPrefix(addr, protocol))
64+
if err != nil {
65+
return fmt.Errorf("%v is not in the host:port format", addrName)
66+
}
67+
if ip := net.ParseIP(host); ip == nil {
68+
return fmt.Errorf("%v is not a valid IPv4 address", addrName)
69+
}
70+
71+
case "unix://":
72+
if !strings.HasSuffix(addr, ".sock") {
73+
return fmt.Errorf("%v is not a unix domain socket address", addrName)
74+
}
75+
}
76+
77+
return nil
78+
}
79+
5980
// validate validates the configuration's base section.
6081
func (b Base) validate() error {
6182
var errs string
62-
if match, _ := regexp.MatchString(logLevelsToRegExp(&LogLevels), b.LogLevel); !match {
63-
errs += fmt.Sprintf("\tlog_level must be one of the following: %v\n", LogLevels)
83+
if match, _ := regexp.MatchString(logLevelsToRegExp(&types.LogLevels), b.LogLevel); !match {
84+
errs += fmt.Sprintf("\tlog_level must be one of the following: %v\n", types.LogLevels)
6485
}
6586
if b.SetSize < 2 {
6687
errs += "\tset_size must be 2 or higher\n"
@@ -71,34 +92,11 @@ func (b Base) validate() error {
7192
if b.StartRank < 1 {
7293
errs += "\tstart_rank must be 1 or higher\n"
7394
}
74-
protocol := regexp.MustCompile(`(tcp|unix)://`).FindString(b.ValidatorListenAddress)
75-
if protocol == "" {
76-
errs += "\tvalidator_laddr is missing the protocol\n"
77-
} else if protocol == "tcp://" {
78-
host, _, err := net.SplitHostPort(strings.TrimPrefix(b.ValidatorListenAddress, protocol))
79-
if err != nil {
80-
errs += "\tvalidator_laddr is not in the host:port format\n"
81-
} else {
82-
if ip := net.ParseIP(host); ip == nil {
83-
errs += "\tvalidator_laddr is not a valid IPv4 address\n"
84-
}
85-
}
86-
} else if protocol == "unix://" {
87-
if !strings.HasSuffix(b.ValidatorListenAddress, ".sock") {
88-
errs += "\nvalidator_laddr is not a unix domain socket address\n"
89-
}
95+
if err := validateAddress(b.ValidatorListenAddress, "validator_laddr"); err != nil {
96+
errs += fmt.Sprintf("\t%v\n", err.Error())
9097
}
91-
if !strings.HasPrefix(b.ValidatorListenAddressRPC, "tcp://") {
92-
errs += "\tvalidator_laddr_rpc is missing the protocol\n"
93-
} else {
94-
host, _, err := net.SplitHostPort(strings.TrimPrefix(b.ValidatorListenAddressRPC, "tcp://"))
95-
if err != nil {
96-
errs += "\tvalidator_laddr_rpc is not in the host:port format\n"
97-
} else {
98-
if ip := net.ParseIP(host); ip == nil {
99-
errs += "\tvalidator_laddr_rpc is not a valid IPv4 address\n"
100-
}
101-
}
98+
if err := validateAddress(b.ValidatorListenAddressRPC, "validator_laddr_rpc"); err != nil {
99+
errs += fmt.Sprintf("\t%v\n", err.Error())
102100
}
103101
if b.RetryDialAfter == "" {
104102
errs += "\tretry_dial_after must not be empty\n"

connection/dial.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package connection
33
import (
44
"errors"
55
"fmt"
6-
"log"
76
"net"
87
"os"
98
"os/signal"
@@ -12,6 +11,7 @@ import (
1211
"syscall"
1312
"time"
1413

14+
"github.com/BlockscapeNetwork/signctrl/types"
1515
tm_ed25519 "github.com/tendermint/tendermint/crypto/ed25519"
1616
tm_p2pconn "github.com/tendermint/tendermint/p2p/conn"
1717
)
@@ -29,28 +29,28 @@ var (
2929

3030
// retryDialTCP keeps dialing the given TCP socket address until success, using the
3131
// given connkey for encryption and returns the secret connection.
32-
func retryDialTCP(address string, connkey tm_ed25519.PrivKey, sigs chan os.Signal, logger *log.Logger) (net.Conn, error) {
32+
func retryDialTCP(address string, connkey tm_ed25519.PrivKey, sigs chan os.Signal, logger *types.SyncLogger) (net.Conn, error) {
3333
for {
3434
select {
3535
case <-sigs:
3636
return nil, ErrAbortDial
3737

3838
case <-time.After(RetryDialInterval):
3939
if conn, err := net.Dial("tcp", strings.TrimPrefix(address, "tcp://")); err == nil {
40-
logger.Println("[INFO] signctrl: Successfully dialed the validator ✓")
40+
logger.Info("Successfully dialed the validator ✓")
4141
return tm_p2pconn.MakeSecretConnection(conn, connkey)
4242
}
4343

4444
// After the first dial, dial in intervals of 1 second.
4545
RetryDialInterval = time.Second
46-
logger.Println("[DEBUG] signctrl: Retry dialing...")
46+
logger.Debug("Retry dialing...")
4747
}
4848
}
4949
}
5050

5151
// retryDialUnix keeps dialing the given unix domain socket address until success and
5252
// returns the connection.
53-
func retryDialUnix(address string, sigs chan os.Signal, logger *log.Logger) (net.Conn, error) {
53+
func retryDialUnix(address string, sigs chan os.Signal, logger *types.SyncLogger) (net.Conn, error) {
5454
addrWithoutProtocol := strings.TrimPrefix(address, "unix://")
5555

5656
for {
@@ -61,21 +61,21 @@ func retryDialUnix(address string, sigs chan os.Signal, logger *log.Logger) (net
6161
case <-time.After(RetryDialInterval):
6262
unixAddr := &net.UnixAddr{Name: addrWithoutProtocol, Net: "unix"}
6363
if conn, err := net.DialUnix("unix", nil, unixAddr); err == nil {
64-
logger.Println("[INFO] signctrl: Successfully dialed the validator ✓")
64+
logger.Info("Successfully dialed the validator ✓")
6565
return conn, nil
6666
}
6767

6868
// After the first dial, dial in intervals of 1 second.
6969
os.RemoveAll(addrWithoutProtocol)
7070
RetryDialInterval = time.Second
71-
logger.Println("[DEBUG] signctrl: Retry dialing...")
71+
logger.Debug("Retry dialing...")
7272
}
7373
}
7474
}
7575

7676
// RetryDial keeps dialing the given address until success and returns the connection.
77-
func RetryDial(cfgDir, address string, logger *log.Logger) (net.Conn, error) {
78-
logger.Printf("[INFO] signctrl: Dialing %v... (Use Ctrl+C to abort)", address)
77+
func RetryDial(cfgDir, address string, logger *types.SyncLogger) (net.Conn, error) {
78+
logger.Info("Dialing %v... (Use Ctrl+C to abort)", address)
7979
sigs := make(chan os.Signal, 1)
8080
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
8181

@@ -86,7 +86,7 @@ func RetryDial(cfgDir, address string, logger *log.Logger) (net.Conn, error) {
8686
// a secret/encrypted connection to the validator.
8787
connKey, err := LoadConnKey(cfgDir)
8888
if err != nil {
89-
return nil, fmt.Errorf("[ERR] signctrl: couldn't load conn.key: %v", err)
89+
return nil, fmt.Errorf("couldn't load conn.key: %v", err)
9090
}
9191
return retryDialTCP(address, connKey, sigs, logger)
9292

connection/dial_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ import (
55
"crypto/rand"
66
"fmt"
77
"io/ioutil"
8-
"log"
98
"net"
109
"os"
1110
"sync"
1211
"testing"
1312
"time"
1413

14+
"github.com/BlockscapeNetwork/signctrl/types"
1515
"github.com/stretchr/testify/assert"
1616
tm_ed25519 "github.com/tendermint/tendermint/crypto/ed25519"
1717
tm_p2pconn "github.com/tendermint/tendermint/p2p/conn"
@@ -70,7 +70,7 @@ func TestRetryDialTCP_NoConnKey(t *testing.T) {
7070
assert.NoError(t, err)
7171
}()
7272

73-
conn, err := RetryDial(cfgDir, "tcp://"+laddr, log.New(ioutil.Discard, "", 0))
73+
conn, err := RetryDial(cfgDir, "tcp://"+laddr, types.NewSyncLogger(ioutil.Discard, "", 0))
7474
assert.Nil(t, conn)
7575
assert.Error(t, err)
7676
}
@@ -92,7 +92,7 @@ func TestRetryDialTCP_WithConnKey(t *testing.T) {
9292
assert.NoError(t, err)
9393
}()
9494

95-
conn, err := RetryDial(cfgDir, "tcp://"+laddr, log.New(ioutil.Discard, "", 0))
95+
conn, err := RetryDial(cfgDir, "tcp://"+laddr, types.NewSyncLogger(ioutil.Discard, "", 0))
9696
assert.NotNil(t, conn)
9797
assert.NoError(t, err)
9898
}
@@ -131,15 +131,15 @@ func TestRetryDialUnix(t *testing.T) {
131131
assert.NoError(t, err)
132132
}()
133133

134-
conn, err := RetryDial(cfgDir, "unix://"+sockAddr, log.New(ioutil.Discard, "", 0))
134+
conn, err := RetryDial(cfgDir, "unix://"+sockAddr, types.NewSyncLogger(ioutil.Discard, "", 0))
135135
assert.NotNil(t, conn)
136136
assert.NoError(t, err)
137137

138138
wg.Wait()
139139
}
140140

141141
func TestRetryDialUnknown(t *testing.T) {
142-
conn, err := RetryDial(".", "invalid://127.0.0.1:3000", log.New(ioutil.Discard, "", 0))
142+
conn, err := RetryDial(".", "invalid://127.0.0.1:3000", types.NewSyncLogger(ioutil.Discard, "", 0))
143143
assert.Nil(t, conn)
144144
assert.Error(t, err)
145145
}

docs/core/ds-protection.md

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,24 @@ This page covers how SignCTRL's underlying ranking system provides protection ag
44

55
## SignCTRL Set
66

7-
A SignCTRL set consists of two or more validators that all share the same keys, and thus represent the same validator entity. Letting all of those validators sign blocks simultaneously is a sure-fire way of getting slashed for double-signing, so there needs to be some form of coordination in terms of which validator in the set has permission to sign at any given point in time. Thus, the idea is very simple - only one validator in the set should sign blocks while the others back it up if it becomes unavailable.
7+
A SignCTRL set consists of two or more validators that share the same keypair, and thus represent the same validator entity. Letting all of those validators sign blocks simultaneously is a sure-fire way of getting slashed for double-signing, so there needs to be some form of coordination in terms of which validator in the set has permission to sign at any given point in time. Thus, the idea is very simple - only one validator in the set should sign blocks while the others back it up if it becomes unavailable.
88

99
Contrary to approaches that add a consensus layer, and therefore more communication overhead, to the set of redundant validators, like [Raft](https://raft.github.io/), SignCTRL uses the blockchain itself as a perfectly synchronous communication line that the validators use to coordinate signing permissions.
1010

1111
## Validator Ranking
1212

13-
SignCTRL employs a ranking system for the validators in its set which essentially just ranks the validators in descending order - and this ranking system is actually the key factor enabling double-signing protection.
13+
SignCTRL employs a ranking system for the validators in its set that ranks the validators in descending order, and it is this ranking system that is the key factor enabling double-signing protection.
1414

15-
A node's rank determines which blocks exactly it has permission to sign and which not. Only the highest-ranked validator signs blocks while the others serve as backups. The validators can move up one rank at a time if one key criterion is met - and that is if too many blocks have been missed in a row. So, rank updates are triggered by too many blocks on the blockchain being missed in a row.
15+
### Ranks
16+
17+
A node's rank determines exactly which blocks it has permission to sign and which it does not. Only the highest-ranked validator signs blocks while the others queue up as backups. The validators can move up one rank at a time if one key criterion is met - and that is if too many blocks have been missed in a row. So, rank updates are triggered by too many blocks on the blockchain being missed in a row.
1618

1719
![Rank Updates](../imgs/rank-update.gif)
1820

19-
In order to detect missed blocks, the validators closely monitor every single block in the blockchain. This includes looking into every last block's commit signatures and checking for the own validator's signature. If the signature is missing, every validator in the set will see it and increment an internal counter for missed blocks in a row. If a certain threshold is exceeded, ranks 2..n will notice first and accordingly move up one rank each. Once rank 1 becomes available again, it will have to sync up its blockchain state. Eventually, while syncing, it will also notice that is has been replaced and needs to shut itself down. It can then later be readded to the set with the lowest rank, though.
21+
In order to detect missed blocks, the validators closely monitor every single block in the blockchain. This includes looking into every last block's commit signatures and checking for their own validator's signature. If the signature is missing, every validator in the set will see it and increment an internal counter. If a certain threshold is exceeded, ranks 2..n will notice first and accordingly move up one rank each. Once rank 1 becomes available again, it will have to sync up its blockchain state. Eventually, while syncing, it will also notice that it has been replaced and needs to shut itself down. It can then later be re-added to the set with the lowest rank, though.
22+
23+
### State
24+
25+
Before the node shuts itself down, it persists its last rank and last height in a separate `signctrl_state.json` file. This file acts as a protection mechanism against launching a validator with a rank that has been rendered obsolete by a rank update in the set, which is the case if the requested height differs by more than `threshold+1` from the last height persisted in the state file.
2026

21-
Before the node shuts itself down, though, it persists its last rank in a separate `last_rank.json` file which indicates that its last rank was different than the rank specified in the `config.toml`. On startup it then checks for the last rank and starts up with that instead of the one specified in the configuration file.
27+
For now, the only way to recover from a deprecated state is to delete the `signctrl_state.json` and start the validator back up again with the correct `start_rank` in its `config.toml`.

0 commit comments

Comments
 (0)