Skip to content

Commit

Permalink
reflow/errors: provide errors.Is for comparing (chained) kinds; add e…
Browse files Browse the repository at this point in the history
…rrors.Net, errors.Restartable

Summary:
This change adds several improvements to error handling code:

- package func errors.Is, to examine an error's kind
- the new error kind errors.Net to indicate indeterminate networking errors
- package func errors.Restartable to classify errors that are restartable (i.e., nonfatal)

We then use these to reclassify http-level errors as errors.Net; the
runner restarts all Net errors along with the errors that are
considered transient.

This fixes a current error misclassification, where nonfatal network errors
do not cause run restarts.

Reviewers: pgopal, razvanm

Reviewed By: pgopal

Differential Revision: https://phabricator.grailbio.com/D8956

fbshipit-source-id: a856f31
  • Loading branch information
mariusae authored and grailbot committed Dec 22, 2017
1 parent 039228d commit f1331e4
Show file tree
Hide file tree
Showing 17 changed files with 92 additions and 53 deletions.
4 changes: 2 additions & 2 deletions cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func TestCache(t *testing.T) {
Transferer: transferer{},
}
key := reflow.Digester.FromString
if _, err := cache.Lookup(ctx, key("hello")); err == nil || !errors.Match(errors.NotExist, err) {
if _, err := cache.Lookup(ctx, key("hello")); err == nil || !errors.Is(errors.NotExist, err) {
t.Errorf("expected notexist error, got %v", err)
}

Expand All @@ -83,7 +83,7 @@ func TestCache(t *testing.T) {
},
}
err := cache.Write(ctx, key("key1"), v, local)
if err == nil || !errors.Match(errors.NotExist, err) {
if err == nil || !errors.Is(errors.NotExist, err) {
t.Errorf("expected nontexist error, got %v", err)
}

Expand Down
2 changes: 1 addition & 1 deletion ec2cluster/ec2cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ func (c *Cluster) loop() {
npending--
switch {
case inst.Err() == nil:
case errors.Match(errors.Unavailable, inst.Err()):
case errors.Is(errors.Unavailable, inst.Err()):
c.Log.Printf("instance type %s unavailable in region %s: %v", inst.Config.Type, c.Region, inst.Err())
c.instanceState.Unavailable(inst.Config)
fallthrough
Expand Down
4 changes: 2 additions & 2 deletions ec2cluster/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,9 @@ func (i *instance) Go(ctx context.Context) {
}
switch {
case i.err == nil:
case errors.Match(errors.Fatal, i.err):
case errors.Is(errors.Fatal, i.err):
return
case errors.Match(errors.Unavailable, i.err):
case errors.Is(errors.Unavailable, i.err):
// Return these immediately because our caller may be able to handle
// them by selecting a different instance type.
return
Expand Down
74 changes: 51 additions & 23 deletions errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ const (
Fatal
// Invalid indicates an invalid state or data.
Invalid
// Net indicates a networking error.
Net

maxKind
)
Expand Down Expand Up @@ -102,6 +104,8 @@ func (k Kind) String() string {
return "fatal"
case Invalid:
return "invalid"
case Net:
return "network error"
}
}

Expand All @@ -120,6 +124,7 @@ var kind2string = [maxKind]string{
Unavailable: "Unavailable",
Fatal: "Fatal",
Invalid: "Invalid",
Net: "Net",
}

var string2kind = map[string]Kind{
Expand All @@ -137,6 +142,7 @@ var string2kind = map[string]Kind{
"Unavailable": Unavailable,
"Fatal": Fatal,
"Invalid": Invalid,
"Net": Net,
}

// Error defines a Reflow error. It is used to indicate an error
Expand Down Expand Up @@ -414,38 +420,54 @@ func (e *Error) UnmarshalJSON(b []byte) error {
// that every nonempty field in err1 has the same value in err2. If
// err1 is an *Error with a non-nil Err field, Match recurs to check
// that the two errors chain of underlying errors also match.
func Match(err1 interface{}, err2 error) bool {
func Match(err1, err2 error) bool {
e2 := Recover(err2)
switch e1 := err1.(type) {
default:
e1, ok := err1.(*Error)
if !ok {
return false
case Kind:
return e1 == e2.Kind
case *Error:
if e1.Op != "" && e2.Op != e1.Op {
return false
}
if len(e1.Arg) != len(e2.Arg) {
}
if e1.Op != "" && e2.Op != e1.Op {
return false
}
if len(e1.Arg) != len(e2.Arg) {
return false
}
for i := range e1.Arg {
if e1.Arg[i] != e2.Arg[i] {
return false
}
for i := range e1.Arg {
if e1.Arg[i] != e2.Arg[i] {
return false
}
}
if e1.Kind != Other && e2.Kind != e1.Kind {
return false
}
if e1.Err != nil {
if _, ok := e1.Err.(*Error); ok {
return Match(e1.Err, e2.Err)
}
if e1.Kind != Other && e2.Kind != e1.Kind {
if e2.Err == nil || e2.Err.Error() != e1.Err.Error() {
return false
}
if e1.Err != nil {
if _, ok := e1.Err.(*Error); ok {
return Match(e1.Err, e2.Err)
}
if e2.Err == nil || e2.Err.Error() != e1.Err.Error() {
return false
}
}
return true
}

// Is tells whether an error has a specified kind, except for the
// indeterminate kind Other. In the case an error has kind Other, the
// chain is traversed until a non-Other error is encountered.
func Is(kind Kind, err error) bool {
return is(kind, Recover(err))
}

func is(kind Kind, e *Error) bool {
if e.Kind != Other {
return e.Kind == kind
}
if e.Err != nil {
if e2, ok := e.Err.(*Error); ok {
return is(kind, e2)
}
return true
}
return false
}

// Transient tells whether error err is likely transient, and thus may
Expand All @@ -458,3 +480,9 @@ func Transient(err error) bool {
return false
}
}

// Restartable determines if the provided error is restartable.
// Restartable errors include transient errors and network errors.
func Restartable(err error) bool {
return Transient(err) || Is(Net, err)
}
18 changes: 18 additions & 0 deletions errors/errors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,21 @@ func TestError(t *testing.T) {
t.Errorf("got %q, want %q", got, want)
}
}

type isTemporary bool

func (t isTemporary) Error() string { return "maybe a temporary error" }
func (t isTemporary) Temporary() bool { return bool(t) }

func TestIs(t *testing.T) {
for kind := Other; kind < maxKind; kind++ {
if got, want := Is(kind, E(kind)), kind != Other; got != want {
t.Errorf("got %v, want %v", got, want)
}
}
for _, temp := range []bool{true, false} {
if got, want := Is(Temporary, isTemporary(temp)), temp; got != want {
t.Errorf("got %v, want %v", got, want)
}
}
}
4 changes: 2 additions & 2 deletions eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ func (e *Eval) eval(ctx context.Context, f *Flow) (err error) {
// Don't print cancellation errors, since they are follow-on errors
// from upstream ones that have already been reported.
/*
if !errors.Match(errors.E(errors.Canceled), err) {
if !errors.Is(errors.E(errors.Canceled), err) {
e.Log.Errorf("eval %s runtime error: %v", f.ExecString(false), err)
}
*/
Expand Down Expand Up @@ -1068,7 +1068,7 @@ func (e *Eval) lookup(ctx context.Context, f *Flow) {
if err == nil {
break
}
if !errors.Match(errors.NotExist, err) {
if !errors.Is(errors.NotExist, err) {
e.Log.Errorf("cache.Lookup %v: %v", f, err)
}
}
Expand Down
2 changes: 1 addition & 1 deletion local/testutil/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ func TestPool(t *testing.T, p pool.Pool) {
}

_, err = alloc.Keepalive(ctx, intv)
if !errors.Match(errors.NotExist, err) {
if !errors.Is(errors.NotExist, err) {
t.Fatalf("expected NotExist error, got %v", err)
}
// Look it up again to get its zombie.
Expand Down
2 changes: 1 addition & 1 deletion pool/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func Allocate(ctx context.Context, pool Pool, min, max reflow.Resources, labels
if err == nil {
return alloc, err
}
if !errors.Match(errors.NotExist, err) {
if !errors.Is(errors.NotExist, err) {
return nil, err
}
}
Expand Down
2 changes: 1 addition & 1 deletion pool/pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func TestMux(t *testing.T) {
t.Errorf("expected error for %s", c.arg)
continue
}
if !errors.Match(c.kind, err) {
if c.kind != errors.Other && !errors.Is(c.kind, err) {
t.Errorf("got %v, want %v", errors.Recover(err).Kind, c.kind)
t.Errorf("%v", errors.Recover(err).Kind == c.kind)
}
Expand Down
2 changes: 1 addition & 1 deletion repository/missing.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func Missing(ctx context.Context, r reflow.Repository, files ...reflow.File) ([]
cancel()
if err == nil {
exists[i] = true
} else if errors.Match(errors.NotExist, err) {
} else if errors.Is(errors.NotExist, err) {
return nil
}
return err
Expand Down
4 changes: 2 additions & 2 deletions repository/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func Transfer(ctx context.Context, dst, src reflow.Repository, id digest.Digest)
switch {
case err == nil:
return nil
case errors.Match(errors.NotSupported, err):
case errors.Is(errors.NotSupported, err):
default:
return err
}
Expand All @@ -75,7 +75,7 @@ func Transfer(ctx context.Context, dst, src reflow.Repository, id digest.Digest)
switch {
case err == nil:
return nil
case errors.Match(errors.NotSupported, err):
case errors.Is(errors.NotSupported, err):
default:
return err
}
Expand Down
4 changes: 2 additions & 2 deletions repository/server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func TestClientServerFile(t *testing.T) {

id2 := reflow.Digester.FromString("hello, world")
_, err = repo.Get(ctx, id2)
if !errors.Match(errors.NotExist, err) {
if !errors.Is(errors.NotExist, err) {
t.Fatalf("expected NotExist, got %v", err)
}
}
Expand Down Expand Up @@ -156,7 +156,7 @@ func TestClientServerCollect(t *testing.T) {
t.Fatal(err)
}
_, err = repo.Get(ctx, id2)
if !errors.Match(errors.NotExist, err) {
if !errors.Is(errors.NotExist, err) {
t.Errorf("expected NotExist, got %v", err)
}
body, err := repo.Get(ctx, id1)
Expand Down
9 changes: 1 addition & 8 deletions rest/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/http/httputil"
"net/url"
Expand Down Expand Up @@ -121,13 +120,7 @@ func (c *ClientCall) Do(ctx context.Context, body io.Reader) (int, error) {
case context.Canceled, context.DeadlineExceeded:
c.err = errors.Recover(err)
default:
if err, ok := err.(net.Error); ok {
// net.Error defines Temporary() bool
c.err = errors.E(err)
} else {
// Unless explicitly labeled otherwise, we assume these network errors
c.err = errors.E(errors.Temporary, err)
}
c.err = errors.E(errors.Net, err)
}
if c.log.At(log.DebugLevel) {
if c.resp != nil {
Expand Down
2 changes: 1 addition & 1 deletion rest/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func TestError(t *testing.T) {
if err := json.NewDecoder(w.Result().Body).Decode(e); err != nil {
t.Fatal(err)
}
if !errors.Match(errors.NotExist, e) {
if !errors.Is(errors.NotExist, e) {
t.Errorf("expected %v to be NotExist", e)
}
if got, want := e.Error(), "op1: resource does not exist"; got != want {
Expand Down
4 changes: 2 additions & 2 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ func (r *Runner) Do(ctx context.Context) bool {
r.Err = errors.Recover(err)
// We retry potentially transient errors here: there is no harm
// beyond extra resource usage.
if errors.Transient(r.Err) {
r.Log.Debugf("marking run for retry after transient error %v", r.Err)
if errors.Restartable(r.Err) {
r.Log.Debugf("marking run for retry after restartable error %v", r.Err)
r.Phase = Retry
} else {
r.Log.Debugf("marking run done after nonrecoverable error %v", r.Err)
Expand Down
2 changes: 1 addition & 1 deletion runner/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func (w *worker) do(ctx context.Context, f *reflow.Flow) (err error) {
panic("unhandled state")
}
if err != nil {
if errors.Match(errors.Temporary, err) || errors.Match(errors.Timeout, err) {
if errors.Is(errors.Temporary, err) || errors.Is(errors.Timeout, err) {
n++
} else {
break
Expand Down
6 changes: 3 additions & 3 deletions tool/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,13 @@ retriable.`
bgcancel()
cancel()
if run.Err != nil {
if errors.Match(errors.Eval, run.Err) {
if errors.Is(errors.Eval, run.Err) {
// Error that occured during evaluation. Probably not recoverable.
// TODO(marius): if this was caused by an underyling exit (from a tool)
// then propagate this here.
os.Exit(11)
}
if errors.Transient(run.Err) {
if errors.Restartable(run.Err) {
os.Exit(10)
}
os.Exit(1)
Expand Down Expand Up @@ -417,7 +417,7 @@ func (c *Cmd) runLocal(ctx context.Context, config runConfig, execLogger *log.Lo
}
if err = eval.Do(ctx); err != nil {
c.Errorln(err)
if errors.Transient(err) {
if errors.Restartable(err) {
os.Exit(10)
}
os.Exit(1)
Expand Down

0 comments on commit f1331e4

Please sign in to comment.