
Commit b9ce851

Fresh take on codec APIs, and some tokenization utilities.
The tokenization system may look familiar to refmt's tokens -- and indeed it surely is inspired by and in the same pattern -- but it hews a fair bit closer to the IPLD Data Model definitions of kinds, and it also includes links as a token kind.

The presence of link as a token kind means that if we build codecs around these, the handling of links will be better and more consistently abstracted. (The current dagjson and dagcbor implementations are instructive for what an odd mess results when most of the tokenization happens before the level that figures out links; I think we can improve on that code greatly by moving the barriers around a bit.)

I made both all-at-once and pumpable versions of both the token producers and the token consumers. Each is useful in different scenarios. The pumpable versions are probably generally a bit slower, but they're also more composable. (The all-at-once versions can't be glued to each other; only to pumpable versions.)

Some new and much reduced contracts for codecs are added, but not yet implemented by anything in this commit. The comments on them are lengthy and detail the ways I'm thinking codecs should be (re)implemented in the future to maximize usability and performance and also allow some configurability. (The current interfaces "work", but irritate me a great deal every time I use them; to be honest, I just plain guessed badly at what the API here should be the first time I did it. Configurability should be easy to *not* engage in, but also easier when you do -- and in particular, it should not require reaching into *another* library's packages to do it!) More work will be required to bring this to fruition.

It may be particularly interesting to notice that the tokenization systems also allow complex keys -- maps and lists can show up as the keys to maps! This is something not allowed by the Data Model (for, dare I say, obvious reasons)... but it is possible at the schema layer (e.g. structs with representation strategies that make them representable as strings can be used as map keys), so these functions support it.
1 parent e255109 commit b9ce851
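
To make the all-at-once vs. pumpable distinction described in the commit message above concrete, here is a minimal sketch in terms of the Token type this commit adds. The TokenReader interface and sliceTokenReader type are hypothetical illustrations of the pumpable pattern, not the producer/consumer APIs the commit actually introduces.

package main

import (
	"fmt"
	"io"

	"github.com/ipld/go-ipld-prime/codec/codectools"
)

// TokenReader is a hypothetical pumpable token source: each call returns the
// next token, or io.EOF when the stream is done.  (The name is illustrative
// only; it is not necessarily the interface this commit introduces.)
type TokenReader interface {
	ReadToken() (*codectools.Token, error)
}

// sliceTokenReader adapts an in-memory token slice to the pumpable interface.
type sliceTokenReader struct {
	toks []codectools.Token
	i    int
}

func (r *sliceTokenReader) ReadToken() (*codectools.Token, error) {
	if r.i >= len(r.toks) {
		return nil, io.EOF
	}
	tk := &r.toks[r.i]
	r.i++
	return tk, nil
}

// dump drives any pumpable token source to completion, printing each token.
// Because the producer yields one token per call, any consumer can be glued
// to any producer -- which is the composability the commit message describes.
func dump(r TokenReader) error {
	for {
		tk, err := r.ReadToken()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		fmt.Println(tk) // uses Token.String(), e.g. "<{:1>" for a map open of length 1.
	}
}

func main() {
	// {"a": 1} expressed as a token sequence; note the map open carries a length.
	toks := []codectools.Token{
		{Kind: codectools.TokenKind_MapOpen, Length: 1},
		{Kind: codectools.TokenKind_String, Str: "a"},
		{Kind: codectools.TokenKind_Int, Int: 1},
		{Kind: codectools.TokenKind_MapClose},
	}
	_ = dump(&sliceTokenReader{toks: toks})
}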

File tree

7 files changed: +850 −0 lines changed


codec/api.go

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
package codec

import (
	"io"

	"github.com/ipld/go-ipld-prime"
)

// TODO: i still can't decide between marshaller vs encoder terminology.
// i liked the defn i had in refmt: encoder handles tokens-to-bytes; marshaller handles trees to tokens.
// but that distinction doesn't exist here.
// if it did, we'd need a token type.
// and in that case the encoder parts would be an internal codec code reuse choice, not necessary to expose.
// if this was the case, it would suggest these functions should be called marshaller.
// an alternate definition is: marshallers are things that twist a structure to a tokenizable form;
// but going from tree (already trivially tokenizable) to serial is still considered an encoder.
// i could also see this definition holding water, and it appears to be what i'm rolling with at the moment.
//
// maybe we really should make a TokenWalker thing. Put it in codectools.
// i still really don't know how we'd describe links in that, though. it's really hard to claim links are a token.
// maybe we can cram links into some sort of "extra" field in the token union.

// Encoder is the essential definition of a function that takes IPLD Data Model data in memory and serializes it.
// IPLD Codecs are written by implementing this function interface (as well as (typically) a matched Decoder).
//
// Encoder functions can be composed into an ipld.LinkSystem to provide
// a "one stop shop" API for handling content addressable storage.
// Encoder functions can also be used directly if you want to handle serial data streams.
//
// Most codec packages will have a ReusableEncoder type
// (which contains any working memory needed by the encoder implementation,
// as well as any configuration options),
// and that type will have an Encode function matching this interface.
//
// By convention, codec packages that have a multicodec contract will also have
// a package-scope exported function called Encode which also matches this interface,
// and is the equivalent of creating a zero-value ReusableEncoder (aka, default config)
// and using its Encode method.
// This package-scope function will typically also internally use a sync.Pool
// to keep some ReusableEncoder values on hand to avoid unnecessary allocations.
//
// Note that a ReusableEncoder type that supports configuration options
// does not functionally expose those options when invoked by the multicodec system --
// multicodec indicators do not provide room for extended configuration info.
// Codecs that expose configuration options are doing so for library users to enjoy;
// it does not mean those non-default configurations will necessarily be available
// in all scenarios that use codecs indirectly.
// There is also no standard interface for such configurations: by nature,
// if they exist at all, they vary per codec.
type Encoder func(data ipld.Node, output io.Writer) error

// Decoder is the essential definition of a function that consumes serial data and unfurls it into IPLD Data Model-compatible in-memory representations.
// IPLD Codecs are written by implementing this function interface (as well as (typically) a matched Encoder).
//
// Decoder is the dual of Encoder.
// Most of the documentation for the Encoder function interface
// also applies wholesale to the Decoder interface.
type Decoder func(into ipld.NodeAssembler, input io.Reader) error

type ErrBudgetExhausted struct{}

func (e ErrBudgetExhausted) Error() string {
	return "decoder resource budget exhausted (message too long or too complex)"
}
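
As an illustration of the convention those doc comments describe, here is a minimal sketch of what the encode side of a codec package might look like. The package name dagfoo, the empty ReusableEncoder struct, and the stubbed Encode body are all hypothetical placeholders -- nothing below is added by this commit.

package dagfoo

import (
	"io"
	"sync"

	"github.com/ipld/go-ipld-prime"
)

// ReusableEncoder holds any working memory and configuration options needed
// by the encoder implementation (left empty in this sketch).
type ReusableEncoder struct{}

// Encode matches the codec.Encoder function interface.
func (e *ReusableEncoder) Encode(data ipld.Node, output io.Writer) error {
	// ... actual serialization logic would go here ...
	return nil
}

// encoderPool keeps ReusableEncoder values on hand to avoid unnecessary allocations.
var encoderPool = sync.Pool{
	New: func() interface{} { return &ReusableEncoder{} },
}

// Encode is the package-scope convenience function: it is equivalent to using
// a zero-value (default-configured) ReusableEncoder, and reuses pooled instances.
func Encode(data ipld.Node, output io.Writer) error {
	e := encoderPool.Get().(*ReusableEncoder)
	defer encoderPool.Put(e)
	return e.Encode(data, output)
}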

codec/codectools/token.go

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
package codectools

import (
	"fmt"

	"github.com/ipld/go-ipld-prime"
)

type Token struct {
	Kind TokenKind

	Length int       // Present for MapOpen or ListOpen. May be -1 for "unknown" (e.g. a json tokenizer will yield this).
	Bool   bool      // Value. Union: only has meaning if Kind is TokenKind_Bool.
	Int    int64     // Value. Union: only has meaning if Kind is TokenKind_Int.
	Float  float64   // Value. Union: only has meaning if Kind is TokenKind_Float.
	Str    string    // Value. Union: only has meaning if Kind is TokenKind_String. ('Str' rather than 'String' to avoid collision with method.)
	Bytes  []byte    // Value. Union: only has meaning if Kind is TokenKind_Bytes.
	Link   ipld.Link // Value. Union: only has meaning if Kind is TokenKind_Link.

	Node ipld.Node // Direct pointer to the original data, if this token is used to communicate data during a walk of existing in-memory data. Absent when token is being used during deserialization.

	// TODO: position info? We seem to want this basically everywhere the token goes, so it might as well just live here.
	// Putting this position info into the token would require writing those fields many times, though;
	// hopefully we can also use them as the primary accounting position then, or else this might be problematic for speed.
}

func (tk Token) String() string {
	switch tk.Kind {
	case TokenKind_MapOpen:
		return fmt.Sprintf("<%c:%d>", tk.Kind, tk.Length)
	case TokenKind_MapClose:
		return fmt.Sprintf("<%c>", tk.Kind)
	case TokenKind_ListOpen:
		return fmt.Sprintf("<%c:%d>", tk.Kind, tk.Length)
	case TokenKind_ListClose:
		return fmt.Sprintf("<%c>", tk.Kind)
	case TokenKind_Null:
		return fmt.Sprintf("<%c>", tk.Kind)
	case TokenKind_Bool:
		return fmt.Sprintf("<%c:%v>", tk.Kind, tk.Bool)
	case TokenKind_Int:
		return fmt.Sprintf("<%c:%v>", tk.Kind, tk.Int)
	case TokenKind_Float:
		return fmt.Sprintf("<%c:%v>", tk.Kind, tk.Float)
	case TokenKind_String:
		return fmt.Sprintf("<%c:%q>", tk.Kind, tk.Str)
	case TokenKind_Bytes:
		return fmt.Sprintf("<%c:%x>", tk.Kind, tk.Bytes)
	case TokenKind_Link:
		return fmt.Sprintf("<%c:%v>", tk.Kind, tk.Link)
	default:
		return "<INVALID>"
	}
}

type TokenKind uint8

const (
	TokenKind_MapOpen   = '{'
	TokenKind_MapClose  = '}'
	TokenKind_ListOpen  = '['
	TokenKind_ListClose = ']'
	TokenKind_Null      = '0'
	TokenKind_Bool      = 'b'
	TokenKind_Int       = 'i'
	TokenKind_Float     = 'f'
	TokenKind_String    = 's'
	TokenKind_Bytes     = 'x'
	TokenKind_Link      = '/'
)

type ErrMalformedTokenSequence struct {
	Detail string
}

func (e ErrMalformedTokenSequence) Error() string {
	return "malformed token sequence: " + e.Detail
}
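
For a sense of how these pieces might be used together, here is a small illustrative check -- not part of this commit -- that walks a token sequence and reports an ErrMalformedTokenSequence when map and list opens and closes do not nest properly. The checkBalanced function is a hypothetical consumer, shown only to demonstrate the Token, TokenKind, and error types above.

package main

import (
	"fmt"

	"github.com/ipld/go-ipld-prime/codec/codectools"
)

// checkBalanced is a hypothetical all-at-once consumer: it returns an
// ErrMalformedTokenSequence if opens and closes in the sequence do not nest.
func checkBalanced(toks []codectools.Token) error {
	var stack []codectools.TokenKind
	for _, tk := range toks {
		switch tk.Kind {
		case codectools.TokenKind_MapOpen, codectools.TokenKind_ListOpen:
			stack = append(stack, tk.Kind)
		case codectools.TokenKind_MapClose:
			if len(stack) == 0 || stack[len(stack)-1] != codectools.TokenKind_MapOpen {
				return codectools.ErrMalformedTokenSequence{Detail: fmt.Sprintf("unexpected %v", tk)}
			}
			stack = stack[:len(stack)-1]
		case codectools.TokenKind_ListClose:
			if len(stack) == 0 || stack[len(stack)-1] != codectools.TokenKind_ListOpen {
				return codectools.ErrMalformedTokenSequence{Detail: fmt.Sprintf("unexpected %v", tk)}
			}
			stack = stack[:len(stack)-1]
		}
	}
	if len(stack) != 0 {
		return codectools.ErrMalformedTokenSequence{Detail: "unclosed map or list"}
	}
	return nil
}

func main() {
	// A sequence that closes a list where a map close was expected.
	bad := []codectools.Token{
		{Kind: codectools.TokenKind_MapOpen, Length: 1},
		{Kind: codectools.TokenKind_String, Str: "a"},
		{Kind: codectools.TokenKind_Int, Int: 1},
		{Kind: codectools.TokenKind_ListClose},
	}
	fmt.Println(checkBalanced(bad)) // prints: malformed token sequence: unexpected <]>
}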
