Incremental encode (#36)

* incremental encoding from multiple buffers * Update README.md * tests for cobs_encode_inc * need_advance flag, all tests pass * udpate readme * remove warning disable * update windows bat * pass byte to std fill * pass byte to iota * pass byte to std fill * pass byte to std fill
charlesnicholson · Aug 31, 2021 · 27ee4d6 · 27ee4d6
1 parent a41ad87
commit 27ee4d6
Show file tree

Hide file tree

Showing 7 changed files with 470 additions and 37 deletions.
diff --git a/Makefile b/Makefile
@@ -1,6 +1,7 @@
 SRCS := cobs.c \
 		tests/test_cobs_encode_max.cc \
 		tests/test_cobs_encode.cc \
+		tests/test_cobs_encode_inc.cc \
 		tests/test_cobs_encode_inplace.cc \
 		tests/test_cobs_decode.cc \
 		tests/test_cobs_decode_inplace.cc \
@@ -28,7 +29,7 @@ ifeq ($(OS),Darwin)
 CPPFLAGS += -Weverything -Wno-poison-system-directories -Wno-format-pedantic
 endif
 
-CPPFLAGS += -Wno-c++98-compat
+CPPFLAGS += -Wno-c++98-compat -Wno-padded
 CFLAGS = --std=c99
 CXXFLAGS = --std=c++17
 

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 `nanocobs` is a C99 implementation of the [Consistent Overhead Byte Stuffing](https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing) ("COBS") algorithm, defined in the [paper](http://www.stuartcheshire.org/papers/COBSforToN.pdf) by Stuart Cheshire and Mary Baker.
 
-Users can encode and decode data in-place or into separate target buffers. The `nanocobs` runtime requires no extra memory overhead. No standard library headers are included, and no standard library functions are called.
+Users can encode and decode data in-place or into separate target buffers. Encoding can be incremental; users can encode multiple small buffers (e.g. header, then payloads) into one target. The `nanocobs` runtime requires no extra memory overhead. No standard library headers are included, and no standard library functions are called.
 
 ## Rationale
 
@@ -18,7 +18,9 @@ You probably only need `nanocobs` for things like inter-chip communications prot
 
 There are a few out there, but I haven't seen any that optionally encode in-place. This can be handy if you're memory-constrained and would enjoy CPU + RAM optimizations that come from using small frames. Also, the cost of in-place decoding is only as expensive as the number of zeroes in your payload; exploiting that if you're designing your own protocols can make decoding very fast.
 
-Also I didn't see as many unit tests as I'd have liked in the other libraries, especially around invalid payload handling. Framing protocols make for lovely attack surfaces, and malicious COBS frames can easily instruct decoders to jump outside of the frame itself.
+None of the other COBS implementations I saw supported incremental encoding. It's often the case in communication stacks that a layer above the link provides a tightly-sized payload buffer, and the link has to encode both a header _and_ this payload into a single frame. Without incremental encoding, that requires an extra buffer to assemble the header and payload, which then immediately gets encoded into yet another buffer. With incremental encoding, a header structure can be created on the stack and encoded into the target, then the payload can follow into the same target.
+
+Finally, I didn't see as many unit tests as I'd have liked in the other libraries, especially around invalid payload handling. Framing protocols make for lovely attack surfaces, and malicious COBS frames can easily instruct decoders to jump outside of the frame itself.
 
 ## Metrics
 
@@ -27,10 +29,14 @@ It's pretty small.
 ❯ arm-none-eabi-gcc -mthumb -mcpu=cortex-m4 -Os -c cobs.c
 ❯ arm-none-eabi-nm --print-size --size-sort cobs.o
 
-00000048 00000032 T cobs_decode_inplace  (50 bytes)
-00000000 00000048 T cobs_encode_inplace  (72 bytes)
-00000092 0000007a T cobs_encode          (122 bytes)
-0000010c 0000008c T cobs_decode          (140 bytes)
+0000011c 0000001e T cobs_encode_inc_end    (30 bytes)
+0000007a 00000022 T cobs_encode_inc_begin  (34 bytes)
+00000048 00000032 T cobs_decode_inplace    (50 bytes)
+0000013a 00000034 T cobs_encode            (52 bytes)
+00000000 00000048 T cobs_encode_inplace    (72 bytes)
+0000009c 00000080 T cobs_encode_inc        (128 bytes)
+0000016e 00000090 T cobs_decode            (144 bytes)
+Total 1fe (510 bytes)
 ```
 
 ## Usage
@@ -56,6 +62,10 @@ if (result == COBS_RET_SUCCESS) {
 }
 ```
 
+### Incremental Encoding
+
+TODO: write this
+
 ### Encoding In-Place
 
 The COBS protocol requires an extra byte at the beginning and end of the payload. If encoding and decoding in-place, it becomes your responsibility to reserve these extra bytes. It's easy to mess this up and just put your own data at byte 0, but your data must start at byte 1. For safety and sanity, `cobs_encode_inplace` will error with `COBS_RET_ERR_BAD_PAYLOAD` if the first and last bytes aren't explicitly set to the sentinel value. You have to put them there.

diff --git a/cobs.c b/cobs.c
@@ -1,8 +1,10 @@
 #include "cobs.h"
 
 #define COBS_ISV COBS_INPLACE_SENTINEL_VALUE
+
 typedef unsigned char cobs_byte_t;
 
+
 cobs_ret_t cobs_encode_inplace(void *buf, unsigned len) {
   if (!buf || (len < 2)) { return COBS_RET_ERR_BAD_ARG; }
 
@@ -13,7 +15,7 @@ cobs_ret_t cobs_encode_inplace(void *buf, unsigned len) {
 
   unsigned patch = 0, cur = 1;
   while (cur < len - 1) {
-    if (src[cur] == COBS_FRAME_DELIMETER) {
+    if (src[cur] == COBS_FRAME_DELIMITER) {
       unsigned const ofs = cur - patch;
       if (ofs > 255) { return COBS_RET_ERR_BAD_PAYLOAD; }
       src[patch] = (cobs_byte_t)ofs;
@@ -28,12 +30,13 @@ cobs_ret_t cobs_encode_inplace(void *buf, unsigned len) {
   return COBS_RET_SUCCESS;
 }
 
+
 cobs_ret_t cobs_decode_inplace(void *buf, unsigned const len) {
   if (!buf || (len < 2)) { return COBS_RET_ERR_BAD_ARG; }
 
   cobs_byte_t *const src = (cobs_byte_t *)buf;
   unsigned ofs, cur = 0;
-  while ((ofs = src[cur]) != COBS_FRAME_DELIMETER) {
+  while ((ofs = src[cur]) != COBS_FRAME_DELIMITER) {
     src[cur] = 0;
     cur += ofs;
     if (cur > len) { return COBS_RET_ERR_BAD_PAYLOAD; }
@@ -51,14 +54,55 @@ cobs_ret_t cobs_encode(void const *dec,
                        void *out_enc,
                        unsigned enc_max,
                        unsigned *out_enc_len) {
-  if (!dec || !out_enc || !out_enc_len) { return COBS_RET_ERR_BAD_ARG; }
-  if ((enc_max < 2) || (enc_max < dec_len)) { return COBS_RET_ERR_BAD_ARG; }
+  if (!out_enc_len) { return COBS_RET_ERR_BAD_ARG; }
+
+  cobs_enc_ctx_t ctx;
+  cobs_ret_t r;
+  r = cobs_encode_inc_begin(out_enc, enc_max, &ctx);
+  if (r != COBS_RET_SUCCESS) { return r; }
+  r = cobs_encode_inc(&ctx, dec, dec_len);
+  if (r != COBS_RET_SUCCESS) { return r; }
+  r = cobs_encode_inc_end(&ctx, out_enc_len);
+  return r;
+}
+
+
+cobs_ret_t cobs_encode_inc_begin(void *out_enc,  // Start an incremental encode into |out_enc|.
+                                 unsigned enc_max,
+                                 cobs_enc_ctx_t *out_ctx) {
+  if (!out_enc || !out_ctx) { return COBS_RET_ERR_BAD_ARG; }
+  if (enc_max < 2) { return COBS_RET_ERR_BAD_ARG; }  // Need room for one code byte plus the delimiter.
+
+  out_ctx->dst = out_enc;
+  out_ctx->dst_max = enc_max;
+  out_ctx->cur = 1;  // dst[0] is reserved for the first code byte.
+  out_ctx->code = 1;
+  out_ctx->code_idx = 0;  // The first code byte lives at dst[0].
+  out_ctx->need_advance = 0;
+  return COBS_RET_SUCCESS;
+}
+
+
+cobs_ret_t cobs_encode_inc(cobs_enc_ctx_t *ctx,
+                           void const *dec,
+                           unsigned dec_len) {
+  if (!ctx || !dec) { return COBS_RET_ERR_BAD_ARG; }
+  unsigned dst_idx = ctx->cur;
+  unsigned const enc_max = ctx->dst_max;
+  if ((enc_max - dst_idx) < dec_len) { return COBS_RET_ERR_EXHAUSTED; }
+
+  unsigned dst_code_idx = ctx->code_idx;
+  unsigned code = ctx->code;
+  int need_advance = ctx->need_advance;
 
   cobs_byte_t const *const src = (cobs_byte_t const *)dec;
-  cobs_byte_t *const dst = (cobs_byte_t *)out_enc;
+  cobs_byte_t *const dst = (cobs_byte_t *)ctx->dst;
+  unsigned src_idx = 0;
 
-  unsigned src_idx = 0, dst_code_idx = 0, dst_idx = 1;
-  cobs_byte_t code = 1;
+  if (need_advance) {
+    if (++dst_idx >= enc_max) { return COBS_RET_ERR_EXHAUSTED; }
+    need_advance = 0;
+  }
 
   while (dec_len--) {
     cobs_byte_t const byte = src[src_idx];
@@ -69,23 +113,39 @@ cobs_ret_t cobs_encode(void const *dec,
     }
 
     if ((byte == 0) || (code == 0xFF)) {
-      dst[dst_code_idx] = code;
+      dst[dst_code_idx] = (cobs_byte_t)code;
       dst_code_idx = dst_idx;
       code = 1;
 
       if ((byte == 0) || dec_len) {
         if (++dst_idx >= enc_max) { return COBS_RET_ERR_EXHAUSTED; }
+      } else {
+        need_advance = !dec_len;
       }
     }
     ++src_idx;
   }
 
-  dst[dst_code_idx] = code;
-  dst[dst_idx++] = COBS_FRAME_DELIMETER;
-  *out_enc_len = dst_idx;
+  ctx->cur = dst_idx;
+  ctx->code = code;
+  ctx->code_idx = dst_code_idx;
+  ctx->need_advance = need_advance;
   return COBS_RET_SUCCESS;
 }
 
+
+cobs_ret_t cobs_encode_inc_end(cobs_enc_ctx_t *ctx, unsigned *out_enc_len) {  // Finalize the frame.
+  if (!ctx || !out_enc_len) { return COBS_RET_ERR_BAD_ARG; }
+
+  cobs_byte_t *const dst = (cobs_byte_t *)ctx->dst;
+  unsigned cur = ctx->cur;
+  dst[ctx->code_idx] = (cobs_byte_t)ctx->code;  // Patch in the code byte of the still-open block.
+  dst[cur++] = COBS_FRAME_DELIMITER;  // When code_idx == cur this overwrites the code byte just stored.
+  *out_enc_len = cur;  // Encoded length includes the delimiter.
+  return COBS_RET_SUCCESS;
+}
+
+
 cobs_ret_t cobs_decode(void const *enc,
                        unsigned enc_len,
                        void *out_dec,
@@ -97,7 +157,7 @@ cobs_ret_t cobs_decode(void const *enc,
   cobs_byte_t const *const src = (cobs_byte_t const *)enc;
   cobs_byte_t *const dst = (cobs_byte_t *)out_dec;
 
-  if ((src[0] == COBS_FRAME_DELIMETER) || (src[enc_len - 1] != COBS_FRAME_DELIMETER)) {
+  if ((src[0] == COBS_FRAME_DELIMITER) || (src[enc_len - 1] != COBS_FRAME_DELIMITER)) {
     return COBS_RET_ERR_BAD_PAYLOAD;
   }
 

diff --git a/cobs.h b/cobs.h
@@ -7,11 +7,12 @@ typedef enum {
   COBS_RET_ERR_EXHAUSTED
 } cobs_ret_t;
 
+
 enum {
   // All COBS frames end with this value. If you're scanning a data source
   // for frame delimiters, the presence of this zero byte indicates the
   // completion of a frame.
-  COBS_FRAME_DELIMETER = 0x00,
+  COBS_FRAME_DELIMITER = 0x00,
 
   // In-place encoding mandatory placeholder byte values.
   COBS_INPLACE_SENTINEL_VALUE = 0x5A,
@@ -82,6 +83,26 @@ cobs_ret_t cobs_encode_inplace(void *buf, unsigned len);
 cobs_ret_t cobs_decode_inplace(void *buf, unsigned len);
 
 
+// cobs_decode
+//
+// Decode |enc_len| encoded bytes from |enc| into |out_dec|, storing the decoded
+// length in |out_dec_len|. Returns COBS_RET_SUCCESS on successful decoding.
+//
+// If any of the input pointers are null, or if any of the lengths are invalid,
+// the function will fail with COBS_RET_ERR_BAD_ARG.
+//
+// If |enc| starts with a 0 byte, or does not end with a 0 byte, the function
+// will fail with COBS_RET_ERR_BAD_PAYLOAD.
+//
+// If the decoding exceeds |dec_max| bytes, the function will fail with
+// COBS_RET_ERR_EXHAUSTED.
+cobs_ret_t cobs_decode(void const *enc,
+                       unsigned enc_len,
+                       void *out_dec,
+                       unsigned dec_max,
+                       unsigned *out_dec_len);
+
+
 // cobs_encode
 //
 // Encode |dec_len| decoded bytes from |dec| into |out_enc|, storing the encoded
@@ -99,24 +120,67 @@ cobs_ret_t cobs_encode(void const *dec,
                        unsigned *out_enc_len);
 
 
-// cobs_decode
+// Incremental encoding API
+
+typedef struct cobs_enc_ctx {
+  void *dst;  // Output buffer passed to cobs_encode_inc_begin.
+  unsigned dst_max;  // Capacity of |dst| in bytes.
+  unsigned cur;  // Index in |dst| of the next byte to write.
+  unsigned code_idx;  // Index in |dst| where the pending code byte is stored.
+  unsigned code;  // Running code value of the block being built; starts at 1.
+  int need_advance;  // Nonzero when the next cobs_encode_inc call must advance |cur| first.
+} cobs_enc_ctx_t;
+
+
+// cobs_encode_inc_begin
 //
-// Decode |enc_len| encoded bytes from |enc| into |out_dec|, storing the decoded
-// length in |out_dec_len|. Returns COBS_RET_SUCCESS on successful decoding.
+// Begin an incremental encoding of data into |out_enc|. The intermediate
+// encoding state is stored in |out_ctx|, which can then be passed into
+// calls to cobs_encode_inc. Returns COBS_RET_SUCCESS if |out_ctx| can be
+// used in future calls to cobs_encode_inc.
 //
-// If any of the input pointers are null, or if any of the lengths are invalid,
-// the function will fail with COBS_RET_ERR_BAD_ARG.
+// If |out_enc| or |out_ctx| are null, or if |enc_max| is not large enough to
+// hold the smallest possible encoding, the function will return
+// COBS_RET_ERR_BAD_ARG.
+cobs_ret_t cobs_encode_inc_begin(void *out_enc,
+                                 unsigned enc_max,
+                                 cobs_enc_ctx_t *out_ctx);
+
+
+// cobs_encode_inc
 //
-// If |enc| starts with a 0 byte, or does not end with a 0 byte, the function
-// will fail with COBS_RET_ERR_BAD_PAYLOAD.
+// Continue an encoding in progress with the new |dec| buffer of length |dec_len|.
+// Encodes |dec_len| decoded bytes from |dec| into the buffer that |ctx| was
+// initialized with in cobs_encode_inc_begin.
 //
-// If the decoding exceeds |dec_max| bytes, the function will fail with
-// COBS_RET_ERR_EXHAUSTED.
-cobs_ret_t cobs_decode(void const *enc,
-                       unsigned enc_len,
-                       void *out_dec,
-                       unsigned dec_max,
-                       unsigned *out_dec_len);
+// If any of the input pointers are null, the function will fail with
+// COBS_RET_ERR_BAD_ARG. A |dec_len| of zero is not rejected.
+//
+// If the contents pointed to by |dec| can not be encoded in the remaining
+// available buffer space, the function returns COBS_RET_ERR_EXHAUSTED. In
+// this case, |ctx| remains unchanged and incremental encoding can be attempted
+// again with different data, or finished with cobs_encode_inc_end.
+//
+// If the contents of |dec| are successfully encoded, the function returns
+// COBS_RET_SUCCESS.
+cobs_ret_t cobs_encode_inc(cobs_enc_ctx_t *ctx,
+                           void const *dec,
+                           unsigned dec_len);
+
+
+// cobs_encode_inc_end
+//
+// Finish an incremental encoding by writing the final code and delimiter.
+// Returns COBS_RET_SUCCESS on success, and no further calls to
+// cobs_encode_inc or cobs_encode_inc_end can be safely made until |ctx|
+// is re-initialized via a new call to cobs_encode_inc_begin.
+//
+// The final encoded length is written to |out_enc_len|, and the buffer
+// passed to cobs_encode_inc_begin holds the full COBS-encoded frame.
+//
+// If null pointers are provided, the function returns COBS_RET_ERR_BAD_ARG.
+cobs_ret_t cobs_encode_inc_end(cobs_enc_ctx_t *ctx, unsigned *out_enc_len);
+
 
 #ifdef __cplusplus
 }

diff --git a/make-win.bat b/make-win.bat
@@ -1,10 +1,11 @@
 cl.exe /W4 /WX /EHsc ^
     cobs.c ^
     tests/test_cobs_decode.cc ^
+    tests/test_cobs_decode_inplace.cc ^
     tests/test_cobs_encode_max.cc ^
     tests/test_cobs_encode.cc ^
+    tests/test_cobs_encode_inc.cc ^
     tests/test_cobs_encode_inplace.cc ^
-    tests/test_cobs_decode_inplace.cc ^
     tests/test_paper_figures.cc ^
     tests/test_wikipedia.cc ^
     tests/unittest_main.cc ^

diff --git a/tests/test_cobs_encode.cc b/tests/test_cobs_encode.cc
@@ -21,8 +21,8 @@ TEST_CASE("Encoding validation") {
   SUBCASE("Invalid enc_max") {
     REQUIRE(cobs_encode(dec, dec_n, enc, 0, &enc_len) == COBS_RET_ERR_BAD_ARG);
     REQUIRE(cobs_encode(dec, dec_n, enc, 1, &enc_len) == COBS_RET_ERR_BAD_ARG);
-    REQUIRE(cobs_encode(dec, dec_n, enc, dec_n - 2, &enc_len) == COBS_RET_ERR_BAD_ARG);
-    REQUIRE(cobs_encode(dec, dec_n, enc, dec_n - 1, &enc_len) == COBS_RET_ERR_BAD_ARG);
+    REQUIRE(cobs_encode(dec, dec_n, enc, dec_n - 2, &enc_len) == COBS_RET_ERR_EXHAUSTED);
+    REQUIRE(cobs_encode(dec, dec_n, enc, dec_n - 1, &enc_len) == COBS_RET_ERR_EXHAUSTED);
   }
 }