Comments fix (#39)

hjson · Jan 12, 2023 · d553e65 · d553e65
1 parent 0c40199
commit d553e65
Show file tree

Hide file tree

Showing 58 changed files with 1,058 additions and 29 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -9,20 +9,20 @@ jobs:
       matrix:
         include:
           - name: linux-prev-release
-            os: ubuntu-18.04
+            os: ubuntu-20.04
             env:
               BUILD_TYPE: Release
           - name: linux-latest-debug
-            os: ubuntu-20.04
+            os: ubuntu-latest
             env:
               BUILD_TYPE: Debug
               HJSON_CXX_FLAGS: "-g -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer -fstack-protector-all -fsanitize=undefined -fno-sanitize-recover"
           - name: linux-latest-release
-            os: ubuntu-20.04
+            os: ubuntu-latest
             env:
               BUILD_TYPE: Release
           - name: linux-strtod
-            os: ubuntu-20.04
+            os: ubuntu-latest
             env:
               BUILD_TYPE: Release
               HJSON_NUMBER_PARSER: StrToD
@@ -48,6 +48,6 @@ jobs:
               HJSON_NUMBER_PARSER: CharConv
     env: ${{ matrix.env }}
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DHJSON_ENABLE_TEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DHJSON_NUMBER_PARSER=${HJSON_NUMBER_PARSER} -DCMAKE_CXX_FLAGS="${HJSON_CXX_FLAGS}" .. && cmake --build . --target runtest
         shell: bash
diff --git a/src/hjson_decode.cpp b/src/hjson_decode.cpp
@@ -46,6 +46,8 @@ static inline void _setComment(Value& val, void (Value::*fp)(const std::string&)
   if (ciA.hasComment && ciB.hasComment) {
     (val.*fp)(std::string(p->data + ciA.cmStart, p->data + ciA.cmEnd) +
       std::string(p->data + ciB.cmStart, p->data + ciB.cmEnd));
+  } else if (!ciA.hasComment && !ciB.hasComment) {
+    (val.*fp)("");
   } else {
     _setComment(val, fp, p, ciA);
     _setComment(val, fp, p, ciB);
@@ -343,7 +345,7 @@ static std::string _readKeyname(Parser *p) {
 
 static CommentInfo _white(Parser *p) {
   CommentInfo ci = {
-    p->opt.whitespaceAsComments,
+    false,
     p->indexNext - 1,
     0
   };
@@ -382,6 +384,9 @@ static CommentInfo _white(Parser *p) {
   // cmEnd is the first char after the comment (i.e. not included in the comment).
   ci.cmEnd = p->indexNext - 1;
 
+  ci.hasComment = (ci.hasComment || (p->opt.whitespaceAsComments &&
+    (ci.cmEnd > ci.cmStart)));
+
   return ci;
 }
 
@@ -433,12 +438,12 @@ static CommentInfo _getCommentAfter(Parser *p) {
 
 // Hjson strings can be quoteless
 // returns string, true, false, or null.
-static Value _readTfnns(Parser *p) {
+static Value _readTfnns2(Parser *p, size_t &valEnd) {
   if (_isPunctuatorChar(p->ch)) {
     throw syntax_error(_errAt(p, std::string("Found a punctuator character '") +
       (char)p->ch + std::string("' when expecting a quoteless string (check your syntax)")));
   }
-  size_t valStart = p->indexNext - 1, valEnd = 0;
+  size_t valStart = p->indexNext - 1;
 
   if (std::isspace(p->ch)) {
     ++valStart;
@@ -499,6 +504,16 @@ static Value _readTfnns(Parser *p) {
 }
 
 
+static Value _readTfnns(Parser *p) {  // Wrapper: parses the value via _readTfnns2(), then repositions the parser at valEnd.
+  size_t valEnd = 0;  // Handed to _readTfnns2() by reference (size_t&), so the callee can update it.
+  auto ret = _readTfnns2(p, valEnd);
+  // Reposition the parser at valEnd so that whitespace following the value ends up in the after-comment.
+  p->indexNext = valEnd;  // NOTE(review): presumably the index just past the value's last character; confirm in _readTfnns2().
+  _next(p);  // Called right after resetting indexNext; presumably reloads p->ch from that position (verify against _next()).
+  return ret;
+}
+
+
 // Parse an array value.
 // assuming ch == '['
 static Value _readArray(Parser *p) {
@@ -643,11 +658,6 @@ static Value _readValue(Parser *p) {
     break;
   default:
     ret = _readTfnns(p);
-    // Make sure that any comment will include preceding whitespace.
-    if (p->ch == '#' || p->ch == '/') {
-      while (_prev(p) && std::isspace(p->ch)) {}
-      _next(p);
-    }
     break;
   }
 

diff --git a/src/hjson_encode.cpp b/src/hjson_encode.cpp
@@ -294,6 +294,35 @@ static bool _quoteForComment(Encoder *e, const std::string& comment) {
 }
 
 
+// Returns true if output written right after `comment` would land inside a line comment,
+// i.e. the last '#' or "//" found in `comment` is not followed by a '\n' (unterminated line comment).
+// Block comments are not tracked, so '/* # */' and similar also return true, but that should be
+// uncommon and will only cause an unnecessary line feed after the comment.
+static bool _isInComment(const std::string& comment) {
+  bool endsInsideComment = false;  // State after the characters scanned so far.
+  char prev = ' ';  // Previous character; starts as a space so a leading '/' is never taken for the second '/' of "//".
+
+  for (char ch : comment) {
+    switch (ch) {
+    case '\n':  // A line feed terminates any line comment.
+      endsInsideComment = false;
+      break;
+    case '#':  // '#' opens a line comment (no check for being inside a block comment).
+      endsInsideComment = true;
+      break;
+    case '/':
+      if (prev == '/') {  // Two consecutive slashes open a line comment.
+        endsInsideComment = true;
+      }
+      break;
+    }
+    prev = ch;
+  }
+
+  return endsInsideComment;
+}
+
+
 // Produce a string from value.
 static void _str(Encoder *e, const Value& value, bool isRootObject, bool isObjElement) {
   const char *separator = ((isObjElement && (!e->opt.comments ||
@@ -359,6 +388,11 @@ static void _str(Encoder *e, const Value& value, bool isRootObject, bool isObjEl
           }
 
           if (e->opt.comments && !value[i].get_comment_before().empty()) {
+            if (!e->opt.separator &&
+              value[i].get_comment_before().find("\n") == std::string::npos)
+            {
+              _writeIndent(e, e->indent);
+            }
             *e->os << value[i].get_comment_before();
           } else if (shouldIndent) {
             _writeIndent(e, e->indent);
@@ -369,10 +403,12 @@ static void _str(Encoder *e, const Value& value, bool isRootObject, bool isObjEl
           commentAfter = value[i].get_comment_after();
         }
       }
-
       if (e->opt.comments && !commentAfter.empty()) {
         *e->os << commentAfter;
-      } else if (!value.empty()) {
+      }
+      if (!value.empty() && (!e->opt.comments || commentAfter.empty() ||
+        !e->opt.separator && commentAfter.find("\n") == std::string::npos))
+      {
         _writeIndent(e, e->indent - 1);
       }
 
@@ -412,12 +448,21 @@ static void _str(Encoder *e, const Value& value, bool isRootObject, bool isObjEl
 
       if (e->opt.comments && !commentAfter.empty()) {
         *e->os << commentAfter;
-      } else if (!value.empty() && (!e->opt.omitRootBraces || !isRootObject)) {
+      }
+      if (!value.empty() && (!e->opt.omitRootBraces || !isRootObject) &&
+        (!e->opt.comments || commentAfter.empty() ||
+        !e->opt.separator && commentAfter.find("\n") == std::string::npos))
+      {
         _writeIndent(e, e->indent - 1);
       }
 
       if (!e->opt.omitRootBraces || !isRootObject || value.empty()) {
         e->indent--;
+        if (isRootObject && e->opt.comments && !commentAfter.empty() &&
+          _isInComment(commentAfter))
+        {
+          _writeIndent(e, e->indent);
+        }
         *e->os << "}";
       }
     }
@@ -461,7 +506,9 @@ static void _objElem(Encoder *e, const std::string& key, const Value& value, boo
     if (e->opt.comments) {
       *e->os << commentAfterPrevObj;
     }
-    if (!hasCommentBefore) {
+    if (!hasCommentBefore || !e->opt.separator &&
+      value.get_comment_before().find("\n") == std::string::npos)
+    {
       _writeIndent(e, e->indent);
     }
   }

diff --git a/test/assets/comments/comments7_result.hjson b/test/assets/comments/comments7_result.hjson
@@ -0,0 +1,8 @@
+{# comment before
+b: /* key comment */ {
+    sub1: 3  # comment after
+    sub2: 4  # comment more after
+  } # cm after obj
+// Comment B4
+a: 2
+/* Last comment */}
diff --git a/test/assets/comments/pass6_result.hjson b/test/assets/comments/pass6_result.hjson
@@ -0,0 +1,8 @@
+{
+  a: 1
+  b: 2
+  c: 3
+  d: /*cm1*/ 4
+   /*cm2 */ e: /* cm3*/ 5 /* cm 4 */ 
+  f: 6
+}
diff --git a/test/assets/comments/strings_result.hjson b/test/assets/comments/strings_result.hjson
@@ -33,6 +33,7 @@
     last line
 
     ''' # trailing lf
+  multiline4: ←→±≠Я
 
   # escapes/no escape
 
@@ -89,5 +90,6 @@
     one: "1"
     two: "2"
     minus: "-3"
+    zero: "0"
   }
 }
diff --git a/test/assets/comments2/comments7_result.hjson b/test/assets/comments2/comments7_result.hjson
@@ -0,0 +1,8 @@
+{# comment before
+b: /* key comment */ {
+    sub1: 3  # comment after
+    sub2: 4  # comment more after
+  } # cm after obj
+// Comment B4
+a: 2
+/* Last comment */}
diff --git a/test/assets/comments2/pass6_result.hjson b/test/assets/comments2/pass6_result.hjson
@@ -0,0 +1,8 @@
+{
+  a: 1
+  b: 2
+  c: 3
+  d: /*cm1*/ 4
+   /*cm2 */ e: /* cm3*/ 5 /* cm 4 */ 
+  f: 6
+}
diff --git a/test/assets/comments2/strings_result.hjson b/test/assets/comments2/strings_result.hjson
@@ -33,6 +33,7 @@
     last line
 
     ''' # trailing lf
+  multiline4: ←→±≠Я
 
   # escapes/no escape
 
@@ -85,5 +86,6 @@
     one: "1"
     two: "2"
     minus: "-3"
+    zero: "0"
   }
 }
diff --git a/test/assets/comments3/charconv/pass5_result.hjson b/test/assets/comments3/charconv/pass5_result.hjson
@@ -0,0 +1,4 @@
+{
+  bigDouble: 9.223372036854776e+58
+  bigInt: 9.223372036854776e+58
+}
diff --git a/test/assets/comments3/charconv/pass5_result.json b/test/assets/comments3/charconv/pass5_result.json
@@ -0,0 +1,4 @@
+{
+  "bigDouble": 9.223372036854776e+58,
+  "bigInt": 9.223372036854776e+58
+}
diff --git a/test/assets/comments3/charset2_result.hjson b/test/assets/comments3/charset2_result.hjson
@@ -0,0 +1,5 @@
+{
+  uescape: "\u0000,\u0001,\uffff"
+  umlaut: äöüßÄÖÜ
+  hex: ģ䕧覫췯ꯍ
+}
diff --git a/test/assets/comments3/charset_result.hjson b/test/assets/comments3/charset_result.hjson
@@ -0,0 +1,6 @@
+{
+  ql-ascii: ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+  js-ascii: ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+  ml-ascii:
+    ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+}
diff --git a/test/assets/comments3/comments2_result.hjson b/test/assets/comments3/comments2_result.hjson
@@ -0,0 +1,3 @@
+// before
+/* before2 */ 3 // after1
+// after2
diff --git a/test/assets/comments3/comments3_result.hjson b/test/assets/comments3/comments3_result.hjson
@@ -0,0 +1,3 @@
+// before
+/* before2 */ a string value // still part of the string
+// after2
diff --git a/test/assets/comments3/comments4_result.hjson b/test/assets/comments3/comments4_result.hjson
@@ -0,0 +1,51 @@
+// before
+/* before2 */ [ #before1
+  /*key1keycm*/a string value // still part of the string
+ /* key2keycm  */   "a string value" // not part of the string
+  // map1before
+  /* map1key */
+  {}//map2after
+ {}
+   {
+      // map3 inner comment
+      }
+   []
+   // map4before
+  /*map4key*/{
+  /* map4inner */
+  } // map4after
+  //map5before
+  /*map5key*/ {
+      //map5ab4
+      val5a: /* map5akey */ 1 // map5aAfter
+      val5b: 2 /* map5bb4comma */  #map5bAfter
+    #map5extra
+  } /* map5after */
+  // vec1bbefore
+  /* vec1bkey */
+  []//vec1bafter
+ []
+   [
+      // vec3 inner comment
+      ]
+   // vec4before
+  /*vec4key*/[
+  /* vec4inner */
+  ] // vec4after
+  //vec5before
+  /*vec5key*/ [
+      //vec5ab4
+      1 // vec5aAfter
+      2 /* vec5bb4comma */  #vec5bAfter
+    #vec5extra
+  ] /* map5after */
+  // before3
+
+  3 # after3
+  # before4
+  /*before4b*/4/*after4*/
+  #after4b
+]
+// after2
+
+/* after3 */