[Merge chakra-core/ChakraCore@f75c14a5d2] [MERGE #3560 @Cellule] WASM: Spec fixes + improve debugging

Merge pull request #3560 from Cellule:wasm/misc

Fix the remaining spec test failures. Improve debugging output with `-trace:wasmbytecode` and improve some error messages.

Bugs fixed:
- Disallow invalid UTF-8 characters in wasm strings
- Disallow `tee_local` of unreachable values
- An `if` that yields a value must have an `else` block
- All the targets of `br_table` must have the same signature
- If `br_if` yields the `any` type, push the block's result type onto the stack instead, i.e. the `any` type now has an explicit type for further type checking
nodejs · Aug 24, 2017 · 02a78ae · 02a78ae
1 parent a2c870c
commit 02a78ae
Show file tree

Hide file tree

Showing 13 changed files with 234 additions and 654 deletions.
diff --git a/deps/chakrashim/core/lib/Common/Codex/Utf8Codex.cpp b/deps/chakrashim/core/lib/Common/Codex/Utf8Codex.cpp
@@ -70,18 +70,26 @@ namespace utf8
         return ((0x5B >> (((prefix ^ 0xF0) >> 3) & 0x1E)) & 0x03) + 1;
     }
 
-    const char16 g_chUnknown = char16(UNICODE_UNKNOWN_CHAR_MARK);
     const char16 WCH_UTF16_HIGH_FIRST  =  char16(0xd800);
     const char16 WCH_UTF16_HIGH_LAST   =  char16(0xdbff);
     const char16 WCH_UTF16_LOW_FIRST   =  char16(0xdc00);
     const char16 WCH_UTF16_LOW_LAST    =  char16(0xdfff);
 
+    char16 GetUnknownCharacter(DecodeOptions options = doDefault)
+    {
+        if ((options & doThrowOnInvalidWCHARs) != 0)
+        {
+            throw InvalidWideCharException();
+        }
+        return char16(UNICODE_UNKNOWN_CHAR_MARK);
+    }
+
     inline BOOL InRange(const char16 ch, const char16 chMin, const char16 chMax)
     {
         return (unsigned)(ch - chMin) <= (unsigned)(chMax - chMin);
     }
 
-    inline BOOL IsValidWideChar(const char16 ch)
+    BOOL IsValidWideChar(char16 ch)
     {
         return (ch < 0xfdd0) || ((ch > 0xfdef) && (ch <= 0xffef)) || ((ch >= 0xfff9) && (ch <= 0xfffd));
     }
@@ -122,7 +130,7 @@ namespace utf8
             }
 
             // 10xxxxxx (trail byte appearing in a lead byte position
-            return g_chUnknown;
+            return GetUnknownCharacter(options);
 
         case 2:
             // Look for an overlong utf-8 sequence.
@@ -138,7 +146,7 @@ namespace utf8
                         *chunkEndsAtTruncatedSequence = true;
                     }
                 }
-                return g_chUnknown;
+                return GetUnknownCharacter(options);
             }
             c2 = *ptr++;
             // 110XXXXx 10xxxxxx
@@ -152,12 +160,14 @@ namespace utf8
                 ch |= WCHAR(c1 & 0x1f) << 6;     // 0x0080 - 0x07ff
                 ch |= WCHAR(c2 & 0x3f);
                 if (!IsValidWideChar(ch) && ((options & doAllowInvalidWCHARs) == 0))
-                    ch = g_chUnknown;
+                {
+                    ch = GetUnknownCharacter(options);
+                }
             }
             else
             {
                 ptr--;
-                ch = g_chUnknown;
+                ch = GetUnknownCharacter(options);
             }
             break;
 
@@ -177,7 +187,7 @@ namespace utf8
                     }
                 }
 
-                return g_chUnknown;
+                return GetUnknownCharacter(options);
             }
 
             //      UTF16       |   UTF8 1st byte  2nd byte 3rd byte
@@ -217,12 +227,14 @@ namespace utf8
                 ch |= WCHAR(c2 & 0x3f) << 6;     // 0x0080 - 0x07ff
                 ch |= WCHAR(c3 & 0x3f);
                 if (!IsValidWideChar(ch) && ((options & (doAllowThreeByteSurrogates | doAllowInvalidWCHARs)) == 0))
-                    ch = g_chUnknown;
+                {
+                    ch = GetUnknownCharacter(options);
+                }
                 ptr += 2;
             }
             else
             {
-                ch = g_chUnknown;
+                ch = GetUnknownCharacter(options);
                 // Windows OS 1713952. Only drop the illegal leading byte
                 // Retry next byte.
                 // ptr is already advanced.
@@ -246,7 +258,7 @@ namespace utf8
                     }
                 }
 
-                ch = g_chUnknown;
+                ch = GetUnknownCharacter(options);
                 break;
             }
 
@@ -281,7 +293,7 @@ namespace utf8
                 // Windows OS 1713952. Only drop the illegal leading byte.
                 // Retry next byte.
                 // ptr is already advanced 1.
-                ch = g_chUnknown;
+                ch = GetUnknownCharacter(options);
                 break;
             }
 

diff --git a/deps/chakrashim/core/lib/Common/Codex/Utf8Codex.h b/deps/chakrashim/core/lib/Common/Codex/Utf8Codex.h
@@ -95,6 +95,7 @@ typedef const utf8char_t *LPCUTF8;
 
 namespace utf8
 {
+    class InvalidWideCharException {};
 
     // Terminology -
     //   Code point      - A ordinal value mapped to a standard ideograph as defined by ISO/IEC 10646-1. Here
@@ -138,9 +139,12 @@ namespace utf8
                                             // surrogate pair. The second call will return the second word and reset
                                             // this 'option'.
         doAllowInvalidWCHARs        = 0x08, // Don't replace invalid wide chars with 0xFFFD
+        doThrowOnInvalidWCHARs      = 0x10, // throw InvalidWideCharException if an invalid wide char is seen. Incompatible with doAllowInvalidWCHARs
     };
     DEFINE_ENUM_FLAG_OPERATORS(DecodeOptions);
 
+    BOOL IsValidWideChar(char16 ch);
+
     // Decode the trail bytes after the UTF8 lead byte c1 but returning 0xFFFD if trail bytes are expected after end.
     _At_(ptr, _In_reads_(end - ptr) _Post_satisfies_(ptr >= _Old_(ptr) - 1 && ptr <= end))
     char16 DecodeTail(char16 c1, LPCUTF8& ptr, LPCUTF8 end, DecodeOptions& options, bool *chunkEndsAtTruncatedSequence = nullptr);

diff --git a/deps/chakrashim/core/lib/WasmReader/WasmBinaryReader.cpp b/deps/chakrashim/core/lib/WasmReader/WasmBinaryReader.cpp
@@ -38,28 +38,16 @@ uint32 GetTypeByteSize(WasmType type)
 
 const char16 * GetTypeName(WasmType type)
 {
-    const char16* typestring = _u("unknown");
     switch (type) {
-    case WasmTypes::WasmType::Void:
-        typestring = _u("void");
-        break;
-    case WasmTypes::WasmType::I32:
-        typestring = _u("i32");
-        break;
-    case WasmTypes::WasmType::I64:
-        typestring = _u("i64");
-        break;
-    case WasmTypes::WasmType::F32:
-        typestring = _u("f32");
-        break;
-    case WasmTypes::WasmType::F64:
-        typestring = _u("f64");
-        break;
-    default:
-        Assert(false);
-        break;
-    }
-    return typestring;
+    case WasmTypes::WasmType::Void: return _u("void");
+    case WasmTypes::WasmType::I32: return _u("i32");
+    case WasmTypes::WasmType::I64: return _u("i64");
+    case WasmTypes::WasmType::F32: return _u("f32");
+    case WasmTypes::WasmType::F64: return _u("f64");
+    case WasmTypes::WasmType::Any: return _u("any");
+    default: Assert(UNREACHED); break;
+    }
+    return _u("unknown");
 }
 
 } // namespace WasmTypes
@@ -1024,16 +1012,23 @@ const char16* WasmBinaryReader::ReadInlineName(uint32& length, uint32& nameLengt
     m_pc += rawNameLength;
     length += rawNameLength;
 
-    utf8::DecodeOptions decodeOptions = utf8::doDefault;
-    nameLength = (uint32)utf8::ByteIndexIntoCharacterIndex(rawName, rawNameLength, decodeOptions);
-    char16* contents = AnewArray(m_alloc, char16, nameLength + 1);
-    size_t decodedLength = utf8::DecodeUnitsIntoAndNullTerminate(contents, rawName, rawName + rawNameLength, decodeOptions);
-    if (decodedLength != nameLength)
+    utf8::DecodeOptions decodeOptions = utf8::doThrowOnInvalidWCHARs;
+    try
+    {
+        nameLength = (uint32)utf8::ByteIndexIntoCharacterIndex(rawName, rawNameLength, decodeOptions);
+        char16* contents = AnewArray(m_alloc, char16, nameLength + 1);
+        size_t decodedLength = utf8::DecodeUnitsIntoAndNullTerminate(contents, rawName, rawName + rawNameLength, decodeOptions);
+        if (decodedLength != nameLength)
+        {
+            AssertMsg(UNREACHED, "We calculated the length before decoding, what happened ?");
+            ThrowDecodingError(_u("Error while decoding utf8 string"));
+        }
+        return contents;
+    }
+    catch (utf8::InvalidWideCharException)
     {
-        AssertMsg(UNREACHED, "We calculated the length before decoding, what happened ?");
-        ThrowDecodingError(_u("Error while decoding utf8 string"));
+        ThrowDecodingError(_u("Invalid UTF-8 encoding"));
     }
-    return contents;
 }
 
 void WasmBinaryReader::ReadImportSection()