Skip to content
This repository has been archived by the owner on Oct 15, 2020. It is now read-only.

Commit

Permalink
[Merge chakra-core/ChakraCore@f75c14a5d2] [MERGE #3560 @Cellule] WASM…
Browse files Browse the repository at this point in the history
…: Spec fixes + improve debugging

Merge pull request #3560 from Cellule:wasm/misc

Fix the remaining spec test failures.
Improve debugging output with `-trace:wasmbytecode` and improve some error messages.

Bugs fixed:
- Disallow invalid utf8 characters in wasm strings
- Disallow `tee_local` of unreachable values
- Must have an `else` block if the `if` is yielding a value
- All the targets of `br_table` must have the same signature
- If `br_if` is yielding the `any` type, push the block's result type onto the stack instead, i.e., the `any` type now has an explicit type for further type checking
  • Loading branch information
chakrabot committed Aug 24, 2017
1 parent a2c870c commit 02a78ae
Show file tree
Hide file tree
Showing 13 changed files with 234 additions and 654 deletions.
34 changes: 23 additions & 11 deletions deps/chakrashim/core/lib/Common/Codex/Utf8Codex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,26 @@ namespace utf8
return ((0x5B >> (((prefix ^ 0xF0) >> 3) & 0x1E)) & 0x03) + 1;
}

const char16 g_chUnknown = char16(UNICODE_UNKNOWN_CHAR_MARK);
const char16 WCH_UTF16_HIGH_FIRST = char16(0xd800);
const char16 WCH_UTF16_HIGH_LAST = char16(0xdbff);
const char16 WCH_UTF16_LOW_FIRST = char16(0xdc00);
const char16 WCH_UTF16_LOW_LAST = char16(0xdfff);

// Produces the value to substitute for input that cannot be decoded.
// When `options` has doThrowOnInvalidWCHARs set, no substitute is produced and
// InvalidWideCharException is thrown instead; otherwise the
// UNICODE_UNKNOWN_CHAR_MARK placeholder is returned.
char16 GetUnknownCharacter(DecodeOptions options = doDefault)
{
    const bool throwOnInvalid = (options & doThrowOnInvalidWCHARs) != 0;
    if (!throwOnInvalid)
    {
        return char16(UNICODE_UNKNOWN_CHAR_MARK);
    }
    throw InvalidWideCharException();
}

// Tests whether `ch` lies in the inclusive range [chMin, chMax] using one comparison.
// Both differences are converted to unsigned, so a negative (ch - chMin) becomes a
// large unsigned value and fails the <= check.
inline BOOL InRange(const char16 ch, const char16 chMin, const char16 chMax)
{
    const unsigned offsetFromMin = (unsigned)(ch - chMin);
    const unsigned rangeWidth = (unsigned)(chMax - chMin);
    return offsetFromMin <= rangeWidth;
}

inline BOOL IsValidWideChar(const char16 ch)
// Reports whether `ch` is accepted as a valid wide character.
// Rejected values are 0xfdd0..0xfdef, 0xfff0..0xfff8, and 0xfffe..0xffff;
// every other 16-bit value is accepted.
BOOL IsValidWideChar(char16 ch)
{
    if (ch < 0xfdd0)
    {
        return true;
    }
    if ((ch > 0xfdef) && (ch <= 0xffef))
    {
        return true;
    }
    return (ch >= 0xfff9) && (ch <= 0xfffd);
}
Expand Down Expand Up @@ -122,7 +130,7 @@ namespace utf8
}

// 10xxxxxx (trail byte appearing in a lead byte position)
return g_chUnknown;
return GetUnknownCharacter(options);

case 2:
// Look for an overlong utf-8 sequence.
Expand All @@ -138,7 +146,7 @@ namespace utf8
*chunkEndsAtTruncatedSequence = true;
}
}
return g_chUnknown;
return GetUnknownCharacter(options);
}
c2 = *ptr++;
// 110XXXXx 10xxxxxx
Expand All @@ -152,12 +160,14 @@ namespace utf8
ch |= WCHAR(c1 & 0x1f) << 6; // 0x0080 - 0x07ff
ch |= WCHAR(c2 & 0x3f);
if (!IsValidWideChar(ch) && ((options & doAllowInvalidWCHARs) == 0))
ch = g_chUnknown;
{
ch = GetUnknownCharacter(options);
}
}
else
{
ptr--;
ch = g_chUnknown;
ch = GetUnknownCharacter(options);
}
break;

Expand All @@ -177,7 +187,7 @@ namespace utf8
}
}

return g_chUnknown;
return GetUnknownCharacter(options);
}

// UTF16 | UTF8 1st byte 2nd byte 3rd byte
Expand Down Expand Up @@ -217,12 +227,14 @@ namespace utf8
ch |= WCHAR(c2 & 0x3f) << 6; // 0x0080 - 0x07ff
ch |= WCHAR(c3 & 0x3f);
if (!IsValidWideChar(ch) && ((options & (doAllowThreeByteSurrogates | doAllowInvalidWCHARs)) == 0))
ch = g_chUnknown;
{
ch = GetUnknownCharacter(options);
}
ptr += 2;
}
else
{
ch = g_chUnknown;
ch = GetUnknownCharacter(options);
// Windows OS 1713952. Only drop the illegal leading byte
// Retry next byte.
// ptr is already advanced.
Expand All @@ -246,7 +258,7 @@ namespace utf8
}
}

ch = g_chUnknown;
ch = GetUnknownCharacter(options);
break;
}

Expand Down Expand Up @@ -281,7 +293,7 @@ namespace utf8
// Windows OS 1713952. Only drop the illegal leading byte.
// Retry next byte.
// ptr is already advanced 1.
ch = g_chUnknown;
ch = GetUnknownCharacter(options);
break;
}

Expand Down
4 changes: 4 additions & 0 deletions deps/chakrashim/core/lib/Common/Codex/Utf8Codex.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ typedef const utf8char_t *LPCUTF8;

namespace utf8
{
class InvalidWideCharException {};

// Terminology -
// Code point - An ordinal value mapped to a standard ideograph as defined by ISO/IEC 10646-1. Here
Expand Down Expand Up @@ -138,9 +139,12 @@ namespace utf8
// surrogate pair. The second call will return the second word and reset
// this 'option'.
doAllowInvalidWCHARs = 0x08, // Don't replace invalid wide chars with 0xFFFD
doThrowOnInvalidWCHARs = 0x10, // throw InvalidWideCharException if an invalid wide char is seen. Incompatible with doAllowInvalidWCHARs
};
DEFINE_ENUM_FLAG_OPERATORS(DecodeOptions);

BOOL IsValidWideChar(char16 ch);

// Decode the trail bytes after the UTF8 lead byte c1 but returning 0xFFFD if trail bytes are expected after end.
_At_(ptr, _In_reads_(end - ptr) _Post_satisfies_(ptr >= _Old_(ptr) - 1 && ptr <= end))
char16 DecodeTail(char16 c1, LPCUTF8& ptr, LPCUTF8 end, DecodeOptions& options, bool *chunkEndsAtTruncatedSequence = nullptr);
Expand Down
53 changes: 24 additions & 29 deletions deps/chakrashim/core/lib/WasmReader/WasmBinaryReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,16 @@ uint32 GetTypeByteSize(WasmType type)

const char16 * GetTypeName(WasmType type)
{
const char16* typestring = _u("unknown");
switch (type) {
case WasmTypes::WasmType::Void:
typestring = _u("void");
break;
case WasmTypes::WasmType::I32:
typestring = _u("i32");
break;
case WasmTypes::WasmType::I64:
typestring = _u("i64");
break;
case WasmTypes::WasmType::F32:
typestring = _u("f32");
break;
case WasmTypes::WasmType::F64:
typestring = _u("f64");
break;
default:
Assert(false);
break;
}
return typestring;
case WasmTypes::WasmType::Void: return _u("void");
case WasmTypes::WasmType::I32: return _u("i32");
case WasmTypes::WasmType::I64: return _u("i64");
case WasmTypes::WasmType::F32: return _u("f32");
case WasmTypes::WasmType::F64: return _u("f64");
case WasmTypes::WasmType::Any: return _u("any");
default: Assert(UNREACHED); break;
}
return _u("unknown");
}

} // namespace WasmTypes
Expand Down Expand Up @@ -1024,16 +1012,23 @@ const char16* WasmBinaryReader::ReadInlineName(uint32& length, uint32& nameLengt
m_pc += rawNameLength;
length += rawNameLength;

utf8::DecodeOptions decodeOptions = utf8::doDefault;
nameLength = (uint32)utf8::ByteIndexIntoCharacterIndex(rawName, rawNameLength, decodeOptions);
char16* contents = AnewArray(m_alloc, char16, nameLength + 1);
size_t decodedLength = utf8::DecodeUnitsIntoAndNullTerminate(contents, rawName, rawName + rawNameLength, decodeOptions);
if (decodedLength != nameLength)
utf8::DecodeOptions decodeOptions = utf8::doThrowOnInvalidWCHARs;
try
{
nameLength = (uint32)utf8::ByteIndexIntoCharacterIndex(rawName, rawNameLength, decodeOptions);
char16* contents = AnewArray(m_alloc, char16, nameLength + 1);
size_t decodedLength = utf8::DecodeUnitsIntoAndNullTerminate(contents, rawName, rawName + rawNameLength, decodeOptions);
if (decodedLength != nameLength)
{
AssertMsg(UNREACHED, "We calculated the length before decoding, what happened ?");
ThrowDecodingError(_u("Error while decoding utf8 string"));
}
return contents;
}
catch (utf8::InvalidWideCharException)
{
AssertMsg(UNREACHED, "We calculated the length before decoding, what happened ?");
ThrowDecodingError(_u("Error while decoding utf8 string"));
ThrowDecodingError(_u("Invalid UTF-8 encoding"));
}
return contents;
}

void WasmBinaryReader::ReadImportSection()
Expand Down
Loading

0 comments on commit 02a78ae

Please sign in to comment.