diff --git a/src/coreclr/debug/daccess/inspect.cpp b/src/coreclr/debug/daccess/inspect.cpp index a7f697ec0b7c2..ec8356cf1f519 100644 --- a/src/coreclr/debug/daccess/inspect.cpp +++ b/src/coreclr/debug/daccess/inspect.cpp @@ -1049,8 +1049,8 @@ ClrDataValue::GetString( { *strLen = static_cast(u16_strlen(msgStr) + 1); } - - status = u16_strcpy_s(str, bufLen, msgStr) != NULL ? S_OK : S_FALSE; + status = StringCchCopy(str, bufLen, msgStr) == S_OK ? + S_OK : S_FALSE; } else { diff --git a/src/coreclr/debug/daccess/stack.cpp b/src/coreclr/debug/daccess/stack.cpp index 43b420fd6e7f6..6b9f1a491c291 100644 --- a/src/coreclr/debug/daccess/stack.cpp +++ b/src/coreclr/debug/daccess/stack.cpp @@ -830,7 +830,7 @@ ClrDataFrame::GetArgumentByIndex( *nameLen = 5; } - u16_strcpy_s(name, bufLen, W("this")); + StringCchCopy(name, bufLen, W("this")); } else { diff --git a/src/coreclr/debug/daccess/stdafx.h b/src/coreclr/debug/daccess/stdafx.h index 14f6882d808ec..bb7b7b2365de5 100644 --- a/src/coreclr/debug/daccess/stdafx.h +++ b/src/coreclr/debug/daccess/stdafx.h @@ -50,6 +50,11 @@ #include "dacimpl.h" +#define STRSAFE_NO_DEPRECATE +#include +#undef _ftcscat +#undef _ftcscpy + // from ntstatus.h #define STATUS_STOWED_EXCEPTION ((NTSTATUS)0xC000027BL) diff --git a/src/coreclr/debug/daccess/task.cpp b/src/coreclr/debug/daccess/task.cpp index b54aab734bceb..ddbf251b7b982 100644 --- a/src/coreclr/debug/daccess/task.cpp +++ b/src/coreclr/debug/daccess/task.cpp @@ -732,7 +732,8 @@ ClrDataAppDomain::GetName( } else { - status = u16_strcpy_s(name, bufLen, (PCWSTR)rawName) != NULL ? S_OK : S_FALSE; + status = StringCchCopy(name, bufLen, (PCWSTR)rawName) == S_OK ? + S_OK : S_FALSE; if (nameLen) { size_t cchName = u16_strlen((PCWSTR)rawName) + 1; @@ -4767,7 +4768,7 @@ ClrDataExceptionState::GetString( message->GetStringLength(), true); - status = u16_strcpy_s(str, bufLen, msgStr) != NULL ? S_OK : S_FALSE; + status = StringCchCopy(str, bufLen, msgStr) == S_OK ? 
S_OK : S_FALSE; if (strLen != NULL) { size_t cchName = u16_strlen(msgStr) + 1; diff --git a/src/coreclr/dlls/mscordac/mscordac_unixexports.src b/src/coreclr/dlls/mscordac/mscordac_unixexports.src index 5a9b1e40970e8..6cdd5ff733bcf 100644 --- a/src/coreclr/dlls/mscordac/mscordac_unixexports.src +++ b/src/coreclr/dlls/mscordac/mscordac_unixexports.src @@ -69,9 +69,6 @@ nativeStringResourceTable_mscorrc #PAL__pread #PAL__close -#minipal_get_length_utf16_to_utf8 -#minipal_convert_utf16_to_utf8 - #_wcsicmp #_stricmp #sprintf_s diff --git a/src/coreclr/ilasm/main.cpp b/src/coreclr/ilasm/main.cpp index ebe1a2c220e55..838f05aa996cf 100644 --- a/src/coreclr/ilasm/main.cpp +++ b/src/coreclr/ilasm/main.cpp @@ -11,6 +11,7 @@ #include "clrversion.h" #include "shimload.h" +#include "strsafe.h" #define ASSERTE_ALL_BUILDS(expr) _ASSERTE_ALL_BUILDS((expr)) WCHAR* EqualOrColon(_In_ __nullterminated WCHAR* szArg) diff --git a/src/coreclr/inc/clr/fs/path.h b/src/coreclr/inc/clr/fs/path.h index a42b1d48500db..efc21a5cdd439 100644 --- a/src/coreclr/inc/clr/fs/path.h +++ b/src/coreclr/inc/clr/fs/path.h @@ -10,6 +10,8 @@ #include "clrtypes.h" +#include "strsafe.h" + #include "clr/str.h" namespace clr diff --git a/src/coreclr/inc/corhlprpriv.h b/src/coreclr/inc/corhlprpriv.h index f42eaa637d4e7..62298798d7017 100644 --- a/src/coreclr/inc/corhlprpriv.h +++ b/src/coreclr/inc/corhlprpriv.h @@ -11,7 +11,7 @@ #define __CORHLPRPRIV_H__ #include "corhlpr.h" -#include +#include "fstring.h" #if defined(_MSC_VER) && defined(HOST_X86) #pragma optimize("y", on) // If routines don't get inlined, don't pay the EBP frame penalty @@ -225,33 +225,71 @@ class CQuickMemoryBase iSize = cbTotal; } + + // Convert UTF8 string to UNICODE string, optimized for speed + HRESULT ConvertUtf8_UnicodeNoThrow(const char * utf8str) + { + bool allAscii; + DWORD length; + + HRESULT hr = FString::Utf8_Unicode_Length(utf8str, & allAscii, & length); + + if (SUCCEEDED(hr)) + { + LPWSTR buffer = (LPWSTR) AllocNoThrow((length + 
1) * sizeof(WCHAR)); + + if (buffer == NULL) + { + hr = E_OUTOFMEMORY; + } + else + { + hr = FString::Utf8_Unicode(utf8str, allAscii, buffer, length); + } + } + + return hr; + } + // Convert UTF8 string to UNICODE string, optimized for speed void ConvertUtf8_Unicode(const char * utf8str) { - size_t sourceLen = strlen(utf8str); - size_t destLen = minipal_get_length_utf8_to_utf16(utf8str, sourceLen, 0); + bool allAscii; + DWORD length; - CHAR16_T* buffer = (CHAR16_T*) AllocThrows((destLen + 1) * sizeof(CHAR16_T)); - buffer[destLen] = W('\0'); + HRESULT hr = FString::Utf8_Unicode_Length(utf8str, & allAscii, & length); - if (!minipal_convert_utf8_to_utf16(utf8str, sourceLen, buffer, destLen, 0)) + if (SUCCEEDED(hr)) { - ThrowHR(EMAKEHR(errno)); + LPWSTR buffer = (LPWSTR) AllocThrows((length + 1) * sizeof(WCHAR)); + + hr = FString::Utf8_Unicode(utf8str, allAscii, buffer, length); + } + + if (FAILED(hr)) + { + ThrowHR(hr); } } // Convert UNICODE string to UTF8 string, optimized for speed void ConvertUnicode_Utf8(const WCHAR * pString) { - size_t sourceLen = u16_strlen(pString); - size_t destLen = minipal_get_length_utf16_to_utf8((const CHAR16_T*)pString, sourceLen, 0); + bool allAscii; + DWORD length; + + HRESULT hr = FString::Unicode_Utf8_Length(pString, & allAscii, & length); - LPSTR buffer = (LPSTR) AllocThrows((destLen + 1) * sizeof(char)); - buffer[destLen] = '\0'; + if (SUCCEEDED(hr)) + { + LPSTR buffer = (LPSTR) AllocThrows((length + 1) * sizeof(char)); + + hr = FString::Unicode_Utf8(pString, allAscii, buffer, length); + } - if (!minipal_convert_utf16_to_utf8((const CHAR16_T*)pString, sourceLen, buffer, destLen, 0)) + if (FAILED(hr)) { - ThrowHR(EMAKEHR(errno)); + ThrowHR(hr); } } diff --git a/src/coreclr/inc/fstring.h b/src/coreclr/inc/fstring.h new file mode 100644 index 0000000000000..1be37e242974c --- /dev/null +++ b/src/coreclr/inc/fstring.h @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// --------------------------------------------------------------------------- +// FString.h (Fast String) +// + +// --------------------------------------------------------------------------- + +// ------------------------------------------------------------------------------------------ +// FString is fast string handling namespace + + +// 1) Simple +// 2) No C++ exception +// 3) Optimized for speed + + +#ifndef _FSTRING_H_ +#define _FSTRING_H_ + +namespace FString +{ + // Note: All "length" parameters do not count the space for the null terminator. + // Caller of Unicode_Utf8 and Utf8_Unicode must pass in a buffer of size at least length + 1. + + // Scan for ASCII only string, calculate result UTF8 string length + HRESULT Unicode_Utf8_Length(_In_z_ LPCWSTR pString, _Out_ bool * pAllAscii, _Out_ DWORD * pLength); + + // Convert UNICODE string to UTF8 string. Direct/fast conversion if ASCII + HRESULT Unicode_Utf8(_In_z_ LPCWSTR pString, bool allAscii, _Out_writes_bytes_(length) LPSTR pBuffer, DWORD length); + + // Scan for ASCII string, calculate result UNICODE string length + HRESULT Utf8_Unicode_Length(_In_z_ LPCSTR pString, _Out_ bool * pAllAscii, _Out_ DWORD * pLength); + + // Convert UTF8 string to UNICODE. 
Direct/fast conversion if ASCII + HRESULT Utf8_Unicode(_In_z_ LPCSTR pString, bool allAscii, _Out_writes_bytes_(length) LPWSTR pBuffer, DWORD length); + + HRESULT ConvertUnicode_Utf8(_In_z_ LPCWSTR pString, _Outptr_result_z_ LPSTR * pBuffer); + + HRESULT ConvertUtf8_Unicode(_In_z_ LPCSTR pString, _Outptr_result_z_ LPWSTR * pBuffer); + +} // namespace FString + +#endif // _FSTRING_H_ diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index cbcd37fa0fb4f..fe5db13f6b971 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -2834,6 +2834,27 @@ template class CChainedHash }; +//***************************************************************************** +// +//********** String helper functions. +// +//***************************************************************************** + +//***************************************************************************** +// Checks if string length reaches or exceeds the specified limit (TRUE when length >= N) +//***************************************************************************** +inline BOOL IsStrLongerThan(_In_ _In_z_ char* pstr, unsigned N) +{ + LIMITED_METHOD_CONTRACT; + unsigned i = 0; + if(pstr) + { + for(i=0; (i < N)&&(pstr[i]); i++); + } + return (i >= N); +} + + //***************************************************************************** // Class to parse a list of simple assembly names and then find a match //***************************************************************************** diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt index 944c9c3a66759..ca41eb4e2bb0d 100644 --- a/src/coreclr/minipal/Unix/CMakeLists.txt +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -7,12 +7,6 @@ if(NOT CLR_CROSS_COMPONENTS_BUILD) list(APPEND SOURCES ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c ) - - if(CLR_CMAKE_HOST_OSX) - list(APPEND SOURCES - ${CLR_SRC_NATIVE_DIR}/minipal/utf8.c - ) - endif() endif() add_library(coreclrminipal diff --git a/src/coreclr/pal/inc/strsafe.h 
b/src/coreclr/pal/inc/strsafe.h new file mode 100644 index 0000000000000..b69feb73c2512 --- /dev/null +++ b/src/coreclr/pal/inc/strsafe.h @@ -0,0 +1,285 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*++ + + + + + +--*/ + +/****************************************************************** +* * +* strsafe.h -- This module defines safer C library string * +* routine replacements. These are meant to make C * +* a bit more safe in reference to security and * +* robustness * +* * +******************************************************************/ +#ifndef _STRSAFE_H_INCLUDED_ +#define _STRSAFE_H_INCLUDED_ +#ifdef _MSC_VER +#pragma once +#endif + +#include // for _vsnprintf, getc, getwc +#include // for memset +#include // for va_start, etc. + +#ifndef _SIZE_T_DEFINED +#ifdef HOST_64BIT +typedef unsigned __int64 size_t; +#else +typedef __w64 unsigned int size_t; +#endif // !HOST_64BIT +#define _SIZE_T_DEFINED +#endif // !_SIZE_T_DEFINED + +#ifndef SUCCEEDED +#define SUCCEEDED(hr) ((HRESULT)(hr) >= 0) +#endif + +#ifndef FAILED +#define FAILED(hr) ((HRESULT)(hr) < 0) +#endif + +#ifndef S_OK +#define S_OK ((HRESULT)0x00000000L) +#endif + +#ifdef __cplusplus +#define _STRSAFE_EXTERN_C extern "C" +#else +#define _STRSAFE_EXTERN_C extern +#endif + +// If you do not want to use these functions inline (and instead want to link w/ strsafe.lib), then +// #define STRSAFE_LIB before including this header file. +#if defined(STRSAFE_LIB) +#define STRSAFEAPI _STRSAFE_EXTERN_C HRESULT __stdcall +#pragma comment(lib, "strsafe.lib") +#elif defined(STRSAFE_LIB_IMPL) +#define STRSAFEAPI _STRSAFE_EXTERN_C HRESULT __stdcall +#else +#define STRSAFEAPI __inline HRESULT __stdcall +#define STRSAFE_INLINE +#endif + +// Some functions always run inline because they use stdin and we want to avoid building multiple +// versions of strsafe lib depending on if you use msvcrt, libcmt, etc. 
+#define STRSAFE_INLINE_API __inline HRESULT __stdcall + +// The user can request no "Cb" or no "Cch" functions, but not both! +#if defined(STRSAFE_NO_CB_FUNCTIONS) && defined(STRSAFE_NO_CCH_FUNCTIONS) +#error cannot specify both STRSAFE_NO_CB_FUNCTIONS and STRSAFE_NO_CCH_FUNCTIONS !! +#endif + +// This should only be defined when we are building strsafe.lib +#ifdef STRSAFE_LIB_IMPL +#define STRSAFE_INLINE +#endif + + +#define STRSAFE_MAX_CCH 2147483647 // max # of characters we support (same as INT_MAX) + +// STRSAFE error return codes +// +#define STRSAFE_E_INSUFFICIENT_BUFFER ((HRESULT)0x8007007AL) // 0x7A = 122L = ERROR_INSUFFICIENT_BUFFER +#define STRSAFE_E_INVALID_PARAMETER ((HRESULT)0x80070057L) // 0x57 = 87L = ERROR_INVALID_PARAMETER +#define STRSAFE_E_END_OF_FILE ((HRESULT)0x80070026L) // 0x26 = 38L = ERROR_HANDLE_EOF + +// Flags for controlling the Ex functions +// +// STRSAFE_FILL_BYTE(0xFF) 0x000000FF // bottom byte specifies fill pattern +#define STRSAFE_IGNORE_NULLS 0x00000100 // treat null as TEXT("") -- don't fault on NULL buffers +#define STRSAFE_FILL_BEHIND_NULL 0x00000200 // fill in extra space behind the null terminator +#define STRSAFE_FILL_ON_FAILURE 0x00000400 // on failure, overwrite pszDest with fill pattern and null terminate it +#define STRSAFE_NULL_ON_FAILURE 0x00000800 // on failure, set *pszDest = TEXT('\0') +#define STRSAFE_NO_TRUNCATION 0x00001000 // instead of returning a truncated result, copy/append nothing to pszDest and null terminate it + +#define STRSAFE_VALID_FLAGS (0x000000FF | STRSAFE_IGNORE_NULLS | STRSAFE_FILL_BEHIND_NULL | STRSAFE_FILL_ON_FAILURE | STRSAFE_NULL_ON_FAILURE | STRSAFE_NO_TRUNCATION) + +// helper macro to set the fill character and specify buffer filling +#define STRSAFE_FILL_BYTE(x) ((unsigned long)((x & 0x000000FF) | STRSAFE_FILL_BEHIND_NULL)) +#define STRSAFE_FAILURE_BYTE(x) ((unsigned long)((x & 0x000000FF) | STRSAFE_FILL_ON_FAILURE)) + +#define STRSAFE_GET_FILL_PATTERN(dwFlags) ((int)(dwFlags & 
0x000000FF)) + +// prototypes for the worker functions +#ifdef STRSAFE_INLINE +STRSAFEAPI StringCopyWorkerA(char* pszDest, size_t cchDest, const char* pszSrc); +STRSAFEAPI StringCopyWorkerW(WCHAR* pszDest, size_t cchDest, const WCHAR* pszSrc); +#endif // STRSAFE_INLINE + +#ifndef STRSAFE_NO_CCH_FUNCTIONS +/*++ + +STDAPI StringCchCopy(LPTSTR pszDest, + size_t cchDest, + LPCTSTR pszSrc); + +Routine Description: + + This routine is a safer version of the C built-in function 'strcpy'. + The size of the destination buffer (in characters) is a parameter and + this function will not write past the end of this buffer and it will + ALWAYS null terminate the destination buffer (unless it is zero length). + + This routine is not a replacement for strncpy. That function will pad the + destination string with extra null termination characters if the count is + greater than the length of the source string, and it will fail to null + terminate the destination string if the source string length is greater + than or equal to the count. You can not blindly use this instead of strncpy: + it is common for code to use it to "patch" strings and you would introduce + errors if the code started null terminating in the middle of the string. + + This function returns a hresult, and not a pointer. It returns a S_OK + if the string was copied without truncation and null terminated, otherwise + it will return a failure code. In failure cases as much of pszSrc will be + copied to pszDest as possible, and pszDest will be null terminated. + +Arguments: + + pszDest - destination string + + cchDest - size of destination buffer in characters. + length must be = (_tcslen(src) + 1) to hold all of the + source including the null terminator + + pszSrc - source string which must be null terminated + +Notes: + Behavior is undefined if source and destination strings overlap. + + pszDest and pszSrc should not be NULL. See StringCchCopyEx if you require + the handling of NULL values. 
+ +Return Value: + + S_OK - if there was source data and it was all copied and the + resultant dest string was null terminated + + failure - you can use the macro HRESULT_CODE() to get a win32 error + code for all hresult failure cases + + STRSAFE_E_INSUFFICIENT_BUFFER / + HRESULT_CODE(hr) == ERROR_INSUFFICIENT_BUFFER + - this return value is an indication that the copy operation + failed due to insufficient space. When this error occurs, + the destination buffer is modified to contain a truncated + version of the ideal result and is null terminated. This + is useful for situations where truncation is ok + + It is strongly recommended to use the SUCCEEDED() / FAILED() macros to test the + return value of this function. + +--*/ + +STRSAFEAPI StringCchCopyA(char* pszDest, size_t cchDest, const char* pszSrc); +STRSAFEAPI StringCchCopyW(WCHAR* pszDest, size_t cchDest, const WCHAR* pszSrc); +#ifdef UNICODE +#define StringCchCopy StringCchCopyW +#else +#define StringCchCopy StringCchCopyA +#endif // !UNICODE + +#ifdef STRSAFE_INLINE +STRSAFEAPI StringCchCopyA(char* pszDest, size_t cchDest, const char* pszSrc) +{ + HRESULT hr; + + if (cchDest > STRSAFE_MAX_CCH) + { + hr = STRSAFE_E_INVALID_PARAMETER; + } + else + { + hr = StringCopyWorkerA(pszDest, cchDest, pszSrc); + } + + return hr; +} + +STRSAFEAPI StringCchCopyW(WCHAR* pszDest, size_t cchDest, const WCHAR* pszSrc) +{ + HRESULT hr; + + if (cchDest > STRSAFE_MAX_CCH) + { + hr = STRSAFE_E_INVALID_PARAMETER; + } + else + { + hr = StringCopyWorkerW(pszDest, cchDest, pszSrc); + } + + return hr; +} +#endif // STRSAFE_INLINE +#endif // !STRSAFE_NO_CCH_FUNCTIONS + +// these are the worker functions that actually do the work +#ifdef STRSAFE_INLINE +STRSAFEAPI StringCopyWorkerA(char* pszDest, size_t cchDest, const char* pszSrc) +{ + HRESULT hr = S_OK; + + if (cchDest == 0) + { + // can not null terminate a zero-byte dest buffer + hr = STRSAFE_E_INVALID_PARAMETER; + } + else + { + while (cchDest && (*pszSrc != '\0')) + { + 
*pszDest++ = *pszSrc++; + cchDest--; + } + + if (cchDest == 0) + { + // we are going to truncate pszDest + pszDest--; + hr = STRSAFE_E_INSUFFICIENT_BUFFER; + } + + *pszDest= '\0'; + } + + return hr; +} + +STRSAFEAPI StringCopyWorkerW(WCHAR* pszDest, size_t cchDest, const WCHAR* pszSrc) +{ + HRESULT hr = S_OK; + + if (cchDest == 0) + { + // can not null terminate a zero-byte dest buffer + hr = STRSAFE_E_INVALID_PARAMETER; + } + else + { + while (cchDest && (*pszSrc != L'\0')) + { + *pszDest++ = *pszSrc++; + cchDest--; + } + + if (cchDest == 0) + { + // we are going to truncate pszDest + pszDest--; + hr = STRSAFE_E_INSUFFICIENT_BUFFER; + } + + *pszDest= L'\0'; + } + + return hr; +} +#endif // STRSAFE_INLINE + +#endif // _STRSAFE_H_INCLUDED_ diff --git a/src/coreclr/utilcode/CMakeLists.txt b/src/coreclr/utilcode/CMakeLists.txt index 1206b5237b9dd..ec543e707d718 100644 --- a/src/coreclr/utilcode/CMakeLists.txt +++ b/src/coreclr/utilcode/CMakeLists.txt @@ -6,6 +6,7 @@ set(UTILCODE_COMMON_SOURCES ex.cpp sbuffer.cpp sstring_com.cpp + fstring.cpp namespaceutil.cpp makepath.cpp splitpath.cpp diff --git a/src/coreclr/utilcode/fstring.cpp b/src/coreclr/utilcode/fstring.cpp new file mode 100644 index 0000000000000..9bcd12d1fffc6 --- /dev/null +++ b/src/coreclr/utilcode/fstring.cpp @@ -0,0 +1,321 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// --------------------------------------------------------------------------- +// FString.cpp +// + +// --------------------------------------------------------------------------- + +#include "stdafx.h" +#include "ex.h" +#include "holder.h" + +#include "fstring.h" + + +namespace FString +{ + +#ifdef _MSC_VER +#pragma optimize("t", on) +#endif // _MSC_VER + +#define MAX_LENGTH 0x1fffff00 + + +HRESULT Unicode_Utf8_Length(_In_z_ LPCWSTR pString, _Out_ bool * pAllAscii, _Out_ DWORD * pLength) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + * pAllAscii = true; + + LPCWSTR p = pString; + + while (true) + { + WCHAR ch = * p; + + // Single check for termination and non ASCII + if (((unsigned) (ch - 1)) >= 0x7F) + { + if (ch != 0) + { + * pAllAscii = false; + } + + break; + } + + p ++; + } + + if (* pAllAscii) + { + if ((p - pString) > MAX_LENGTH) + { + return COR_E_OVERFLOW; + } + + * pLength = (DWORD) (p - pString); + } + else // use WideCharToMultiByte to calculate result length + { + * pLength = WszWideCharToMultiByte(CP_UTF8, 0, pString, -1, NULL, 0, NULL, NULL); + + if (*pLength == 0) + { + return HRESULT_FROM_GetLastError(); + } + + // Remove the count of null terminator, to be consistent with the all-ASCII case. 
+ --*pLength; + + if (*pLength > MAX_LENGTH) + { + return COR_E_OVERFLOW; + } + } + + return S_OK; +} + + +// UNICODE to UTF8 +HRESULT Unicode_Utf8(_In_z_ LPCWSTR pString, bool allAscii, _Out_writes_bytes_(length) LPSTR pBuffer, DWORD length) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + pBuffer[length] = 0; + + if (allAscii) + { + LPCWSTR p = pString; + + LPSTR q = pBuffer; + + LPCWSTR endP = p + length - 8; + + // Unfold to optimize for long string: 8 chars per iteration + while (p < endP) + { + q[0] = (char) p[0]; + q[1] = (char) p[1]; + q[2] = (char) p[2]; + q[3] = (char) p[3]; + + q[4] = (char) p[4]; + q[5] = (char) p[5]; + q[6] = (char) p[6]; + q[7] = (char) p[7]; + + q += 8; + p += 8; + } + + endP += 8; + + while (p < endP) + { + * q ++ = (char) * p ++; + } + } + else + { + length = WszWideCharToMultiByte(CP_UTF8, 0, pString, -1, pBuffer, (int) length + 1, NULL, NULL); + + if (length == 0) + { + return HRESULT_FROM_GetLastError(); + } + } + + return S_OK; +} + + +HRESULT Utf8_Unicode_Length(_In_z_ LPCSTR pString, _Out_ bool * pAllAscii, _Out_ DWORD * pLength) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + * pAllAscii = true; + + LPCSTR p = pString; + + while (true) + { + char ch = * p; + + // Single check for termination and non ASCII + if (((unsigned) (ch - 1)) >= 0x7F) + { + if (ch != 0) + { + * pAllAscii = false; + } + + break; + } + + p ++; + } + + if (* pAllAscii) + { + if ((p - pString) > MAX_LENGTH) + { + return COR_E_OVERFLOW; + } + + * pLength = (DWORD)(p - pString); + } + else + { + * pLength = WszMultiByteToWideChar(CP_UTF8, 0, pString, -1, NULL, 0); + + if (* pLength == 0) + { + return HRESULT_FROM_GetLastError(); + } + + // Remove the count of null terminator, to be consistent with the all-ASCII case. 
+ --*pLength; + + if (* pLength > MAX_LENGTH) + { + return COR_E_OVERFLOW; + } + } + + return S_OK; +} + + +// UTF8 to Unicode + +HRESULT Utf8_Unicode(_In_z_ LPCSTR pString, bool allAscii, _Out_writes_bytes_(length) LPWSTR pBuffer, DWORD length) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + pBuffer[length] = 0; + + if (allAscii) + { + LPCSTR p = pString; + + LPWSTR q = pBuffer; + + LPCSTR endP = p + length - 8; + + // Unfold to optimize for long string: 8 chars per iteration + while (p < endP) + { + q[0] = (WCHAR) p[0]; + q[1] = (WCHAR) p[1]; + q[2] = (WCHAR) p[2]; + q[3] = (WCHAR) p[3]; + + q[4] = (WCHAR) p[4]; + q[5] = (WCHAR) p[5]; + q[6] = (WCHAR) p[6]; + q[7] = (WCHAR) p[7]; + + q += 8; + p += 8; + } + + endP += 8; + + while (p < endP) + { + * q ++ = (WCHAR) * p ++; + } + } + else + { + length = WszMultiByteToWideChar(CP_UTF8, 0, pString, -1, pBuffer, (int) length + 1); + + if (length == 0) + { + return HRESULT_FROM_GetLastError(); + } + } + + return S_OK; +} + + +HRESULT ConvertUnicode_Utf8(_In_z_ LPCWSTR pString, _Outptr_result_z_ LPSTR * pBuffer) +{ + bool allAscii; + DWORD length; + + HRESULT hr = Unicode_Utf8_Length(pString, & allAscii, & length); + + if (SUCCEEDED(hr)) + { + * pBuffer = new (nothrow) char[length + 1]; + + if (* pBuffer == NULL) + { + hr = E_OUTOFMEMORY; + } + else + { + hr = Unicode_Utf8(pString, allAscii, * pBuffer, length); + } + } + + return hr; +} + + +HRESULT ConvertUtf8_Unicode(_In_z_ LPCSTR pString, _Outptr_result_z_ LPWSTR * pBuffer) +{ + bool allAscii; + DWORD length; + + HRESULT hr = Utf8_Unicode_Length(pString, & allAscii, & length); + + if (SUCCEEDED(hr)) + { + * pBuffer = new (nothrow) WCHAR[length + 1]; + + if (* pBuffer == NULL) + { + hr = E_OUTOFMEMORY; + } + else + { + hr = Utf8_Unicode(pString, allAscii, * pBuffer, length); + } + } + + return hr; +} + + +#ifdef _MSC_VER +#pragma optimize("", on) +#endif // _MSC_VER + +} // namespace FString diff --git a/src/coreclr/utilcode/sstring.cpp 
b/src/coreclr/utilcode/sstring.cpp index e20e361fe5989..d87efd8e10f0b 100644 --- a/src/coreclr/utilcode/sstring.cpp +++ b/src/coreclr/utilcode/sstring.cpp @@ -10,7 +10,6 @@ #include "sstring.h" #include "ex.h" #include "holder.h" -#include #if defined(_MSC_VER) @@ -865,25 +864,27 @@ COUNT_T SString::ConvertToUTF8(SString &s) const UNREACHABLE(); } - size_t length = minipal_get_length_utf16_to_utf8((CHAR16_T*)GetRawUnicode(), GetCount(), MINIPAL_MB_NO_REPLACE_INVALID_CHARS); + // @todo: use WC_NO_BEST_FIT_CHARS + bool allAscii; + DWORD length; - if (length >= COUNT_T_MAX) - { - ThrowHR(COR_E_OVERFLOW); - } - - s.Resize((COUNT_T)length, REPRESENTATION_UTF8); + HRESULT hr = FString::Unicode_Utf8_Length(GetRawUnicode(), & allAscii, & length); - //we optimize the empty string by replacing it with null for SString above in Resize - if (length > 0) + if (SUCCEEDED(hr)) { - if (!minipal_convert_utf16_to_utf8((CHAR16_T*)GetRawUnicode(), GetCount(), s.GetRawUTF8(), length, MINIPAL_MB_NO_REPLACE_INVALID_CHARS)) + s.Resize(length, REPRESENTATION_UTF8); + + //FString::Unicode_Utf8 expects an array all the time + //we optimize the empty string by replacing it with null for SString above in Resize + if (length > 0) { - ThrowHR(EMAKEHR(errno)); + hr = FString::Unicode_Utf8(GetRawUnicode(), allAscii, (LPSTR) s.GetRawUTF8(), length); } } - RETURN (COUNT_T)length + 1; + IfFailThrow(hr); + + RETURN length + 1; } //----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/classcompat.cpp b/src/coreclr/vm/classcompat.cpp index 41fb6735de9da..b089dad7fdafb 100644 --- a/src/coreclr/vm/classcompat.cpp +++ b/src/coreclr/vm/classcompat.cpp @@ -2380,7 +2380,7 @@ VOID MethodTableBuilder::EnumerateClassMethods() { BuildMethodTableThrowException(IDS_CLASSLOAD_BADFORMAT); } - if (strnlen(strMethodName, MAX_CLASS_NAME) >= MAX_CLASS_NAME) + if (IsStrLongerThan(strMethodName,MAX_CLASS_NAME)) { BuildMethodTableThrowException(BFA_METHOD_NAME_TOO_LONG); } 
diff --git a/src/coreclr/vm/classhash.cpp b/src/coreclr/vm/classhash.cpp index 03b8c5190c007..5d2be11c9b328 100644 --- a/src/coreclr/vm/classhash.cpp +++ b/src/coreclr/vm/classhash.cpp @@ -9,6 +9,7 @@ #include "common.h" #include "classhash.h" #include "dacenumerablehash.inl" +#include "fstring.h" #include "classhash.inl" PTR_EEClassHashEntry EEClassHashEntry::GetEncloser() diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index c271586dc1e7b..6f984b6483396 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -26,6 +26,7 @@ #include "shimload.h" #include "debuginfostore.h" +#include "strsafe.h" #include "configuration.h" diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index cecf537129987..063d8414c5970 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -2751,7 +2751,7 @@ MethodTableBuilder::EnumerateClassMethods() { BuildMethodTableThrowException(IDS_CLASSLOAD_BADFORMAT); } - if (strnlen(strMethodName, MAX_CLASS_NAME) >= MAX_CLASS_NAME) + if(IsStrLongerThan(strMethodName,MAX_CLASS_NAME)) { BuildMethodTableThrowException(BFA_METHOD_NAME_TOO_LONG); } @@ -4057,8 +4057,8 @@ VOID MethodTableBuilder::InitializeFieldDescs(FieldDesc *pFieldDescList, BuildMethodTableThrowException(IDS_CLASSLOAD_BADFORMAT); } - if (strnlen((char *)pszClassName, MAX_CLASS_NAME) >= MAX_CLASS_NAME - || strnlen((char *)pszNameSpace, MAX_CLASS_NAME) >= MAX_CLASS_NAME + if (IsStrLongerThan((char *)pszClassName, MAX_CLASS_NAME) + || IsStrLongerThan((char *)pszNameSpace, MAX_CLASS_NAME) || (strlen(pszClassName) + strlen(pszNameSpace) + 1 >= MAX_CLASS_NAME)) { BuildMethodTableThrowException(BFA_TYPEREG_NAME_TOO_LONG, mdMethodDefNil);