Skip to content

Commit c0d7d2d

Browse files
LakshanF and jkotas authored
Use utf conversions from minipal (#89036)
* Support for utf conversion
* cast fix
* FB
* FB
* Update src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h
  Co-authored-by: Jan Kotas <jkotas@microsoft.com>
* Apply suggestions from code review
  Co-authored-by: Jan Kotas <jkotas@microsoft.com>
* FB
* FB
* FB

---------

Co-authored-by: Jan Kotas <jkotas@microsoft.com>
1 parent 5bc100e commit c0d7d2d

File tree

3 files changed

+72
-36
lines changed

3 files changed

+72
-36
lines changed

src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,21 @@ set(AOT_EVENTPIPE_SHIM_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
99

1010
set (CONTAINER_SOURCES "")
1111
set (CONTAINER_HEADERS "")
12+
set (MINIPAL_SOURCES "")
1213
set (EVENTPIPE_SOURCES "")
1314
set (EVENTPIPE_HEADERS "")
1415
set (GEN_EVENTPIPE_SOURCES "")
1516

1617
set (SHARED_CONTAINERS_SOURCE_PATH "${CLR_SRC_NATIVE_DIR}/containers")
1718
set (SHARED_EVENTPIPE_SOURCE_PATH "${CLR_SRC_NATIVE_DIR}/eventpipe")
19+
set (SHARED_MINIPAL_SOURCE_PATH "${CLR_SRC_NATIVE_DIR}/minipal")
1820
include (${SHARED_EVENTPIPE_SOURCE_PATH}/eventpipe.cmake)
1921
include (${SHARED_CONTAINERS_SOURCE_PATH}/containers.cmake)
2022

23+
list(APPEND MINIPAL_SOURCES
24+
utf8.c
25+
)
26+
2127
if(CLR_CMAKE_HOST_WIN32)
2228
list(APPEND SHARED_DIAGNOSTIC_SERVER_SOURCES
2329
ds-ipc-pal-namedpipe.c
@@ -50,6 +56,7 @@ list(APPEND EVENTPIPE_HEADERS
5056

5157
addprefix(CONTAINER_SOURCES ${SHARED_CONTAINERS_SOURCE_PATH} "${SHARED_CONTAINER_SOURCES}")
5258
addprefix(CONTAINER_HEADERS ${SHARED_CONTAINERS_SOURCE_PATH} "${SHARED_CONTAINER_HEADERS}")
59+
addprefix(MINIPAL_SOURCES ${SHARED_MINIPAL_SOURCE_PATH} "${MINIPAL_SOURCES}")
5360

5461
addprefix(EVENTPIPE_SOURCES ${SHARED_EVENTPIPE_SOURCE_PATH} "${EVENTPIPE_SOURCES}")
5562
addprefix(EVENTPIPE_HEADERS ${SHARED_EVENTPIPE_SOURCE_PATH} "${EVENTPIPE_HEADERS}")
@@ -125,6 +132,7 @@ list(APPEND EVENTPIPE_SOURCES
125132
${GEN_EVENTPIPE_SOURCES}
126133
${CONTAINER_SOURCES}
127134
${CONTAINER_HEADERS}
135+
${MINIPAL_SOURCES}
128136
)
129137

130138
list(APPEND AOT_EVENTPIPE_DISABLED_SOURCES

src/coreclr/nativeaot/Runtime/eventpipe/ep-rt-aot.h

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <sys/time.h>
1111
#endif
1212

13+
#include <minipal/utf8.h>
14+
1315
#include <eventpipe/ep-rt-config.h>
1416
#ifdef ENABLE_PERFTRACING
1517
#include <eventpipe/ep-thread.h>
@@ -1371,6 +1373,7 @@ ep_rt_utf8_string_replace (
13711373
return false;
13721374
}
13731375

1376+
13741377
static
13751378
ep_char16_t *
13761379
ep_rt_utf8_to_utf16le_string (
@@ -1382,22 +1385,36 @@ ep_rt_utf8_to_utf16le_string (
13821385
if (!str)
13831386
return NULL;
13841387

1385-
// Shipping criteria: no EVENTPIPE-NATIVEAOT-TODO left in the codebase
1386-
// Implementation would just use strlen and malloc to make a new buffer, and would then copy the string chars one by one.
1387-
// Assumes that only ASCII is used for ep_char8_t
1388-
size_t len_utf8 = strlen(str);
1389-
ep_char16_t *str_utf16 = reinterpret_cast<ep_char16_t *>(malloc ((len_utf8 + 1) * sizeof (ep_char16_t)));
1390-
if (!str_utf16)
1388+
if (len == 0) {
1389+
// Return an empty string if the length is 0
1390+
CHAR16_T * lpDestEmptyStr = reinterpret_cast<CHAR16_T *>(malloc(1 * sizeof(CHAR16_T)));
1391+
if(lpDestEmptyStr==NULL) {
1392+
return NULL;
1393+
}
1394+
*lpDestEmptyStr = '\0';
1395+
return reinterpret_cast<ep_char16_t*>(lpDestEmptyStr);
1396+
}
1397+
1398+
if (len == (size_t) -1) {
1399+
// Following the pattern used in EventPipe library where it allocates 1 extra character
1400+
len = strlen(str) + 1;
1401+
}
1402+
1403+
int32_t flags = MINIPAL_MB_NO_REPLACE_INVALID_CHARS | MINIPAL_TREAT_AS_LITTLE_ENDIAN;
1404+
1405+
size_t ret = minipal_get_length_utf8_to_utf16 (str, len, flags);
1406+
1407+
if (ret <= 0)
13911408
return NULL;
13921409

1393-
for (size_t i = 0; i < len_utf8; i++)
1394-
{
1395-
EP_ASSERT(isascii(str[i]));
1396-
str_utf16[i] = str[i];
1410+
CHAR16_T * lpDestStr = reinterpret_cast<CHAR16_T *>(malloc((ret + 1) * sizeof(CHAR16_T)));
1411+
if(lpDestStr==NULL) {
1412+
return NULL;
13971413
}
1414+
ret = minipal_convert_utf8_to_utf16 (str, len, lpDestStr, ret, flags);
1415+
lpDestStr[ret] = '\0';
13981416

1399-
str_utf16[len_utf8] = 0;
1400-
return str_utf16;
1417+
return reinterpret_cast<ep_char16_t*>(lpDestStr);
14011418
}
14021419

14031420
static
@@ -1446,27 +1463,37 @@ ep_rt_utf16_to_utf8_string (
14461463
size_t len)
14471464
{
14481465
STATIC_CONTRACT_NOTHROW;
1449-
14501466
if (!str)
14511467
return NULL;
1452-
1453-
// shipping criteria: no EVENTPIPE-NATIVEAOT-TODO left in the codebase
1454-
// Simple implementation to create a utf8 string from a utf16 one
1455-
size_t len_utf16 = len;
1456-
if(len_utf16 == (size_t)-1)
1457-
len_utf16 = ep_rt_utf16_string_len (str);
14581468

1459-
ep_char8_t *str_utf8 = reinterpret_cast<ep_char8_t *>(malloc ((len_utf16 + 1) * sizeof (ep_char8_t)));
1460-
if (!str_utf8)
1469+
if (len == 0) {
1470+
// Return an empty string if the length is 0
1471+
char * lpDestEmptyStr = reinterpret_cast<char *>(malloc(1 * sizeof(char)));
1472+
if(lpDestEmptyStr==NULL) {
1473+
return NULL;
1474+
}
1475+
*lpDestEmptyStr = '\0';
1476+
return reinterpret_cast<ep_char8_t*>(lpDestEmptyStr);
1477+
}
1478+
1479+
if (len == (size_t) -1) {
1480+
// Following the pattern used in EventPipe library where it allocates 1 extra character
1481+
len = ep_rt_utf16_string_len (str) + 1;
1482+
}
1483+
1484+
size_t ret = minipal_get_length_utf16_to_utf8 (reinterpret_cast<const CHAR16_T *>(str), len, 0);
1485+
1486+
if (ret <= 0)
14611487
return NULL;
14621488

1463-
for (size_t i = 0; i < len_utf16; i++)
1464-
{
1465-
str_utf8[i] = (char)str[i];
1489+
char* lpDestStr = reinterpret_cast<char *>(malloc((ret + 1) * sizeof(char)));
1490+
if(lpDestStr==NULL) {
1491+
return NULL;
14661492
}
1493+
ret = minipal_convert_utf16_to_utf8 (reinterpret_cast<const CHAR16_T*>(str), len, lpDestStr, ret, 0);
1494+
lpDestStr[ret] = '\0';
14671495

1468-
str_utf8[len_utf16] = 0;
1469-
return str_utf8;
1496+
return reinterpret_cast<ep_char8_t*>(lpDestStr);
14701497
}
14711498

14721499
static

src/native/minipal/utf8.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
365365
// Initialize stuff
366366
unsigned char *pSrc = bytes;
367367
unsigned char *pEnd = pSrc + count;
368-
int availableBytes, chc;
368+
size_t availableBytes;
369+
int chc;
369370

370371
// Start by assuming we have as many as count, charCount always includes the adjustment
371372
// for the character being decoded
@@ -532,7 +533,7 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
532533

533534
EncodeChar:
534535

535-
availableBytes = pEnd - pSrc;
536+
availableBytes = (size_t)(pEnd - pSrc);
536537

537538
// don't fall into the fast decoding loop if we don't have enough bytes
538539
if (availableBytes <= 13)
@@ -749,7 +750,7 @@ static size_t GetCharCount(UTF8Encoding* self, unsigned char* bytes, size_t coun
749750
return 0; \
750751
}
751752

752-
static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount, CHAR16_T* chars, size_t charCount)
753+
static size_t GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount, CHAR16_T* chars, size_t charCount)
753754
{
754755
assert(chars != NULL);
755756
assert(byteCount >= 0);
@@ -982,8 +983,8 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
982983
*pTarget = (CHAR16_T)ch;
983984
ENSURE_BUFFER_INC
984985

985-
int availableChars = pAllocatedBufferEnd - pTarget;
986-
int availableBytes = pEnd - pSrc;
986+
size_t availableChars = (size_t)(pAllocatedBufferEnd - pTarget);
987+
size_t availableBytes = (size_t)(pEnd - pSrc);
987988

988989
// don't fall into the fast decoding loop if we don't have enough bytes
989990
// Test for availableChars is done because pStop would be <= pTarget.
@@ -1289,7 +1290,7 @@ static int GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCount,
12891290
return 0;
12901291
}
12911292

1292-
return pTarget - chars;
1293+
return (size_t)(pTarget - chars);
12931294
}
12941295

12951296
static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, unsigned char* bytes, size_t byteCount)
@@ -1510,8 +1511,8 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
15101511
if (fallbackUsed && (ch = EncoderReplacementFallbackBuffer_InternalGetNextChar(&self->buffer.encoder)) != 0)
15111512
goto ProcessChar;
15121513

1513-
int availableChars = pEnd - pSrc;
1514-
int availableBytes = pAllocatedBufferEnd - pTarget;
1514+
size_t availableChars = (size_t)(pEnd - pSrc);
1515+
size_t availableBytes = (size_t)(pAllocatedBufferEnd - pTarget);
15151516

15161517
// don't fall into the fast decoding loop if we don't have enough characters
15171518
// Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
@@ -1709,7 +1710,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
17091710
return 0;
17101711
}
17111712

1712-
return (int)(pTarget - bytes);
1713+
return (size_t)(pTarget - bytes);
17131714
}
17141715

17151716
static size_t GetByteCount(UTF8Encoding* self, CHAR16_T *chars, size_t count)
@@ -1889,7 +1890,7 @@ static size_t GetByteCount(UTF8Encoding* self, CHAR16_T *chars, size_t count)
18891890
goto ProcessChar;
18901891
}
18911892

1892-
int availableChars = pEnd - pSrc;
1893+
size_t availableChars = (size_t)(pEnd - pSrc);
18931894

18941895
// don't fall into the fast decoding loop if we don't have enough characters
18951896
if (availableChars <= 13)

0 commit comments

Comments
 (0)