@@ -245,6 +245,63 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) {
245245 ASSERT_EQ (static_cast <int >(sc.getSourceIndex ()), 4 );
246246}
247247
248+ TEST (LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
249+ // if an invalid character exists in the source string but we don't have space
250+ // to write it, we should return a "stop converting" error rather than an
251+ // invalid character error
252+
253+ // first 4 bytes are clown emoji (🤡)
254+ // next 3 form an invalid character
255+ const char *src1 = " \xF0\x9F\xA4\xA1\x90\x88\x30 " ;
256+ LIBC_NAMESPACE::internal::mbstate ps1;
257+ LIBC_NAMESPACE::internal::StringConverter<char8_t > sc1 (
258+ reinterpret_cast <const char8_t *>(src1), &ps1, 1 );
259+
260+ auto res1 = sc1.popUTF32 ();
261+ ASSERT_TRUE (res1.has_value ());
262+ ASSERT_EQ (static_cast <int >(res1.value ()), 0x1f921 );
263+ ASSERT_EQ (static_cast <int >(sc1.getSourceIndex ()), 4 );
264+
265+ res1 = sc1.popUTF32 ();
266+ ASSERT_FALSE (res1.has_value ());
267+ // no space to write error NOT invalid character error (EILSEQ)
268+ ASSERT_EQ (static_cast <int >(res1.error ()), -1 );
269+ ASSERT_EQ (static_cast <int >(sc1.getSourceIndex ()), 4 );
270+
271+ const wchar_t src2[] = {
272+ static_cast <wchar_t >(0x1f921 ), static_cast <wchar_t >(0xffffff ),
273+ static_cast <wchar_t >(0x0 )}; // clown emoji, invalid utf32
274+ LIBC_NAMESPACE::internal::mbstate ps2;
275+ LIBC_NAMESPACE::internal::StringConverter<char32_t > sc2 (
276+ reinterpret_cast <const char32_t *>(src2), &ps2, 4 );
277+
278+ auto res2 = sc2.popUTF8 ();
279+ ASSERT_TRUE (res2.has_value ());
280+ ASSERT_EQ (static_cast <int >(res2.value ()), 0xF0 );
281+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
282+
283+ res2 = sc2.popUTF8 ();
284+ ASSERT_TRUE (res2.has_value ());
285+ ASSERT_EQ (static_cast <int >(res2.value ()), 0x9F );
286+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
287+
288+ res2 = sc2.popUTF8 ();
289+ ASSERT_TRUE (res2.has_value ());
290+ ASSERT_EQ (static_cast <int >(res2.value ()), 0xA4 );
291+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
292+
293+ res2 = sc2.popUTF8 ();
294+ ASSERT_TRUE (res2.has_value ());
295+ ASSERT_EQ (static_cast <int >(res2.value ()), 0xA1 );
296+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
297+
298+ res2 = sc2.popUTF8 ();
299+ ASSERT_FALSE (res2.has_value ());
300+ // no space to write error NOT invalid character error (EILSEQ)
301+ ASSERT_EQ (static_cast <int >(res2.error ()), -1 );
302+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
303+ }
304+
248305TEST (LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
249306 /*
250307 We do NOT test partially popping a character and expecting the next
0 commit comments