Skip to content

Commit 42db73a

Browse files
authored
[Strings] Fuzz and interpret all relevant StringNew methods (#6526)
This adds fuzzing for string.new_wtf16_array and string.from_code_point. The latter was also missing interpreter support, which this adds.
1 parent 94ddae0 commit 42db73a

File tree

4 files changed

+180
-41
lines changed

4 files changed

+180
-41
lines changed

src/tools/fuzzing.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,8 @@ class TranslateToFuzzReader {
315315
Expression* makeBasicRef(Type type);
316316
Expression* makeCompoundRef(Type type);
317317

318+
// Creates an expression that evaluates to a string. Per the definition in
// fuzzing.cpp this is either a string.const or, when inside a function, a
// string.new_wtf16_array / string.from_code_point built from fuzzed operands.
Expression* makeString();
319+
318320
// Similar to makeBasic/CompoundRef, but indicates that this value will be
319321
// used in a place that will trap on null. For example, the reference of a
320322
// struct.get or array.set would use this.
@@ -378,6 +380,7 @@ class TranslateToFuzzReader {
378380
Type getLoggableType();
379381
bool isLoggableType(Type type);
380382
Nullability getNullability();
383+
// Picks Mutable or Immutable; the definition chooses between them with
// oneIn(2).
Mutability getMutability();
381384
Nullability getSubType(Nullability nullability);
382385
HeapType getSubType(HeapType type);
383386
Type getSubType(Type type);

src/tools/fuzzing/fuzzing.cpp

Lines changed: 80 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2605,47 +2605,7 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) {
26052605
return null;
26062606
}
26072607
case HeapType::string: {
2608-
// Construct an interesting WTF-8 string from parts.
2609-
std::stringstream wtf8;
2610-
bool lastWasLeadingSurrogate = false;
2611-
for (size_t i = 0, end = upTo(4); i < end; ++i) {
2612-
switch (upTo(6)) {
2613-
case 0:
2614-
// A simple ascii string.
2615-
wtf8 << std::to_string(upTo(1024));
2616-
break;
2617-
case 1:
2618-
// '£'
2619-
wtf8 << "\xC2\xA3";
2620-
break;
2621-
case 2:
2622-
// '€'
2623-
wtf8 << "\xE2\x82\xAC";
2624-
break;
2625-
case 3:
2626-
// '𐍈'
2627-
wtf8 << "\xF0\x90\x8D\x88";
2628-
break;
2629-
case 4:
2630-
// The leading surrogate in '𐍈'
2631-
wtf8 << "\xED\xA0\x80";
2632-
lastWasLeadingSurrogate = true;
2633-
continue;
2634-
case 5:
2635-
if (lastWasLeadingSurrogate) {
2636-
// Avoid invalid WTF-8.
2637-
continue;
2638-
}
2639-
// The trailing surrogate in '𐍈'
2640-
wtf8 << "\xED\xBD\x88";
2641-
break;
2642-
}
2643-
lastWasLeadingSurrogate = false;
2644-
}
2645-
std::stringstream wtf16;
2646-
// TODO: Use wtf16.view() once we have C++20.
2647-
String::convertWTF8ToWTF16(wtf16, wtf8.str());
2648-
return builder.makeStringConst(wtf16.str());
2608+
return makeString();
26492609
}
26502610
case HeapType::stringview_wtf16:
26512611
// We fully support wtf16 strings.
@@ -2760,6 +2720,81 @@ Expression* TranslateToFuzzReader::makeCompoundRef(Type type) {
27602720
}
27612721
}
27622722

2723+
// Builds an expression that produces a string, using one of three strategies
// selected by upTo(3):
//   0: string.new_wtf16_array over a fuzzed i16 array and two fuzzed i32s,
//   1: string.from_code_point over a fuzzed i32,
//   2: string.const of a WTF-8 string assembled from a few fixed parts.
// Strategies 0 and 1 are only taken when funcContext is set; otherwise each
// falls through to the next case, so outside a function the result is always
// the string.const form.
// NOTE(review): the helper calls below (getMutability, getNullability, upTo,
// make) presumably each consume fuzz input, so their order likely affects
// what gets generated - confirm before reordering anything here.
Expression* TranslateToFuzzReader::makeString() {
2724+
// Fuzz with JS-style strings.
2725+
// The i16 array type is built up front, before the strategy is chosen, so it
// is computed even on paths that never use it (cases 1 and 2).
auto mutability = getMutability();
2726+
auto arrayHeapType =
2727+
HeapType(Array(Field(Field::PackedType::i16, mutability)));
2728+
auto nullability = getNullability();
2729+
auto arrayType = Type(arrayHeapType, nullability);
2730+
switch (upTo(3)) {
2731+
case 0: {
2732+
// Make a string from an array. We can only do this in functions.
2733+
if (funcContext) {
2734+
// All three operands are arbitrary fuzzed expressions; nothing here
// constrains start/end to lie within the array, and the array reference
// may be of a nullable type.
auto array = make(arrayType);
2735+
auto* start = make(Type::i32);
2736+
auto* end = make(Type::i32);
2737+
return builder.makeStringNew(
2738+
StringNewWTF16Array, array, start, end, false);
2739+
}
2740+
// Not in a function: try the next strategy (which performs the same check
// and will in turn fall through to the constant).
[[fallthrough]];
2741+
}
2742+
case 1: {
2743+
// Make a string from a code point. We can only do this in functions.
2744+
if (funcContext) {
2745+
// The code point is an arbitrary fuzzed i32, so it is not limited to
// valid code points.
auto codePoint = make(Type::i32);
2746+
return builder.makeStringNew(
2747+
StringNewFromCodePoint, codePoint, nullptr, false);
2748+
}
2749+
// Not in a function: fall back to a string constant.
[[fallthrough]];
2750+
}
2751+
case 2: {
2752+
// Construct an interesting WTF-8 string from parts and use string.const.
2753+
std::stringstream wtf8;
2754+
// Tracks whether the most recently written part was the lone leading
// surrogate, so that a trailing surrogate is never appended right after it.
bool lastWasLeadingSurrogate = false;
2755+
// The number of parts is upTo(4); if that yields 0 the loop body never runs
// and the constant is the empty string.
for (size_t i = 0, end = upTo(4); i < end; ++i) {
2756+
switch (upTo(6)) {
2757+
case 0:
2758+
// A simple ascii string.
2759+
wtf8 << std::to_string(upTo(1024));
2760+
break;
2761+
case 1:
2762+
// '£'
2763+
wtf8 << "\xC2\xA3";
2764+
break;
2765+
case 2:
2766+
// '€'
2767+
wtf8 << "\xE2\x82\xAC";
2768+
break;
2769+
case 3:
2770+
// '𐍈'
2771+
wtf8 << "\xF0\x90\x8D\x88";
2772+
break;
2773+
case 4:
2774+
// The leading surrogate in '𐍈'
2775+
wtf8 << "\xED\xA0\x80";
2776+
lastWasLeadingSurrogate = true;
2777+
// `continue` rather than `break` so the flag reset at the bottom of the
// loop body is skipped and the flag stays set for the next part.
continue;
2778+
case 5:
2779+
if (lastWasLeadingSurrogate) {
2780+
// Avoid invalid WTF-8.
2781+
// Nothing is written and the flag is left set, so a trailing surrogate
// stays blocked until some other part has been emitted.
continue;
2782+
}
2783+
// The trailing surrogate in '𐍈'
2784+
wtf8 << "\xED\xBD\x88";
2785+
break;
2786+
}
2787+
// Reached via `break` only, i.e. after any part other than the leading
// surrogate was written.
lastWasLeadingSurrogate = false;
2788+
}
2789+
// string.const is given the WTF-16 form of the text, so convert first.
std::stringstream wtf16;
2790+
// TODO: Use wtf16.view() once we have C++20.
2791+
String::convertWTF8ToWTF16(wtf16, wtf8.str());
2792+
return builder.makeStringConst(wtf16.str());
2793+
}
2794+
}
2795+
// Every handled case returns, so this is only reached if upTo(3) produces a
// value outside 0..2 (presumably impossible - confirm against upTo).
WASM_UNREACHABLE("bad switch");
2796+
}
2797+
27632798
Expression* TranslateToFuzzReader::makeTrappingRefUse(HeapType type) {
27642799
auto percent = upTo(100);
27652800
// Only give a low probability to emit a nullable reference.
@@ -4071,6 +4106,10 @@ Nullability TranslateToFuzzReader::getNullability() {
40714106
return Nullable;
40724107
}
40734108

4109+
// Chooses a mutability for generated types: Mutable when oneIn(2) holds,
// Immutable otherwise.
Mutability TranslateToFuzzReader::getMutability() {
  if (oneIn(2)) {
    return Mutable;
  }
  return Immutable;
}
4112+
40744113
Nullability TranslateToFuzzReader::getSubType(Nullability nullability) {
40754114
if (nullability == NonNullable) {
40764115
return NonNullable;

src/wasm-interpreter.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "support/bits.h"
3434
#include "support/safe_integer.h"
3535
#include "support/stdckdint.h"
36+
#include "support/string.h"
3637
#include "wasm-builder.h"
3738
#include "wasm-traversal.h"
3839
#include "wasm.h"
@@ -1898,6 +1899,16 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
18981899
}
18991900
return makeGCData(contents, curr->type);
19001901
}
1902+
case StringNewFromCodePoint: {
1903+
uint32_t codePoint = ptr.getSingleValue().getUnsigned();
1904+
if (codePoint > 0x10FFFF) {
1905+
trap("invalid code point");
1906+
}
1907+
std::stringstream wtf16;
1908+
String::writeWTF16CodePoint(wtf16, codePoint);
1909+
std::string str = wtf16.str();
1910+
return Literal(str);
1911+
}
19011912
default:
19021913
// TODO: others
19031914
return Flow(NONCONSTANT_FLOW);

test/lit/exec/strings.wast

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,64 @@
414414
;; Concatenating these surrogates creates '𐍈'.
415415
(string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88"))
416416
)
417+
418+
;; CHECK: [fuzz-exec] calling string.from_code_point
419+
;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
420+
(func $string.from_code_point (export "string.from_code_point") (result stringref)
421+
(string.from_code_point
422+
(i32.const 65)
423+
)
424+
)
425+
426+
;; CHECK: [fuzz-exec] calling unsigned_code_point
427+
;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
428+
(func $unsigned_code_point (export "unsigned_code_point") (result stringref)
429+
(string.from_code_point
430+
;; This must be interpreted as unsigned, that is, in the escaped output
431+
;; the top byte is 0.
432+
(i32.const 147)
433+
)
434+
)
435+
436+
;; CHECK: [fuzz-exec] calling weird_code_point
437+
;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
438+
(func $weird_code_point (export "weird_code_point") (result stringref)
439+
(string.from_code_point
440+
(i32.const 0x3e8)
441+
)
442+
)
443+
444+
;; CHECK: [fuzz-exec] calling isolated_high_code_point
445+
;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
446+
(func $isolated_high_code_point (export "isolated_high_code_point") (result stringref)
447+
(string.from_code_point
448+
(i32.const 0xD800)
449+
)
450+
)
451+
452+
;; CHECK: [fuzz-exec] calling isolated_low_code_point
453+
;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
454+
(func $isolated_low_code_point (export "isolated_low_code_point") (result stringref)
455+
(string.from_code_point
456+
(i32.const 0xDC00)
457+
)
458+
)
459+
460+
;; CHECK: [fuzz-exec] calling surrogate_pair_code_point
461+
;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\u286c")
462+
(func $surrogate_pair_code_point (export "surrogate_pair_code_point") (result stringref)
463+
(string.from_code_point
464+
(i32.const 0x286c) ;; NOTE(review): 0x286c is U+286C, a single code unit (hence the expected "\u286c"), not '𐍈' (U+10348, i.e. "\ud800\udf48"); 0x286c equals decimal 10348, so 0x10348 was probably intended. Exercising a real surrogate pair needs that constant and updated CHECK lines.
465+
)
466+
)
467+
468+
;; CHECK: [fuzz-exec] calling invalid_code_point
469+
;; CHECK-NEXT: [trap invalid code point]
470+
(func $invalid_code_point (export "invalid_code_point") (result stringref)
471+
(string.from_code_point
472+
(i32.const -83)
473+
)
474+
)
417475
)
418476
;; CHECK: [fuzz-exec] calling new_wtf16_array
419477
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
@@ -518,6 +576,27 @@
518576

519577
;; CHECK: [fuzz-exec] calling concat-surrogates
520578
;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48")
579+
580+
;; CHECK: [fuzz-exec] calling string.from_code_point
581+
;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
582+
583+
;; CHECK: [fuzz-exec] calling unsigned_code_point
584+
;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
585+
586+
;; CHECK: [fuzz-exec] calling weird_code_point
587+
;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
588+
589+
;; CHECK: [fuzz-exec] calling isolated_high_code_point
590+
;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
591+
592+
;; CHECK: [fuzz-exec] calling isolated_low_code_point
593+
;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
594+
595+
;; CHECK: [fuzz-exec] calling surrogate_pair_code_point
596+
;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\u286c")
597+
598+
;; CHECK: [fuzz-exec] calling invalid_code_point
599+
;; CHECK-NEXT: [trap invalid code point]
521600
;; CHECK-NEXT: [fuzz-exec] comparing compare.1
522601
;; CHECK-NEXT: [fuzz-exec] comparing compare.10
523602
;; CHECK-NEXT: [fuzz-exec] comparing compare.2
@@ -540,6 +619,9 @@
540619
;; CHECK-NEXT: [fuzz-exec] comparing eq.5
541620
;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit
542621
;; CHECK-NEXT: [fuzz-exec] comparing get_length
622+
;; CHECK-NEXT: [fuzz-exec] comparing invalid_code_point
623+
;; CHECK-NEXT: [fuzz-exec] comparing isolated_high_code_point
624+
;; CHECK-NEXT: [fuzz-exec] comparing isolated_low_code_point
543625
;; CHECK-NEXT: [fuzz-exec] comparing new_2
544626
;; CHECK-NEXT: [fuzz-exec] comparing new_4
545627
;; CHECK-NEXT: [fuzz-exec] comparing new_empty
@@ -551,3 +633,7 @@
551633
;; CHECK-NEXT: [fuzz-exec] comparing slice
552634
;; CHECK-NEXT: [fuzz-exec] comparing slice-big
553635
;; CHECK-NEXT: [fuzz-exec] comparing slice-unicode
636+
;; CHECK-NEXT: [fuzz-exec] comparing string.from_code_point
637+
;; CHECK-NEXT: [fuzz-exec] comparing surrogate_pair_code_point
638+
;; CHECK-NEXT: [fuzz-exec] comparing unsigned_code_point
639+
;; CHECK-NEXT: [fuzz-exec] comparing weird_code_point

0 commit comments

Comments
 (0)