Skip to content

Commit 8d70b0a

Browse files
committed
LibWeb: Implement TextDecoderStream
1 parent cb597ca commit 8d70b0a

File tree

10 files changed

+337
-25
lines changed

10 files changed

+337
-25
lines changed

Libraries/LibWeb/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ set(SOURCES
356356
Encoding/TextDecoderCommon.cpp
357357
Encoding/TextEncoderCommon.cpp
358358
Encoding/TextEncoderStream.cpp
359+
Encoding/TextDecoderStream.cpp
359360
EncryptedMediaExtensions/Algorithms.cpp
360361
EncryptedMediaExtensions/MediaKeySystemAccess.cpp
361362
EncryptedMediaExtensions/NavigatorEncryptedMediaExtensionsPartial.cpp

Libraries/LibWeb/Encoding/TextDecoder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
#include <LibJS/Forward.h>
1212
#include <LibTextCodec/Decoder.h>
1313
#include <LibWeb/Bindings/PlatformObject.h>
14+
#include <LibWeb/Encoding/TextDecoderCommon.h>
1415
#include <LibWeb/Forward.h>
1516
#include <LibWeb/WebIDL/ExceptionOr.h>
16-
#include <LibWeb/Encoding/TextDecoderCommon.h>
1717

1818
namespace Web::Encoding {
1919

Libraries/LibWeb/Encoding/TextDecoder.idl

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,5 @@
11
#import <Encoding/TextDecoderCommon.idl>
22

3-
// https://encoding.spec.whatwg.org/#textdecoderoptions
4-
dictionary TextDecoderOptions {
5-
boolean fatal = false;
6-
boolean ignoreBOM = false;
7-
};
8-
9-
// https://encoding.spec.whatwg.org/#textdecodeoptions
10-
dictionary TextDecodeOptions {
11-
boolean stream = false;
12-
};
13-
143
// https://encoding.spec.whatwg.org/#textdecoder
154
[Exposed=*]
165
interface TextDecoder {

Libraries/LibWeb/Encoding/TextDecoderCommon.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
/*
2-
* Copyright (c) 2024, Tim Flynn <trflynn89@ladybird.org>
3-
*
4-
* SPDX-License-Identifier: BSD-2-Clause
5-
*/
6-
71
#include <LibWeb/Encoding/TextDecoderCommon.h>
82

93
namespace Web::Encoding {

Libraries/LibWeb/Encoding/TextDecoderCommon.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
/*
2-
* Copyright (c) 2024, Tim Flynn <trflynn89@ladybird.org>
3-
*
4-
* SPDX-License-Identifier: BSD-2-Clause
5-
*/
6-
71
#pragma once
82

93
#include <AK/FlyString.h>
@@ -40,7 +34,7 @@ class TextDecoderCommonMixin {
4034
, m_ignore_bom(ignore_bom)
4135
{
4236
}
43-
37+
4438
FlyString m_encoding;
4539
bool m_fatal { false };
4640
bool m_ignore_bom { false };

Libraries/LibWeb/Encoding/TextDecoderCommon.idl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
// https://encoding.spec.whatwg.org/#textdecoderoptions
// Construction-time options shared by the decoder interfaces. Both members default to false.
2+
dictionary TextDecoderOptions {
3+
// When true, the decoder's error mode is set to "fatal".
boolean fatal = false;
4+
// Stored as the decoder's "ignore BOM" setting.
boolean ignoreBOM = false;
5+
};
6+
7+
// https://encoding.spec.whatwg.org/#textdecodeoptions
// Per-call decode options. The single member defaults to false.
8+
dictionary TextDecodeOptions {
9+
// NOTE(review): presumably consumed by TextDecoder.decode() to signal that more input follows;
// nothing in the code visible here reads it - confirm against the TextDecoder implementation.
boolean stream = false;
10+
};
11+
112
// https://encoding.spec.whatwg.org/#textdecodercommon
213
interface mixin TextDecoderCommon {
314
readonly attribute DOMString encoding;
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
#include <AK/FlyString.h>
2+
#include <LibJS/Runtime/ArrayBuffer.h>
3+
#include <LibJS/Runtime/Realm.h>
4+
#include <LibJS/Runtime/TypedArray.h>
5+
#include <LibTextCodec/Decoder.h>
6+
#include <LibWeb/Bindings/ExceptionOrUtils.h>
7+
#include <LibWeb/Bindings/Intrinsics.h>
8+
#include <LibWeb/Bindings/TextDecoderStreamPrototype.h>
9+
#include <LibWeb/Encoding/TextDecoderStream.h>
10+
#include <LibWeb/Streams/TransformStream.h>
11+
#include <LibWeb/Streams/TransformStreamOperations.h>
12+
#include <LibWeb/WebIDL/AbstractOperations.h>
13+
#include <LibWeb/WebIDL/Buffers.h>
14+
#include <LibWeb/WebIDL/Promise.h>
15+
16+
namespace Web::Encoding {
17+
18+
GC_DEFINE_ALLOCATOR(TextDecoderStream);
19+
20+
// https://encoding.spec.whatwg.org/#dom-textdecoderstream
21+
WebIDL::ExceptionOr<GC::Ref<TextDecoderStream>> TextDecoderStream::construct_impl(JS::Realm& realm, Optional<FlyString> label, Optional<TextDecoderOptions> const& options)
22+
{
23+
auto& vm = realm.vm();
24+
25+
// 1. Let encoding be the result of getting an encoding from label.
26+
// If label is not given, let encoding be UTF-8.
27+
FlyString encoding_label = label.value_or("utf-8"_fly_string);
28+
auto encoding = TextCodec::get_standardized_encoding(encoding_label);
29+
30+
// 2. If encoding is failure or replacement, then throw a RangeError.
31+
if (!encoding.has_value() || encoding->equals_ignoring_ascii_case("replacement"sv))
32+
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::RangeError, TRY_OR_THROW_OOM(vm, String::formatted("Invalid encoding {}", encoding_label)) };
33+
34+
// 3. Set this's encoding to encoding.
35+
// https://encoding.spec.whatwg.org/#dom-textdecoder-encoding
36+
// The encoding getter steps are to return this's encoding's name, ASCII lowercased.
37+
auto lowercase_encoding_name = encoding.value().to_ascii_lowercase_string();
38+
39+
// 4. If options["fatal"] is true, then set this's error mode to "fatal".
40+
auto fatal = options.value_or({}).fatal;
41+
42+
// 5. Set this's ignore BOM to options["ignoreBOM"].
43+
auto ignore_bom = options.value_or({}).ignore_bom;
44+
45+
// 6. Set this's decoder to a new decoder for this's encoding, and set this's I/O queue to a new I/O queue.
46+
auto decoder = TextCodec::decoder_for_exact_name(encoding.value());
47+
VERIFY(decoder.has_value());
48+
49+
// NOTE: We do these steps first so that we may store it as nonnull in the GenericTransformStream.
50+
// 9. Let transformStream be a new TransformStream.
51+
auto transform_stream = realm.create<Streams::TransformStream>(realm);
52+
53+
// 11. Set this's transform to transformStream.
54+
auto stream = realm.create<TextDecoderStream>(realm, *decoder, lowercase_encoding_name, fatal, ignore_bom, transform_stream);
55+
56+
// 7. Let transformAlgorithm be an algorithm which takes a chunk argument and runs the decode and enqueue a chunk
57+
// algorithm with this and chunk.
58+
auto transform_algorithm = GC::create_function(realm.heap(), [stream](JS::Value chunk) -> GC::Ref<WebIDL::Promise> {
59+
auto& realm = stream->realm();
60+
auto& vm = realm.vm();
61+
62+
if (auto result = stream->decode_and_enqueue_chunk(chunk); result.is_error()) {
63+
auto throw_completion = Bindings::exception_to_throw_completion(vm, result.exception());
64+
return WebIDL::create_rejected_promise(realm, throw_completion.release_value());
65+
}
66+
67+
return WebIDL::create_resolved_promise(realm, JS::js_undefined());
68+
});
69+
70+
// 8. Let flushAlgorithm be an algorithm which runs the flush and enqueue algorithm with this.
71+
auto flush_algorithm = GC::create_function(realm.heap(), [stream]() -> GC::Ref<WebIDL::Promise> {
72+
auto& realm = stream->realm();
73+
auto& vm = realm.vm();
74+
75+
if (auto result = stream->flush_and_enqueue(); result.is_error()) {
76+
auto throw_completion = Bindings::exception_to_throw_completion(vm, result.exception());
77+
return WebIDL::create_rejected_promise(realm, throw_completion.release_value());
78+
}
79+
80+
return WebIDL::create_resolved_promise(realm, JS::js_undefined());
81+
});
82+
83+
// 10. Set up transformStream with transformAlgorithm set to transformAlgorithm and flushAlgorithm set to flushAlgorithm.
84+
transform_stream->set_up(transform_algorithm, flush_algorithm);
85+
86+
return stream;
87+
}
88+
89+
// Forwards `transform` to the GenericTransformStream mixin and the encoding name, fatal flag and ignore-BOM flag
// to the TextDecoderCommon mixin. `decoder` is stored by reference, not copied, so it must outlive this object.
TextDecoderStream::TextDecoderStream(JS::Realm& realm, TextCodec::Decoder& decoder, FlyString encoding, bool fatal, bool ignore_bom, GC::Ref<Streams::TransformStream> transform)
90+
: Bindings::PlatformObject(realm)
91+
, Streams::GenericTransformStreamMixin(transform)
92+
, Encoding::TextDecoderCommonMixin(move(encoding), fatal, ignore_bom)
93+
, m_decoder(decoder)
94+
{
95+
}
96+
97+
// Defaulted out of line; the destructor is only declared in the header.
TextDecoderStream::~TextDecoderStream() = default;
98+
99+
// Sets the prototype for the TextDecoderStream interface, then runs the base class's initialization for `realm`.
void TextDecoderStream::initialize(JS::Realm& realm)
100+
{
101+
WEB_SET_PROTOTYPE_FOR_INTERFACE(TextDecoderStream);
102+
Base::initialize(realm);
103+
}
104+
105+
// Forwards the visitor to the base class and to the GenericTransformStream mixin, which receives the transform
// stream in the constructor.
void TextDecoderStream::visit_edges(JS::Cell::Visitor& visitor)
106+
{
107+
Base::visit_edges(visitor);
108+
Streams::GenericTransformStreamMixin::visit_edges(visitor);
109+
}
110+
111+
// https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk
112+
WebIDL::ExceptionOr<void> TextDecoderStream::decode_and_enqueue_chunk(JS::Value chunk)
113+
{
114+
auto& realm = this->realm();
115+
auto& vm = this->vm();
116+
117+
// 1. Let bufferSource be the result of converting chunk to an AllowSharedBufferSource.
118+
// Note: We convert to a BufferSource since we need to copy the bytes anyway
119+
if (!chunk.is_object())
120+
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Chunk is not an object"sv };
121+
122+
auto& chunk_object = chunk.as_object();
123+
if (!WebIDL::is_buffer_source_type(chunk))
124+
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Chunk is not a BufferSource"sv };
125+
126+
// 2. Push a copy of bufferSource to decoder's I/O queue.
127+
// Note: Implementations are strongly encouraged to use an implementation strategy that avoids this copy.
128+
// When doing so they will have to make sure that changes to bufferSource do not affect future
129+
// iterations of the decode-and-enqueue-a-chunk and flush-and-enqueue algorithms.
130+
auto data_buffer_or_error = WebIDL::get_buffer_source_copy(chunk_object);
131+
if (data_buffer_or_error.is_error())
132+
return WebIDL::OperationError::create(realm, "Failed to copy bytes from ArrayBuffer"_utf16);
133+
auto& data_buffer = data_buffer_or_error.value();
134+
m_io_queue.append(move(data_buffer));
135+
136+
// 3. Let output be the I/O queue of scalar values « end-of-queue ».
137+
Vector<u32> output;
138+
139+
// 4. While true:
140+
while (true) {
141+
// 1. Let item be the result of reading from decoder's I/O queue.
142+
if (m_io_queue.is_empty()) {
143+
// 2. If item is end-of-queue, then:
144+
// 1. Let outputChunk be the result of running serialize I/O queue with decoder and output.
145+
auto output_chunk = TRY(serialize_io_queue(output));
146+
147+
// 2. If outputChunk is not the empty string, then enqueue outputChunk in decoder's transform.
148+
if (!output_chunk.is_empty()) {
149+
TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), JS::PrimitiveString::create(vm, output_chunk)));
150+
}
151+
152+
// 3. Return.
153+
return {};
154+
}
155+
156+
auto bytes_to_process = m_io_queue.take_first();
157+
158+
// 3. Otherwise:
159+
// 1. Let result be the result of processing an item with item, decoder's decoder, decoder's I/O queue,
160+
// output, and decoder's error mode.
161+
auto result = TRY_OR_THROW_OOM(vm, m_decoder.to_utf8({ bytes_to_process.data(), bytes_to_process.size() }));
162+
163+
// 2. If result is finished, then:
164+
// Note: In our implementation, we process the entire chunk at once, so we check if there are errors
165+
if (m_fatal && result.contains(0xfffd)) {
166+
// If decoder's error mode is "fatal", then throw a TypeError.
167+
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Decoding failed"sv };
168+
}
169+
170+
// Add decoded characters to output
171+
for (auto code_point : result.code_points()) {
172+
output.append(code_point);
173+
}
174+
175+
// Since we processed the entire chunk, we're done with this iteration
176+
// Break to check if there are more chunks in the queue
177+
if (m_io_queue.is_empty()) {
178+
// Serialize and enqueue what we have
179+
auto output_chunk = TRY(serialize_io_queue(output));
180+
if (!output_chunk.is_empty()) {
181+
TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), JS::PrimitiveString::create(vm, output_chunk)));
182+
}
183+
return {};
184+
}
185+
}
186+
}
187+
188+
// https://encoding.spec.whatwg.org/#flush-and-enqueue
189+
WebIDL::ExceptionOr<void> TextDecoderStream::flush_and_enqueue()
190+
{
191+
auto& vm = this->vm();
192+
193+
// 1. Let output be the I/O queue of scalar values « end-of-queue ».
194+
Vector<u32> output;
195+
196+
// 2. While true:
197+
while (!m_io_queue.is_empty()) {
198+
// 1. Let item be the result of reading from decoder's I/O queue.
199+
auto bytes_to_process = m_io_queue.take_first();
200+
201+
// 2. Let result be the result of processing an item with item, decoder's decoder, decoder's I/O queue,
202+
// output, and decoder's error mode.
203+
auto result = TRY_OR_THROW_OOM(vm, m_decoder.to_utf8({ bytes_to_process.data(), bytes_to_process.size() }));
204+
205+
// 3. If result is finished, then:
206+
// Note: In our implementation, we check for errors
207+
if (m_fatal && result.contains(0xfffd)) {
208+
// If decoder's error mode is "fatal", then throw a TypeError.
209+
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Decoding failed"sv };
210+
}
211+
212+
// Add decoded characters to output
213+
for (auto code_point : result.code_points()) {
214+
output.append(code_point);
215+
}
216+
}
217+
218+
// When queue is empty, serialize and enqueue final output
219+
// 1. Let outputChunk be the result of running serialize I/O queue with decoder and output.
220+
auto output_chunk = TRY(serialize_io_queue(output));
221+
222+
// 2. If outputChunk is not the empty string, then enqueue outputChunk in decoder's transform.
223+
if (!output_chunk.is_empty()) {
224+
TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), JS::PrimitiveString::create(vm, output_chunk)));
225+
}
226+
227+
// 3. Return.
228+
return {};
229+
}
230+
231+
// https://encoding.spec.whatwg.org/#serialize-io-queue
232+
WebIDL::ExceptionOr<String> TextDecoderStream::serialize_io_queue(Vector<u32> const& queue)
233+
{
234+
auto& vm = this->vm();
235+
236+
// 1. Let output be the empty string.
237+
StringBuilder output;
238+
239+
// 2. While true:
240+
for (size_t i = 0; i < queue.size(); ++i) {
241+
auto item = queue[i];
242+
243+
// Skip BOM handling for UTF-8 and UTF-16
244+
// Note: do not flush is BOM seen flag in spec
245+
if (!m_do_not_flush) {
246+
// For UTF-8, UTF-16BE, UTF-16LE encodings
247+
if ((m_encoding == "utf-8"_fly_string || m_encoding == "utf-16be"_fly_string || m_encoding == "utf-16le"_fly_string)) {
248+
// If ignore BOM is false and item is U+FEFF, then:
249+
if (!m_ignore_bom && item == 0xFEFF) {
250+
// Set decoder's do not flush to true.
251+
m_do_not_flush = true;
252+
// Continue to next item (skip this BOM)
253+
continue;
254+
}
255+
}
256+
// Set do not flush to true after first character
257+
m_do_not_flush = true;
258+
}
259+
260+
// 3. Otherwise, append item to output.
261+
TRY_OR_THROW_OOM(vm, output.try_append_code_point(item));
262+
}
263+
264+
// 4. Return output.
265+
return TRY_OR_THROW_OOM(vm, output.to_string());
266+
}
267+
268+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#pragma once
2+
3+
#include <AK/Forward.h>
4+
#include <AK/NonnullRefPtr.h>
5+
#include <LibJS/Forward.h>
6+
#include <LibTextCodec/Decoder.h>
7+
#include <LibWeb/Bindings/PlatformObject.h>
8+
#include <LibWeb/Encoding/TextDecoderCommon.h>
9+
#include <LibWeb/Forward.h>
10+
#include <LibWeb/Streams/GenericTransformStream.h>
11+
#include <LibWeb/WebIDL/ExceptionOr.h>
12+
13+
namespace Web::Encoding {
14+
15+
// https://encoding.spec.whatwg.org/#textdecoderstream
// Platform object that decodes incoming byte chunks to text and enqueues the result on its transform stream.
// The transform is provided by the GenericTransformStream mixin; the encoding name, fatal flag and ignore-BOM
// flag are provided by the TextDecoderCommon mixin.
16+
class TextDecoderStream final
17+
: public Bindings::PlatformObject
18+
, public Streams::GenericTransformStreamMixin
19+
, public Encoding::TextDecoderCommonMixin {
20+
WEB_PLATFORM_OBJECT(TextDecoderStream, Bindings::PlatformObject);
21+
GC_DECLARE_ALLOCATOR(TextDecoderStream);
22+
23+
public:
24+
// Creates the stream for `label` (UTF-8 when omitted); yields a RangeError for an unknown or "replacement" encoding.
static WebIDL::ExceptionOr<GC::Ref<TextDecoderStream>> construct_impl(JS::Realm&, Optional<FlyString> label = {}, Optional<TextDecoderOptions> const& options = {});
25+
26+
virtual ~TextDecoderStream() override;
27+
28+
// Transform algorithm: copies `chunk`'s bytes, decodes them and enqueues any non-empty text.
WebIDL::ExceptionOr<void> decode_and_enqueue_chunk(JS::Value chunk);
29+
// Flush algorithm: decodes whatever is still queued and enqueues any non-empty text.
WebIDL::ExceptionOr<void> flush_and_enqueue();
30+
31+
private:
32+
TextDecoderStream(JS::Realm&, TextCodec::Decoder&, FlyString encoding, bool fatal, bool ignore_bom, GC::Ref<Streams::TransformStream> transform);
33+
34+
virtual void initialize(JS::Realm&) override;
35+
virtual void visit_edges(JS::Cell::Visitor&) override;
36+
37+
// Turns decoded code points into a String, dropping a leading U+FEFF where BOM handling applies.
WebIDL::ExceptionOr<String> serialize_io_queue(Vector<u32> const& queue);
38+
39+
// Decoder used for every queued buffer; held by reference, so it must outlive this object.
TextCodec::Decoder& m_decoder;
40+
// Copies of the byte chunks received so far that have not been decoded yet.
Vector<ByteBuffer> m_io_queue;
41+
// Used by serialize_io_queue() as the spec's "BOM seen" flag: set once the first code point has been examined.
bool m_do_not_flush { false };
42+
};
43+
44+
}

0 commit comments

Comments
 (0)