Skip to content

Remove mutability from parser/tokenizer APIs. #548

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Install stable toolchain
run: |
rustup set profile minimal
rustup override set 1.61.0
rustup override set 1.65.0

- run: cargo check --lib --all-features

Expand Down
4 changes: 2 additions & 2 deletions html5ever/benches/html5ever.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct Sink;
impl TokenSink for Sink {
type Handle = ();

fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
// Don't use the token, but make sure we don't get
// optimized out entirely.
black_box(token);
Expand Down Expand Up @@ -53,7 +53,7 @@ fn run_bench(c: &mut Criterion, name: &str) {

c.bench_function(&test_name, move |b| {
b.iter(|| {
let mut tok = Tokenizer::new(Sink, Default::default());
let tok = Tokenizer::new(Sink, Default::default());
let buffer = BufferQueue::default();
// We are doing clone inside the bench function, this is not ideal, but possibly
// necessary since our iterator consumes the underlying buffer.
Expand Down
34 changes: 17 additions & 17 deletions html5ever/examples/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'
let sink = Sink {
arena,
document: arena.alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks,
quirks_mode: Cell::new(QuirksMode::NoQuirks),
};

parse_document(sink, Default::default())
Expand All @@ -41,7 +41,7 @@ type Link<'arena> = Cell<Option<Ref<'arena>>>;
struct Sink<'arena> {
arena: Arena<'arena>,
document: Ref<'arena>,
quirks_mode: QuirksMode,
quirks_mode: Cell<QuirksMode>,
}

/// DOM node which contains links to other nodes in the tree.
Expand Down Expand Up @@ -188,14 +188,14 @@ impl<'arena> TreeSink for Sink<'arena> {
self.document
}

fn parse_error(&mut self, _: Cow<'static, str>) {}
fn parse_error(&self, _: Cow<'static, str>) {}

fn get_document(&mut self) -> Ref<'arena> {
fn get_document(&self) -> Ref<'arena> {
self.document
}

fn set_quirks_mode(&mut self, mode: QuirksMode) {
self.quirks_mode = mode;
fn set_quirks_mode(&self, mode: QuirksMode) {
self.quirks_mode.set(mode);
}

fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
Expand All @@ -209,7 +209,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}
}

fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
fn get_template_contents(&self, target: &Ref<'arena>) -> Ref<'arena> {
if let NodeData::Element {
template_contents: Some(contents),
..
Expand All @@ -234,7 +234,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn create_element(
&mut self,
&self,
name: QualName,
attrs: Vec<Attribute>,
flags: ElementFlags,
Expand All @@ -251,26 +251,26 @@ impl<'arena> TreeSink for Sink<'arena> {
})
}

fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
fn create_comment(&self, text: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::Comment { contents: text })
}

fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::ProcessingInstruction {
target,
contents: data,
})
}

fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
fn append(&self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| parent.last_child.get(),
|new_node| parent.append(new_node),
)
}

fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
fn append_before_sibling(&self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| sibling.previous_sibling.get(),
Expand All @@ -279,7 +279,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn append_based_on_parent_node(
&mut self,
&self,
element: &Ref<'arena>,
prev_element: &Ref<'arena>,
child: NodeOrText<Ref<'arena>>,
Expand All @@ -292,7 +292,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn append_doctype_to_document(
&mut self,
&self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
Expand All @@ -304,7 +304,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}))
}

fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
fn add_attrs_if_missing(&self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
attrs.borrow_mut()
} else {
Expand All @@ -322,11 +322,11 @@ impl<'arena> TreeSink for Sink<'arena> {
);
}

fn remove_from_parent(&mut self, target: &Ref<'arena>) {
fn remove_from_parent(&self, target: &Ref<'arena>) {
target.detach()
}

fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
fn reparent_children(&self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
let mut next_child = node.first_child.get();
while let Some(child) = next_child {
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
Expand Down
9 changes: 5 additions & 4 deletions html5ever/examples/noop-tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,21 @@

extern crate html5ever;

use std::cell::RefCell;
use std::io;

use html5ever::tendril::*;
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};

/// In our case, our sink only contains a tokens vector
struct Sink(Vec<Token>);
struct Sink(RefCell<Vec<Token>>);

impl TokenSink for Sink {
type Handle = ();

/// Each processed token will be handled by this method
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
self.0.push(token);
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
self.0.borrow_mut().push(token);
TokenSinkResult::Continue
}
}
Expand All @@ -39,7 +40,7 @@ fn main() {
let input = BufferQueue::default();
input.push_back(chunk.try_reinterpret().unwrap());

let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
let tok = Tokenizer::new(Sink(RefCell::new(Vec::new())), Default::default());
let _ = tok.feed(&input);
assert!(input.is_empty());
tok.end();
Expand Down
64 changes: 38 additions & 26 deletions html5ever/examples/noop-tree-builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
extern crate html5ever;

use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::io;

Expand All @@ -20,14 +21,14 @@ use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, QualName};

struct Sink {
next_id: usize,
names: HashMap<usize, QualName>,
next_id: Cell<usize>,
names: RefCell<HashMap<usize, &'static QualName>>,
}

impl Sink {
fn get_id(&mut self) -> usize {
let id = self.next_id;
self.next_id += 2;
fn get_id(&self) -> usize {
let id = self.next_id.get();
self.next_id.set(id + 2);
id
}
}
Expand All @@ -43,12 +44,13 @@ impl TreeSink for Sink {
self
}

fn get_document(&mut self) -> usize {
fn get_document(&self) -> usize {
0
}

fn get_template_contents(&mut self, target: &usize) -> usize {
if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded())
fn get_template_contents(&self, target: &usize) -> usize {
if let Some(expanded_name!(html "template")) =
self.names.borrow().get(target).map(|n| n.expanded())
{
target + 1
} else {
Expand All @@ -61,53 +63,63 @@ impl TreeSink for Sink {
}

fn elem_name(&self, target: &usize) -> ExpandedName {
self.names.get(target).expect("not an element").expanded()
self.names
.borrow()
.get(target)
.expect("not an element")
.expanded()
}

fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
fn create_element(&self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
let id = self.get_id();
self.names.insert(id, name);
// N.B. We intentionally leak memory here to minimize the implementation complexity
// of this example code. A real implementation would either want to use a real
// real DOM tree implentation, or else use an arena as the backing store for
// memory used by the parser.
self.names
.borrow_mut()
.insert(id, Box::leak(Box::new(name)));
id
}

fn create_comment(&mut self, _text: StrTendril) -> usize {
fn create_comment(&self, _text: StrTendril) -> usize {
self.get_id()
}

#[allow(unused_variables)]
fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize {
fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize {
unimplemented!()
}

fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
fn append_before_sibling(&self, _sibling: &usize, _new_node: NodeOrText<usize>) {}

fn append_based_on_parent_node(
&mut self,
&self,
_element: &usize,
_prev_element: &usize,
_new_node: NodeOrText<usize>,
) {
}

fn parse_error(&mut self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {}
fn parse_error(&self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&self, _mode: QuirksMode) {}
fn append(&self, _parent: &usize, _child: NodeOrText<usize>) {}

fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.contains_key(target), "not an element");
fn append_doctype_to_document(&self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.borrow().contains_key(target), "not an element");
}
fn remove_from_parent(&mut self, _target: &usize) {}
fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {}
fn mark_script_already_started(&mut self, _node: &usize) {}
fn remove_from_parent(&self, _target: &usize) {}
fn reparent_children(&self, _node: &usize, _new_parent: &usize) {}
fn mark_script_already_started(&self, _node: &usize) {}
}

/// In this example we implement the TreeSink trait which takes each parsed elements and insert
/// it to a hashmap, while each element is given a numeric id.
fn main() {
let sink = Sink {
next_id: 1,
names: HashMap::new(),
next_id: Cell::new(1),
names: RefCell::new(HashMap::new()),
};

// Read HTML from the standard input and parse it
Expand Down
Loading