(parent.first_child.get().unwrap(), self));
@@ -147,19 +152,26 @@ impl<'arena> Sink<'arena> {
}
fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
- where P: FnOnce() -> Option<Ref<'arena>>,
- A: FnOnce(Ref<'arena>),
+ where
+ P: FnOnce() -> Option<Ref<'arena>>,
+ A: FnOnce(Ref<'arena>),
{
let new_node = match child {
NodeOrText::AppendText(text) => {
// Append to an existing Text node if we have one.
- if let Some(&Node { data: NodeData::Text { ref contents }, .. }) = previous() {
+ if let Some(&Node {
+ data: NodeData::Text { ref contents },
+ ..
+ }) = previous()
+ {
contents.borrow_mut().push_tendril(&text);
- return
+ return;
}
- self.new_node(NodeData::Text { contents: RefCell::new(text) })
- }
- NodeOrText::AppendNode(node) => node
+ self.new_node(NodeData::Text {
+ contents: RefCell::new(text),
+ })
+ },
+ NodeOrText::AppendNode(node) => node,
};
append(new_node)
@@ -196,7 +208,11 @@ impl<'arena> TreeSink for Sink<'arena> {
}
fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
- if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data {
+ if let NodeData::Element {
+ template_contents: Some(ref contents),
+ ..
+ } = target.data
+ {
contents
} else {
panic!("not a template element!")
@@ -204,14 +220,23 @@ impl<'arena> TreeSink for Sink<'arena> {
}
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
- if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data {
+ if let NodeData::Element {
+ mathml_annotation_xml_integration_point,
+ ..
+ } = target.data
+ {
mathml_annotation_xml_integration_point
} else {
panic!("not an element!")
}
}
- fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Ref<'arena> {
+ fn create_element(
+ &mut self,
+ name: QualName,
+ attrs: Vec<Attribute>,
+ flags: ElementFlags,
+ ) -> Ref<'arena> {
self.new_node(NodeData::Element {
name: name,
attrs: RefCell::new(attrs),
@@ -221,7 +246,6 @@ impl<'arena> TreeSink for Sink<'arena> {
None
},
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
-
})
}
@@ -230,14 +254,17 @@ impl<'arena> TreeSink for Sink<'arena> {
}
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
- self.new_node(NodeData::ProcessingInstruction { target: target, contents: data })
+ self.new_node(NodeData::ProcessingInstruction {
+ target: target,
+ contents: data,
+ })
}
fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| parent.last_child.get(),
- |new_node| parent.append(new_node)
+ |new_node| parent.append(new_node),
)
}
@@ -245,12 +272,16 @@ impl<'arena> TreeSink for Sink<'arena> {
self.append_common(
child,
|| sibling.previous_sibling.get(),
- |new_node| sibling.insert_before(new_node)
+ |new_node| sibling.insert_before(new_node),
)
}
- fn append_based_on_parent_node(&mut self, element: &Ref<'arena>,
- prev_element: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
+ fn append_based_on_parent_node(
+ &mut self,
+ element: &Ref<'arena>,
+ prev_element: &Ref<'arena>,
+ child: NodeOrText<Ref<'arena>>,
+ ) {
if element.parent.get().is_some() {
self.append_before_sibling(element, child)
} else {
@@ -258,14 +289,16 @@ impl<'arena> TreeSink for Sink<'arena> {
}
}
- fn append_doctype_to_document(&mut self,
- name: StrTendril,
- public_id: StrTendril,
- system_id: StrTendril) {
+ fn append_doctype_to_document(
+ &mut self,
+ name: StrTendril,
+ public_id: StrTendril,
+ system_id: StrTendril,
+ ) {
self.document.append(self.new_node(NodeData::Doctype {
name: name,
public_id: public_id,
- system_id: system_id
+ system_id: system_id,
}))
}
@@ -276,10 +309,15 @@ impl<'arena> TreeSink for Sink<'arena> {
panic!("not an element")
};
- let existing_names = existing.iter().map(|e| e.name.clone()).collect::<HashSet<_>>();
- existing.extend(attrs.into_iter().filter(|attr| {
- !existing_names.contains(&attr.name)
- }));
+ let existing_names = existing
+ .iter()
+ .map(|e| e.name.clone())
+ .collect::<HashSet<_>>();
+ existing.extend(
+ attrs
+ .into_iter()
+ .filter(|attr| !existing_names.contains(&attr.name)),
+ );
}
fn remove_from_parent(&mut self, target: &Ref<'arena>) {
diff --git a/html5ever/examples/html2html.rs b/html5ever/examples/html2html.rs
index a3eba1bb..f5340548 100644
--- a/html5ever/examples/html2html.rs
+++ b/html5ever/examples/html2html.rs
@@ -17,15 +17,14 @@
extern crate html5ever;
-use std::io::{self, Write};
use std::default::Default;
+use std::io::{self, Write};
-
-use html5ever::{parse_document, serialize};
use html5ever::driver::ParseOpts;
use html5ever::rcdom::RcDom;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
+use html5ever::{parse_document, serialize};
fn main() {
let opts = ParseOpts {
@@ -42,8 +41,11 @@ fn main() {
.unwrap();
// The validator.nu HTML2HTML always prints a doctype at the very beginning.
- io::stdout().write_all(b"<!DOCTYPE html>\n")
- .ok().expect("writing DOCTYPE failed");
+ io::stdout()
+ .write_all(b"<!DOCTYPE html>\n")
+ .ok()
+ .expect("writing DOCTYPE failed");
serialize(&mut io::stdout(), &dom.document, Default::default())
- .ok().expect("serialization failed");
+ .ok()
+ .expect("serialization failed");
}
diff --git a/html5ever/examples/noop-tokenize.rs b/html5ever/examples/noop-tokenize.rs
index c134f485..d6c62f1d 100644
--- a/html5ever/examples/noop-tokenize.rs
+++ b/html5ever/examples/noop-tokenize.rs
@@ -11,11 +11,11 @@
extern crate html5ever;
-use std::io;
use std::default::Default;
+use std::io;
-use html5ever::tokenizer::{BufferQueue, TokenSinkResult, TokenSink, Token, Tokenizer};
use html5ever::tendril::*;
+use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
struct Sink(Vec<Token>);
diff --git a/html5ever/examples/noop-tree-builder.rs b/html5ever/examples/noop-tree-builder.rs
index fcc125cd..07754498 100644
--- a/html5ever/examples/noop-tree-builder.rs
+++ b/html5ever/examples/noop-tree-builder.rs
@@ -7,17 +7,18 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-#[macro_use] extern crate html5ever;
+#[macro_use]
+extern crate html5ever;
-use std::io;
-use std::default::Default;
-use std::collections::HashMap;
use std::borrow::Cow;
+use std::collections::HashMap;
+use std::default::Default;
+use std::io;
-use html5ever::{Attribute, QualName, ExpandedName};
use html5ever::parse_document;
-use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags};
use html5ever::tendril::*;
+use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
+use html5ever::{Attribute, ExpandedName, QualName};
struct Sink {
next_id: usize,
@@ -35,14 +36,17 @@ impl Sink {
impl TreeSink for Sink {
type Handle = usize;
type Output = Self;
- fn finish(self) -> Self { self }
+ fn finish(self) -> Self {
+ self
+ }
fn get_document(&mut self) -> usize {
0
}
fn get_template_contents(&mut self, target: &usize) -> usize {
- if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded()) {
+ if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded())
+ {
target + 1
} else {
panic!("not a template element")
@@ -72,26 +76,27 @@ impl TreeSink for Sink {
unimplemented!()
}
- fn append_before_sibling(&mut self,
- _sibling: &usize,
- _new_node: NodeOrText<usize>) { }
+ fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
- fn append_based_on_parent_node(&mut self,
+ fn append_based_on_parent_node(
+ &mut self,
_element: &usize,
_prev_element: &usize,
- _new_node: NodeOrText) { }
+ _new_node: NodeOrText<usize>,
+ ) {
+ }
- fn parse_error(&mut self, _msg: Cow<'static, str>) { }
- fn set_quirks_mode(&mut self, _mode: QuirksMode) { }
- fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) { }
+ fn parse_error(&mut self, _msg: Cow<'static, str>) {}
+ fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
+ fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {}
- fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) { }
+ fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.contains_key(&target), "not an element");
}
- fn remove_from_parent(&mut self, _target: &usize) { }
- fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) { }
- fn mark_script_already_started(&mut self, _node: &usize) { }
+ fn remove_from_parent(&mut self, _target: &usize) {}
+ fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {}
+ fn mark_script_already_started(&mut self, _node: &usize) {}
}
fn main() {
diff --git a/html5ever/examples/print-rcdom.rs b/html5ever/examples/print-rcdom.rs
index 9869bac1..df6c86d3 100644
--- a/html5ever/examples/print-rcdom.rs
+++ b/html5ever/examples/print-rcdom.rs
@@ -7,15 +7,16 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-#[macro_use] extern crate html5ever;
+#[macro_use]
+extern crate html5ever;
+use std::default::Default;
use std::io;
use std::iter::repeat;
-use std::default::Default;
use std::string::String;
use html5ever::parse_document;
-use html5ever::rcdom::{NodeData, RcDom, Handle};
+use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::tendril::TendrilSink;
// This is not proper HTML serialization, of course.
@@ -25,19 +26,25 @@ fn walk(indent: usize, handle: Handle) {
// FIXME: don't allocate
print!("{}", repeat(" ").take(indent).collect::());
match node.data {
- NodeData::Document
- => println!("#Document"),
+ NodeData::Document => println!("#Document"),
- NodeData::Doctype { ref name, ref public_id, ref system_id }
- => println!("", name, public_id, system_id),
+ NodeData::Doctype {
+ ref name,
+ ref public_id,
+ ref system_id,
+ } => println!("", name, public_id, system_id),
- NodeData::Text { ref contents }
- => println!("#text: {}", escape_default(&contents.borrow())),
+ NodeData::Text { ref contents } => {
+ println!("#text: {}", escape_default(&contents.borrow()))
+ },
- NodeData::Comment { ref contents }
- => println!("", escape_default(contents)),
+ NodeData::Comment { ref contents } => println!("<!-- {} -->", escape_default(contents)),
- NodeData::Element { ref name, ref attrs, .. } => {
+ NodeData::Element {
+ ref name,
+ ref attrs,
+ ..
+ } => {
assert!(name.ns == ns!(html));
print!("<{}", name.local);
for attr in attrs.borrow().iter() {
@@ -45,13 +52,13 @@ fn walk(indent: usize, handle: Handle) {
print!(" {}=\"{}\"", attr.name.local, attr.value);
}
println!(">");
- }
+ },
- NodeData::ProcessingInstruction { .. } => unreachable!()
+ NodeData::ProcessingInstruction { .. } => unreachable!(),
}
for child in node.children.borrow().iter() {
- walk(indent+4, child.clone());
+ walk(indent + 4, child.clone());
}
}
diff --git a/html5ever/examples/print-tree-actions.rs b/html5ever/examples/print-tree-actions.rs
index 86cacb06..bd4f562b 100644
--- a/html5ever/examples/print-tree-actions.rs
+++ b/html5ever/examples/print-tree-actions.rs
@@ -7,17 +7,20 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-#[macro_use] extern crate html5ever;
+#[macro_use]
+extern crate html5ever;
-use std::io;
-use std::default::Default;
-use std::collections::HashMap;
use std::borrow::Cow;
+use std::collections::HashMap;
+use std::default::Default;
+use std::io;
-use html5ever::{QualName, ExpandedName, Attribute};
-use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText, ElementFlags};
use html5ever::parse_document;
use html5ever::tendril::*;
+use html5ever::tree_builder::{
+ AppendNode, AppendText, ElementFlags, NodeOrText, QuirksMode, TreeSink,
+};
+use html5ever::{Attribute, ExpandedName, QualName};
struct Sink {
next_id: usize,
@@ -35,7 +38,9 @@ impl Sink {
impl TreeSink for Sink {
type Handle = usize;
type Output = Self;
- fn finish(self) -> Self { self }
+ fn finish(self) -> Self {
+ self
+ }
fn parse_error(&mut self, msg: Cow<'static, str>) {
println!("Parse error: {}", msg);
@@ -46,7 +51,8 @@ impl TreeSink for Sink {
}
fn get_template_contents(&mut self, target: &usize) -> usize {
- if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) {
+ if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded())
+ {
target + 1
} else {
panic!("not a template element")
@@ -85,36 +91,33 @@ impl TreeSink for Sink {
fn append(&mut self, parent: &usize, child: NodeOrText<usize>) {
match child {
- AppendNode(n)
- => println!("Append node {} to {}", n, parent),
- AppendText(t)
- => println!("Append text to {}: \"{}\"", parent, escape_default(&t)),
+ AppendNode(n) => println!("Append node {} to {}", n, parent),
+ AppendText(t) => println!("Append text to {}: \"{}\"", parent, escape_default(&t)),
}
}
- fn append_before_sibling(&mut self,
- sibling: &usize,
- new_node: NodeOrText<usize>) {
+ fn append_before_sibling(&mut self, sibling: &usize, new_node: NodeOrText<usize>) {
match new_node {
- AppendNode(n)
- => println!("Append node {} before {}", n, sibling),
- AppendText(t)
- => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)),
+ AppendNode(n) => println!("Append node {} before {}", n, sibling),
+ AppendText(t) => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)),
}
}
- fn append_based_on_parent_node(&mut self,
+ fn append_based_on_parent_node(
+ &mut self,
element: &Self::Handle,
prev_element: &Self::Handle,
- child: NodeOrText<Self::Handle>) {
-
+ child: NodeOrText<Self::Handle>,
+ ) {
self.append_before_sibling(element, child);
}
- fn append_doctype_to_document(&mut self,
- name: StrTendril,
- public_id: StrTendril,
- system_id: StrTendril) {
+ fn append_doctype_to_document(
+ &mut self,
+ name: StrTendril,
+ public_id: StrTendril,
+ system_id: StrTendril,
+ ) {
println!("Append doctype: {} {} {}", name, public_id, system_id);
}
@@ -126,7 +129,12 @@ impl TreeSink for Sink {
}
}
- fn associate_with_form(&mut self, _target: &usize, _form: &usize, _nodes: (&usize, Option<&usize>)) {
+ fn associate_with_form(
+ &mut self,
+ _target: &usize,
+ _form: &usize,
+ _nodes: (&usize, Option<&usize>),
+ ) {
// No form owner support.
}
diff --git a/html5ever/examples/tokenize.rs b/html5ever/examples/tokenize.rs
index 13c039d5..039ffb79 100644
--- a/html5ever/examples/tokenize.rs
+++ b/html5ever/examples/tokenize.rs
@@ -9,13 +9,15 @@
extern crate html5ever;
-use std::io;
use std::default::Default;
+use std::io;
-use html5ever::tokenizer::{TokenSink, Tokenizer, Token, TokenizerOpts, ParseError, TokenSinkResult};
-use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, TagToken, StartTag, EndTag};
-use html5ever::tokenizer::BufferQueue;
use html5ever::tendril::*;
+use html5ever::tokenizer::BufferQueue;
+use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken};
+use html5ever::tokenizer::{
+ ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
+};
#[derive(Copy, Clone)]
struct TokenPrinter {
@@ -25,8 +27,8 @@ struct TokenPrinter {
impl TokenPrinter {
fn is_char(&mut self, is_char: bool) {
match (self.in_char_run, is_char) {
- (false, true ) => print!("CHAR : \""),
- (true, false) => println!("\""),
+ (false, true) => print!("CHAR : \""),
+ (true, false) => println!("\""),
_ => (),
}
self.in_char_run = is_char;
@@ -47,50 +49,53 @@ impl TokenSink for TokenPrinter {
for c in b.chars() {
self.do_char(c);
}
- }
+ },
NullCharacterToken => self.do_char('\0'),
TagToken(tag) => {
self.is_char(false);
// This is not proper HTML serialization, of course.
match tag.kind {
StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name),
- EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name),
+ EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name),
}
for attr in tag.attrs.iter() {
- print!(" \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
- attr.name.local, attr.value);
+ print!(
+ " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
+ attr.name.local, attr.value
+ );
}
if tag.self_closing {
print!(" \x1b[31m/\x1b[0m");
}
println!(">");
- }
+ },
ParseError(err) => {
self.is_char(false);
println!("ERROR: {}", err);
- }
+ },
_ => {
self.is_char(false);
println!("OTHER: {:?}", token);
- }
+ },
}
TokenSinkResult::Continue
}
}
fn main() {
- let mut sink = TokenPrinter {
- in_char_run: false,
- };
+ let mut sink = TokenPrinter { in_char_run: false };
let mut chunk = ByteTendril::new();
io::stdin().read_to_tendril(&mut chunk).unwrap();
let mut input = BufferQueue::new();
input.push_back(chunk.try_reinterpret().unwrap());
- let mut tok = Tokenizer::new(sink, TokenizerOpts {
- profile: true,
- .. Default::default()
- });
+ let mut tok = Tokenizer::new(
+ sink,
+ TokenizerOpts {
+ profile: true,
+ ..Default::default()
+ },
+ );
let _ = tok.feed(&mut input);
assert!(input.is_empty());
tok.end();
diff --git a/html5ever/macros/match_token.rs b/html5ever/macros/match_token.rs
index 57453a57..8671841d 100644
--- a/html5ever/macros/match_token.rs
+++ b/html5ever/macros/match_token.rs
@@ -99,25 +99,35 @@ matching, by enforcing the following restrictions on its input:
is common in the HTML5 syntax.
*/
+use proc_macro2::TokenStream;
use quote::ToTokens;
use std::collections::HashSet;
use std::fs::File;
use std::io::{Read, Write};
use std::path::Path;
use syn;
+use syn::ext::IdentExt;
use syn::fold::Fold;
use syn::parse::{Parse, ParseStream, Result};
-use syn::ext::IdentExt;
-use proc_macro2::TokenStream;
pub fn expand(from: &Path, to: &Path) {
let mut source = String::new();
- File::open(from).unwrap().read_to_string(&mut source).unwrap();
+ File::open(from)
+ .unwrap()
+ .read_to_string(&mut source)
+ .unwrap();
let ast = syn::parse_file(&source).expect("Parsing rules.rs module");
let mut m = MatchTokenParser {};
let ast = m.fold_file(ast);
- let code = ast.into_token_stream().to_string().replace("{ ", "{\n").replace(" }", "\n}");
- File::create(to).unwrap().write_all(code.as_bytes()).unwrap();
+ let code = ast
+ .into_token_stream()
+ .to_string()
+ .replace("{ ", "{\n")
+ .replace(" }", "\n}");
+ File::create(to)
+ .unwrap()
+ .write_all(code.as_bytes())
+ .unwrap();
}
struct MatchTokenParser {}
@@ -166,8 +176,12 @@ impl Parse for Tag {
};
input.parse::<Token![>]>()?;
Ok(Tag {
- kind: if closing.is_some() { TagKind::EndTag } else { TagKind::StartTag },
- name: name
+ kind: if closing.is_some() {
+ TagKind::EndTag
+ } else {
+ TagKind::StartTag
+ },
+ name: name,
})
}
}
@@ -217,11 +231,7 @@ impl Parse for MatchTokenArm {
RHS::Expression(expr)
};
- Ok(MatchTokenArm {
- binding,
- lhs,
- rhs,
- })
+ Ok(MatchTokenArm { binding, lhs, rhs })
}
}
@@ -234,10 +244,7 @@ impl Parse for MatchToken {
while !content.is_empty() {
arms.push(content.parse()?);
}
- Ok(MatchToken {
- ident,
- arms,
- })
+ Ok(MatchToken { ident, arms })
}
}
@@ -274,15 +281,20 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
};
match (lhs, rhs) {
- (LHS::Pattern(_), RHS::Else) => panic!("'else' may not appear with an ordinary pattern"),
+ (LHS::Pattern(_), RHS::Else) => {
+ panic!("'else' may not appear with an ordinary pattern")
+ },
// ordinary pattern => expression
(LHS::Pattern(pat), RHS::Expression(expr)) => {
if !wildcards_patterns.is_empty() {
- panic!("ordinary patterns may not appear after wildcard tags {:?} {:?}", pat, expr);
+ panic!(
+ "ordinary patterns may not appear after wildcard tags {:?} {:?}",
+ pat, expr
+ );
}
arms_code.push(quote!(#binding #pat => #expr,))
- }
+ },
// ... => else
(LHS::Tags(tags), RHS::Else) => {
@@ -295,7 +307,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
}
wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag));
}
- }
+ },
// <_> => expression
// ... => expression
@@ -326,7 +338,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
arms_code.push(make_tag_pattern(&binding, tag));
wildcard = Some(false);
- }
+ },
// <_>
None => {
@@ -336,16 +348,16 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
wildcard = Some(true);
wildcards_patterns.push(make_tag_pattern(&binding, tag));
wildcards_expressions.push(expr.clone());
- }
+ },
}
}
match wildcard {
None => panic!("[internal macro error] tag arm with no tags"),
Some(false) => arms_code.push(quote!( => #expr,)),
- Some(true) => {} // codegen for wildcards is deferred
+ Some(true) => {}, // codegen for wildcards is deferred
}
- }
+ },
}
}
@@ -376,7 +388,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
(Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
(None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
(None, _, RHS::Else) => panic!("the last arm cannot use 'else'"),
- (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e)
+ (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e),
};
quote! {
@@ -402,16 +414,18 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
}
}
-
impl Fold for MatchTokenParser {
fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt {
match stmt {
- syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro{ ref mac, .. })) => {
+ syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => {
if mac.path == parse_quote!(match_token) {
- return syn::fold::fold_stmt(self, syn::Stmt::Expr(expand_match_token(&mac.tts)))
+ return syn::fold::fold_stmt(
+ self,
+ syn::Stmt::Expr(expand_match_token(&mac.tts)),
+ );
}
},
- _ => {}
+ _ => {},
}
syn::fold::fold_stmt(self, stmt)
@@ -419,12 +433,12 @@ impl Fold for MatchTokenParser {
fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr {
match expr {
- syn::Expr::Macro(syn::ExprMacro{ ref mac, .. }) => {
+ syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => {
if mac.path == parse_quote!(match_token) {
- return syn::fold::fold_expr(self, expand_match_token(&mac.tts))
+ return syn::fold::fold_expr(self, expand_match_token(&mac.tts));
}
},
- _ => {}
+ _ => {},
}
syn::fold::fold_expr(self, expr)
@@ -446,4 +460,3 @@ fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream {
::tree_builder::types::TagToken(#binding ::tokenizer::Tag { kind: #kind, #name_field .. })
}
}
-
diff --git a/html5ever/src/driver.rs b/html5ever/src/driver.rs
index 3ee5aaf0..461856ee 100644
--- a/html5ever/src/driver.rs
+++ b/html5ever/src/driver.rs
@@ -9,16 +9,16 @@
//! High-level interface to the parser.
-use {Attribute, QualName};
use buffer_queue::BufferQueue;
use tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult};
-use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink, create_element};
+use tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink};
+use {Attribute, QualName};
use std::borrow::Cow;
use tendril;
-use tendril::StrTendril;
use tendril::stream::{TendrilSink, Utf8LossyDecoder};
+use tendril::StrTendril;
/// All-encompassing options struct for the parser.
#[derive(Clone, Default)]
@@ -37,10 +37,16 @@ pub struct ParseOpts {
/// or all at once with the `one` method.
///
/// If your input is bytes, use `Parser::from_utf8`.
-pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink> where Sink: TreeSink {
+pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink>
+where
+ Sink: TreeSink,
+{
let tb = TreeBuilder::new(sink, opts.tree_builder);
let tok = Tokenizer::new(tb, opts.tokenizer);
- Parser { tokenizer: tok, input_buffer: BufferQueue::new() }
+ Parser {
+ tokenizer: tok,
+ input_buffer: BufferQueue::new(),
+ }
}
/// Parse an HTML fragment
@@ -50,33 +56,48 @@ pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink> where S
/// or all at once with the `one` method.
///
/// If your input is bytes, use `Parser::from_utf8`.
-pub fn parse_fragment<Sink>(mut sink: Sink, opts: ParseOpts,
- context_name: QualName, context_attrs: Vec<Attribute>)
- -> Parser<Sink>
- where Sink: TreeSink {
+pub fn parse_fragment<Sink>(
+ mut sink: Sink,
+ opts: ParseOpts,
+ context_name: QualName,
+ context_attrs: Vec<Attribute>,
+) -> Parser<Sink>
+where
+ Sink: TreeSink,
+{
let context_elem = create_element(&mut sink, context_name, context_attrs);
parse_fragment_for_element(sink, opts, context_elem, None)
}
/// Like `parse_fragment`, but with an existing context element
/// and optionally a form element.
-pub fn parse_fragment_for_element<Sink>(sink: Sink, opts: ParseOpts,
- context_element: Sink::Handle,
- form_element: Option<Sink::Handle>)
- -> Parser<Sink>
- where Sink: TreeSink {
+pub fn parse_fragment_for_element<Sink>(
+ sink: Sink,
+ opts: ParseOpts,
+ context_element: Sink::Handle,
+ form_element: Option<Sink::Handle>,
+) -> Parser<Sink>
+where
+ Sink: TreeSink,
+{
let tb = TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder);
let tok_opts = TokenizerOpts {
initial_state: Some(tb.tokenizer_state_for_context_elem()),
- .. opts.tokenizer
+ ..opts.tokenizer
};
let tok = Tokenizer::new(tb, tok_opts);
- Parser { tokenizer: tok, input_buffer: BufferQueue::new() }
+ Parser {
+ tokenizer: tok,
+ input_buffer: BufferQueue::new(),
+ }
}
/// An HTML parser,
/// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods.
-pub struct Parser<Sink> where Sink: TreeSink {
+pub struct Parser<Sink>
+where
+ Sink: TreeSink,
+{
pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>,
pub input_buffer: BufferQueue,
}
@@ -116,10 +137,10 @@ impl<Sink: TreeSink> Parser<Sink> {
#[cfg(test)]
mod tests {
+ use super::*;
use rcdom::RcDom;
use serialize::serialize;
use tendril::TendrilSink;
- use super::*;
#[test]
fn from_utf8() {
@@ -128,7 +149,9 @@ mod tests {
.one("Test".as_bytes());
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
- assert_eq!(String::from_utf8(serialized).unwrap().replace(" ", ""),
- "Test");
+ assert_eq!(
+ String::from_utf8(serialized).unwrap().replace(" ", ""),
+ "Test"
+ );
}
}
diff --git a/html5ever/src/lib.rs b/html5ever/src/lib.rs
index 4b8029d9..0149887a 100644
--- a/html5ever/src/lib.rs
+++ b/html5ever/src/lib.rs
@@ -7,18 +7,20 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-#![crate_name="html5ever"]
-#![crate_type="dylib"]
-
+#![crate_name = "html5ever"]
+#![crate_type = "dylib"]
#![cfg_attr(test, deny(warnings))]
#![allow(unused_parens)]
-#[macro_use] extern crate log;
-#[macro_use] extern crate markup5ever;
-#[macro_use] extern crate mac;
+#[macro_use]
+extern crate log;
+#[macro_use]
+extern crate markup5ever;
+#[macro_use]
+extern crate mac;
+pub use driver::{parse_document, parse_fragment, ParseOpts, Parser};
pub use markup5ever::*;
-pub use driver::{ParseOpts, parse_document, parse_fragment, Parser};
pub use serialize::serialize;
@@ -29,7 +31,7 @@ mod util {
pub mod str;
}
+pub mod driver;
pub mod serialize;
pub mod tokenizer;
pub mod tree_builder;
-pub mod driver;
diff --git a/html5ever/src/macros.rs b/html5ever/src/macros.rs
index 33dc80d7..558a4a95 100644
--- a/html5ever/src/macros.rs
+++ b/html5ever/src/macros.rs
@@ -13,13 +13,13 @@ macro_rules! unwrap_or_else {
None => $else_block,
Some(x) => x,
}
- }
+ };
}
macro_rules! unwrap_or_return {
($opt:expr, $retval:expr) => {
unwrap_or_else!($opt, { return $retval })
- }
+ };
}
macro_rules! time {
@@ -29,5 +29,5 @@ macro_rules! time {
let d = now.elapsed();
let dt = d.as_secs() * 1_000_000_000 + u64::from(d.subsec_nanos());
(result, dt)
- }}
+ }};
}
diff --git a/html5ever/src/serialize/mod.rs b/html5ever/src/serialize/mod.rs
index 69a412bf..bc91e416 100644
--- a/html5ever/src/serialize/mod.rs
+++ b/html5ever/src/serialize/mod.rs
@@ -7,14 +7,17 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-pub use markup5ever::serialize::{Serialize, Serializer, TraversalScope, AttrRef};
-use std::io::{self, Write};
+pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
use std::default::Default;
+use std::io::{self, Write};
use {LocalName, QualName};
pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
-where Wr: Write, T: Serialize {
+where
+ Wr: Write,
+ T: Serialize,
+{
let mut ser = HtmlSerializer::new(writer, opts.clone());
node.serialize(&mut ser, opts.traversal_scope)
}
@@ -64,7 +67,7 @@ fn tagname(name: &QualName) -> LocalName {
ref ns => {
// FIXME(#122)
warn!("node with weird namespace {:?}", ns);
- }
+ },
}
name.local.clone()
@@ -74,16 +77,16 @@ impl<Wr: Write> HtmlSerializer<Wr> {
fn new(writer: Wr, opts: SerializeOpts) -> Self {
let html_name = match opts.traversal_scope {
TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
- TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n))
+ TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
};
HtmlSerializer {
writer: writer,
opts: opts,
- stack: vec!(ElemInfo {
+ stack: vec![ElemInfo {
html_name: html_name,
ignore_children: false,
processed_first_child: false,
- }),
+ }],
}
}
@@ -116,7 +119,9 @@ impl<Wr: Write> HtmlSerializer<Wr> {
impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
- where AttrIter: Iterator<Item = AttrRef<'a>> {
+ where
+ AttrIter: Iterator<Item = AttrRef<'a>>,
+ {
let html_name = match name.ns {
ns!(html) => Some(name.local.clone()),
_ => None,
@@ -143,13 +148,13 @@ impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
if name.local != local_name!("xmlns") {
try!(self.writer.write_all(b"xmlns:"));
}
- }
+ },
ns!(xlink) => try!(self.writer.write_all(b"xlink:")),
ref ns => {
// FIXME(#122)
warn!("attr with weird namespace {:?}", ns);
try!(self.writer.write_all(b"unknown_namespace:"));
- }
+ },
}
try!(self.writer.write_all(name.local.as_bytes()));
@@ -159,14 +164,28 @@ impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
}
try!(self.writer.write_all(b">"));
- let ignore_children = name.ns == ns!(html) && match name.local {
- local_name!("area") | local_name!("base") | local_name!("basefont") | local_name!("bgsound") | local_name!("br")
- | local_name!("col") | local_name!("embed") | local_name!("frame") | local_name!("hr") | local_name!("img")
- | local_name!("input") | local_name!("keygen") | local_name!("link")
- | local_name!("meta") | local_name!("param") | local_name!("source") | local_name!("track") | local_name!("wbr")
- => true,
- _ => false,
- };
+ let ignore_children = name.ns == ns!(html) &&
+ match name.local {
+ local_name!("area") |
+ local_name!("base") |
+ local_name!("basefont") |
+ local_name!("bgsound") |
+ local_name!("br") |
+ local_name!("col") |
+ local_name!("embed") |
+ local_name!("frame") |
+ local_name!("hr") |
+ local_name!("img") |
+ local_name!("input") |
+ local_name!("keygen") |
+ local_name!("link") |
+ local_name!("meta") |
+ local_name!("param") |
+ local_name!("source") |
+ local_name!("track") |
+ local_name!("wbr") => true,
+ _ => false,
+ };
self.parent().processed_first_child = true;
@@ -185,7 +204,7 @@ impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
None if self.opts.create_missing_parent => {
warn!("missing ElemInfo, creating default.");
Default::default()
- }
+ },
_ => panic!("no ElemInfo"),
};
if info.ignore_children {
@@ -199,9 +218,13 @@ impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
fn write_text(&mut self, text: &str) -> io::Result<()> {
let escape = match self.parent().html_name {
- Some(local_name!("style")) | Some(local_name!("script")) | Some(local_name!("xmp"))
- | Some(local_name!("iframe")) | Some(local_name!("noembed")) | Some(local_name!("noframes"))
- | Some(local_name!("plaintext")) => false,
+ Some(local_name!("style")) |
+ Some(local_name!("script")) |
+ Some(local_name!("xmp")) |
+ Some(local_name!("iframe")) |
+ Some(local_name!("noembed")) |
+ Some(local_name!("noframes")) |
+ Some(local_name!("plaintext")) => false,
Some(local_name!("noscript")) => !self.opts.scripting_enabled,
diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs
index be7fbe3a..6ea2770d 100644
--- a/html5ever/src/tokenizer/char_ref/mod.rs
+++ b/html5ever/src/tokenizer/char_ref/mod.rs
@@ -7,17 +7,17 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use super::{Tokenizer, TokenSink};
+use super::{TokenSink, Tokenizer};
use buffer_queue::BufferQueue;
use data;
use tendril::StrTendril;
-use util::str::{is_ascii_alnum};
+use util::str::is_ascii_alnum;
-use std::char::from_u32;
use std::borrow::Cow::Borrowed;
+use std::char::from_u32;
-pub use self::Status::*;
use self::State::*;
+pub use self::Status::*;
//§ tokenizing-character-references
pub struct CharRef {
@@ -84,12 +84,14 @@ impl CharRefTokenizer {
}
fn name_buf<'t>(&'t self) -> &'t StrTendril {
- self.name_buf_opt.as_ref()
+ self.name_buf_opt
+ .as_ref()
.expect("name_buf missing in named character reference")
}
fn name_buf_mut<'t>(&'t mut self) -> &'t mut StrTendril {
- self.name_buf_opt.as_mut()
+ self.name_buf_opt
+ .as_mut()
.expect("name_buf missing in named character reference")
}
@@ -112,10 +114,10 @@ impl CharRefTokenizer {
impl CharRefTokenizer {
pub fn step<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
if self.result.is_some() {
return Done;
}
@@ -132,57 +134,55 @@ impl CharRefTokenizer {
}
fn do_begin<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
match unwrap_or_return!(tokenizer.peek(input), Stuck) {
- '\t' | '\n' | '\x0C' | ' ' | '<' | '&'
- => self.finish_none(),
- c if Some(c) == self.addnl_allowed
- => self.finish_none(),
+ '\t' | '\n' | '\x0C' | ' ' | '<' | '&' => self.finish_none(),
+ c if Some(c) == self.addnl_allowed => self.finish_none(),
'#' => {
tokenizer.discard_char(input);
self.state = Octothorpe;
Progress
- }
+ },
_ => {
self.state = Named;
self.name_buf_opt = Some(StrTendril::new());
Progress
- }
+ },
}
}
fn do_octothorpe<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
match c {
'x' | 'X' => {
tokenizer.discard_char(input);
self.hex_marker = Some(c);
self.state = Numeric(16);
- }
+ },
_ => {
self.hex_marker = None;
self.state = Numeric(10);
- }
+ },
}
Progress
}
fn do_numeric<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue,
- base: u32)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ base: u32,
+ ) -> Status {
let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
match c.to_digit(base) {
Some(n) => {
@@ -196,34 +196,36 @@ impl CharRefTokenizer {
self.num = self.num.wrapping_add(n);
self.seen_digit = true;
Progress
- }
+ },
None if !self.seen_digit => self.unconsume_numeric(tokenizer, input),
None => {
self.state = NumericSemicolon;
Progress
- }
+ },
}
}
fn do_numeric_semicolon<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
match unwrap_or_return!(tokenizer.peek(input), Stuck) {
';' => tokenizer.discard_char(input),
- _ => tokenizer.emit_error(Borrowed("Semicolon missing after numeric character reference")),
+ _ => tokenizer.emit_error(Borrowed(
+ "Semicolon missing after numeric character reference",
+ )),
};
self.finish_numeric(tokenizer)
}
fn unconsume_numeric<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
let mut unconsume = StrTendril::from_char('#');
match self.hex_marker {
Some(c) => unconsume.push_char(c),
@@ -249,19 +251,20 @@ impl CharRefTokenizer {
None => (conv(self.num), true),
},
- 0x01...0x08 | 0x0B | 0x0D...0x1F | 0x7F | 0xFDD0...0xFDEF
- => (conv(self.num), true),
+ 0x01...0x08 | 0x0B | 0x0D...0x1F | 0x7F | 0xFDD0...0xFDEF => (conv(self.num), true),
- n if (n & 0xFFFE) == 0xFFFE
- => (conv(n), true),
+ n if (n & 0xFFFE) == 0xFFFE => (conv(n), true),
n => (conv(n), false),
};
if error {
- let msg = format_if!(tokenizer.opts.exact_errors,
+ let msg = format_if!(
+ tokenizer.opts.exact_errors,
"Invalid numeric character reference",
- "Invalid numeric character reference value 0x{:06X}", self.num);
+ "Invalid numeric character reference value 0x{:06X}",
+ self.num
+ );
tokenizer.emit_error(msg);
}
@@ -269,10 +272,10 @@ impl CharRefTokenizer {
}
fn do_named<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
self.name_buf_mut().push_char(c);
match data::NAMED_ENTITIES.get(&self.name_buf()[..]) {
@@ -285,7 +288,7 @@ impl CharRefTokenizer {
}
// Otherwise we just have a prefix match.
Progress
- }
+ },
// Can't continue the match.
None => self.finish_named(tokenizer, input, Some(c)),
@@ -293,9 +296,12 @@ impl CharRefTokenizer {
}
fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &mut Tokenizer<Sink>) {
- let msg = format_if!(tokenizer.opts.exact_errors,
+ let msg = format_if!(
+ tokenizer.opts.exact_errors,
"Invalid character reference",
- "Invalid character reference &{}", self.name_buf());
+ "Invalid character reference &{}",
+ self.name_buf()
+ );
tokenizer.emit_error(msg);
}
@@ -303,10 +309,12 @@ impl CharRefTokenizer {
input.push_front(self.name_buf_opt.take().unwrap());
}
- fn finish_named<Sink: TokenSink>(&mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue,
- end_char: Option<char>) -> Status {
+ fn finish_named<Sink: TokenSink>(
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ end_char: Option<char>,
+ ) -> Status {
match self.name_match {
None => {
match end_char {
@@ -315,17 +323,16 @@ impl CharRefTokenizer {
// we emit a parse error.
self.state = BogusName;
return Progress;
- }
+ },
// Check length because &; is not a parse error.
- Some(';') if self.name_buf().len() > 1
- => self.emit_name_error(tokenizer),
+ Some(';') if self.name_buf().len() > 1 => self.emit_name_error(tokenizer),
_ => (),
}
self.unconsume_name(input);
self.finish_none()
- }
+ },
Some((c1, c2)) => {
// We have a complete match, but we may have consumed
@@ -338,7 +345,7 @@ impl CharRefTokenizer {
let name_len = self.name_len;
assert!(name_len > 0);
- let last_matched = self.name_buf()[name_len-1..].chars().next().unwrap();
+ let last_matched = self.name_buf()[name_len - 1..].chars().next().unwrap();
// There might not be a next character after the match, if
// we had a full match and then hit EOF.
@@ -361,14 +368,18 @@ impl CharRefTokenizer {
let unconsume_all = match (self.addnl_allowed, last_matched, next_after) {
(_, ';', _) => false,
(Some(_), _, Some('=')) => {
- tokenizer.emit_error(Borrowed("Equals sign after character reference in attribute"));
+ tokenizer.emit_error(Borrowed(
+ "Equals sign after character reference in attribute",
+ ));
true
- }
+ },
(Some(_), _, Some(c)) if is_ascii_alnum(c) => true,
_ => {
- tokenizer.emit_error(Borrowed("Character reference does not end with semicolon"));
+ tokenizer.emit_error(Borrowed(
+ "Character reference does not end with semicolon",
+ ));
false
- }
+ },
};
if unconsume_all {
@@ -382,54 +393,54 @@ impl CharRefTokenizer {
});
Done
}
- }
+ },
}
}
fn do_bogus_name<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue)
- -> Status {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) -> Status {
let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
self.name_buf_mut().push_char(c);
match c {
_ if is_ascii_alnum(c) => return Progress,
';' => self.emit_name_error(tokenizer),
- _ => ()
+ _ => (),
}
self.unconsume_name(input);
self.finish_none()
}
pub fn end_of_file<Sink: TokenSink>(
- &mut self,
- tokenizer: &mut Tokenizer<Sink>,
- input: &mut BufferQueue) {
+ &mut self,
+ tokenizer: &mut Tokenizer<Sink>,
+ input: &mut BufferQueue,
+ ) {
while self.result.is_none() {
match self.state {
Begin => drop(self.finish_none()),
- Numeric(_) if !self.seen_digit
- => drop(self.unconsume_numeric(tokenizer, input)),
+ Numeric(_) if !self.seen_digit => drop(self.unconsume_numeric(tokenizer, input)),
Numeric(_) | NumericSemicolon => {
tokenizer.emit_error(Borrowed("EOF in numeric character reference"));
self.finish_numeric(tokenizer);
- }
+ },
Named => drop(self.finish_named(tokenizer, input, None)),
BogusName => {
self.unconsume_name(input);
self.finish_none();
- }
+ },
Octothorpe => {
input.push_front(StrTendril::from_slice("#"));
tokenizer.emit_error(Borrowed("EOF after '#' in character reference"));
self.finish_none();
- }
+ },
}
}
}
diff --git a/html5ever/src/tokenizer/interface.rs b/html5ever/src/tokenizer/interface.rs
index 041701b5..d12c72e6 100644
--- a/html5ever/src/tokenizer/interface.rs
+++ b/html5ever/src/tokenizer/interface.rs
@@ -7,15 +7,15 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use LocalName;
use interface::Attribute;
use std::borrow::Cow;
use tendril::StrTendril;
use tokenizer::states;
+use LocalName;
-pub use self::TagKind::{StartTag, EndTag};
-pub use self::Token::{DoctypeToken, TagToken, CommentToken, CharacterTokens};
-pub use self::Token::{NullCharacterToken, EOFToken, ParseError};
+pub use self::TagKind::{EndTag, StartTag};
+pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken};
+pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
/// A `DOCTYPE` token.
// FIXME: already exists in Servo DOM
@@ -87,7 +87,7 @@ pub enum TokenSinkResult<Handle> {
Continue,
Script(Handle),
Plaintext,
- RawData(states::RawKind)
+ RawData(states::RawKind),
}
/// Types which can receive tokens from the tokenizer.
diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs
index 4cf09315..a1118d20 100644
--- a/html5ever/src/tokenizer/mod.rs
+++ b/html5ever/src/tokenizer/mod.rs
@@ -9,43 +9,43 @@
//! The HTML5 tokenizer.
-pub use self::interface::{Doctype, TagKind, StartTag, EndTag, Tag};
-pub use self::interface::{Token, DoctypeToken, TagToken, CommentToken};
-pub use self::interface::{CharacterTokens, NullCharacterToken, EOFToken, ParseError};
+pub use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
+pub use self::interface::{CommentToken, DoctypeToken, TagToken, Token};
+pub use self::interface::{Doctype, EndTag, StartTag, Tag, TagKind};
pub use self::interface::{TokenSink, TokenSinkResult};
-use self::states::{Rcdata, Rawtext, ScriptData, ScriptDataEscaped};
-use self::states::{Escaped, DoubleEscaped};
-use self::states::{Unquoted, SingleQuoted, DoubleQuoted};
use self::states::{DoctypeIdKind, Public, System};
+use self::states::{DoubleEscaped, Escaped};
+use self::states::{DoubleQuoted, SingleQuoted, Unquoted};
+use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};
use self::char_ref::{CharRef, CharRefTokenizer};
use util::str::lower_ascii_letter;
-use std::mem::replace;
-use std::default::Default;
use std::borrow::Cow::{self, Borrowed};
use std::collections::BTreeMap;
+use std::default::Default;
+use std::mem::replace;
-use {LocalName, QualName, Attribute, SmallCharSet};
+pub use buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
use tendril::StrTendril;
-pub use buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet};
+use {Attribute, LocalName, QualName, SmallCharSet};
-pub mod states;
-mod interface;
mod char_ref;
+mod interface;
+pub mod states;
pub enum ProcessResult<Handle> {
Continue,
Suspend,
- Script(Handle)
+ Script(Handle),
}
#[must_use]
pub enum TokenizerResult<Handle> {
Done,
- Script(Handle)
+ Script(Handle),
}
fn option_push(opt_str: &mut Option<StrTendril>, c: char) {
@@ -170,7 +170,9 @@ pub struct Tokenizer<Sink> {
impl<Sink: TokenSink> Tokenizer<Sink> {
/// Create a new tokenizer which feeds tokens to a particular `TokenSink`.
pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> {
- let start_tag_name = opts.last_start_tag_name.take()
+ let start_tag_name = opts
+ .last_start_tag_name
+ .take()
.map(|s| LocalName::from(&*s));
let state = opts.initial_state.unwrap_or(states::Data);
let discard_bom = opts.discard_bom;
@@ -187,7 +189,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
current_tag_kind: StartTag,
current_tag_name: StrTendril::new(),
current_tag_self_closing: false,
- current_tag_attrs: vec!(),
+ current_tag_attrs: vec![],
current_attr_name: StrTendril::new(),
current_attr_value: StrTendril::new(),
current_comment: StrTendril::new(),
@@ -234,17 +236,16 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
fn process_token_and_continue(&mut self, token: Token) {
- assert!(matches!(self.process_token(token), TokenSinkResult::Continue));
+ assert!(matches!(
+ self.process_token(token),
+ TokenSinkResult::Continue
+ ));
}
//§ preprocessing-the-input-stream
// Get the next input character, which might be the character
// 'c' that we already consumed from the buffers.
- fn get_preprocessed_char(
- &mut self,
- mut c: char,
- input: &mut BufferQueue)
- -> Option<char> {
+ fn get_preprocessed_char(&mut self, mut c: char, input: &mut BufferQueue) -> Option<char> {
if self.ignore_lf {
self.ignore_lf = false;
if c == '\n' {
@@ -261,11 +262,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.current_line += 1;
}
- if self.opts.exact_errors && match c as u32 {
- 0x01...0x08 | 0x0B | 0x0E...0x1F | 0x7F...0x9F | 0xFDD0...0xFDEF => true,
- n if (n & 0xFFFE) == 0xFFFE => true,
- _ => false,
- } {
+ if self.opts.exact_errors &&
+ match c as u32 {
+ 0x01...0x08 | 0x0B | 0x0E...0x1F | 0x7F...0x9F | 0xFDD0...0xFDEF => true,
+ n if (n & 0xFFFE) == 0xFFFE => true,
+ _ => false,
+ }
+ {
let msg = format!("Bad character {}", c);
self.emit_error(Cow::Owned(msg));
}
@@ -282,7 +285,9 @@ impl Tokenizer {
self.reconsume = false;
Some(self.current_char)
} else {
- input.next().and_then(|c| self.get_preprocessed_char(c, input))
+ input
+ .next()
+ .and_then(|c| self.get_preprocessed_char(c, input))
}
}
@@ -303,7 +308,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// NB: We don't set self.current_char for a run of characters not
// in the set. It shouldn't matter for the codepaths that use
// this.
- _ => d
+ _ => d,
}
}
@@ -313,11 +318,11 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// NB: this doesn't do input stream preprocessing or set the current input
// character.
fn eat(
- &mut self,
- input: &mut BufferQueue,
- pat: &str,
- eq: fn(&u8, &u8) -> bool)
- -> Option<bool> {
+ &mut self,
+ input: &mut BufferQueue,
+ pat: &str,
+ eq: fn(&u8, &u8) -> bool,
+ ) -> Option<bool> {
input.push_front(replace(&mut self.temp_buf, StrTendril::new()));
match input.eat(pat, eq) {
None if self.at_eof => Some(false),
@@ -343,7 +348,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
Some(x) => {
*x += dt;
false
- }
+ },
None => true,
};
if new {
@@ -372,7 +377,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let msg = format_if!(
self.opts.exact_errors,
"Bad character",
- "Saw {} in state {:?}", self.current_char, self.state);
+ "Saw {} in state {:?}",
+ self.current_char,
+ self.state
+ );
self.emit_error(msg);
}
@@ -380,7 +388,9 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let msg = format_if!(
self.opts.exact_errors,
"Unexpected EOF",
- "Saw EOF in state {:?}", self.state);
+ "Saw EOF in state {:?}",
+ self.state
+ );
self.emit_error(msg);
}
@@ -405,7 +415,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match self.current_tag_kind {
StartTag => {
self.last_start_tag_name = Some(name.clone());
- }
+ },
EndTag => {
if !self.current_tag_attrs.is_empty() {
self.emit_error(Borrowed("Attributes on an end tag"));
@@ -413,13 +423,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
if self.current_tag_self_closing {
self.emit_error(Borrowed("Self-closing end tag"));
}
- }
+ },
}
- let token = TagToken(Tag { kind: self.current_tag_kind,
+ let token = TagToken(Tag {
+ kind: self.current_tag_kind,
name: name,
self_closing: self.current_tag_self_closing,
- attrs: replace(&mut self.current_tag_attrs, vec!()),
+ attrs: replace(&mut self.current_tag_attrs, vec![]),
});
match self.process_token(token) {
@@ -435,7 +446,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
TokenSinkResult::RawData(kind) => {
self.state = states::RawData(kind);
ProcessResult::Continue
- }
+ },
}
}
@@ -458,7 +469,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn discard_tag(&mut self) {
self.current_tag_name.clear();
self.current_tag_self_closing = false;
- self.current_tag_attrs = vec!();
+ self.current_tag_attrs = vec![];
}
fn create_tag(&mut self, kind: TagKind, c: char) {
@@ -469,9 +480,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn have_appropriate_end_tag(&self) -> bool {
match self.last_start_tag_name.as_ref() {
- Some(last) =>
- (self.current_tag_kind == EndTag)
- && (*self.current_tag_name == **last),
+ Some(last) => (self.current_tag_kind == EndTag) && (*self.current_tag_name == **last),
None => false,
}
}
@@ -492,7 +501,9 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// FIXME: linear time search, do we care?
let dup = {
let name = &*self.current_attr_name;
- self.current_tag_attrs.iter().any(|a| &*a.name.local == name)
+ self.current_tag_attrs
+ .iter()
+ .any(|a| &*a.name.local == name)
};
if dup {
@@ -680,9 +691,9 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
states::Data => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) {
FromSet('\0') => go!(self: error; emit '\0'),
- FromSet('&') => go!(self: consume_char_ref),
- FromSet('<') => go!(self: to TagOpen),
- FromSet(c) => go!(self: emit c),
+ FromSet('&') => go!(self: consume_char_ref),
+ FromSet('<') => go!(self: to TagOpen),
+ FromSet(c) => go!(self: emit c),
NotFromSet(b) => self.emit_chars(b),
}
},
@@ -734,7 +745,9 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
FromSet('\0') => go!(self: error; emit '\u{fffd}'),
FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash DoubleEscaped),
- FromSet('<') => go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped),
+ FromSet('<') => {
+ go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped)
+ },
FromSet(c) => go!(self: emit c),
NotFromSet(b) => self.emit_chars(b),
}
@@ -744,72 +757,87 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
states::Plaintext => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) {
FromSet('\0') => go!(self: error; emit '\u{fffd}'),
- FromSet(c) => go!(self: emit c),
+ FromSet(c) => go!(self: emit c),
NotFromSet(b) => self.emit_chars(b),
}
},
//§ tag-open-state
- states::TagOpen => loop { match get_char!(self, input) {
- '!' => go!(self: clear_temp; to MarkupDeclarationOpen),
- '/' => go!(self: to EndTagOpen),
- '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment),
- c => match lower_ascii_letter(c) {
- Some(cl) => go!(self: create_tag StartTag cl; to TagName),
- None => go!(self: error; emit '<'; reconsume Data),
+ states::TagOpen => loop {
+ match get_char!(self, input) {
+ '!' => go!(self: clear_temp; to MarkupDeclarationOpen),
+ '/' => go!(self: to EndTagOpen),
+ '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment),
+ c => match lower_ascii_letter(c) {
+ Some(cl) => go!(self: create_tag StartTag cl; to TagName),
+ None => go!(self: error; emit '<'; reconsume Data),
+ },
}
- }},
+ },
//§ end-tag-open-state
- states::EndTagOpen => loop { match get_char!(self, input) {
- '>' => go!(self: error; to Data),
- '\0' => go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment),
- c => match lower_ascii_letter(c) {
- Some(cl) => go!(self: create_tag EndTag cl; to TagName),
- None => go!(self: error; clear_comment; push_comment c; to BogusComment),
+ states::EndTagOpen => loop {
+ match get_char!(self, input) {
+ '>' => go!(self: error; to Data),
+ '\0' => {
+ go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment)
+ },
+ c => match lower_ascii_letter(c) {
+ Some(cl) => go!(self: create_tag EndTag cl; to TagName),
+ None => go!(self: error; clear_comment; push_comment c; to BogusComment),
+ },
}
- }},
+ },
//§ tag-name-state
- states::TagName => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to BeforeAttributeName),
- '/' => go!(self: to SelfClosingStartTag),
- '>' => go!(self: emit_tag Data),
- '\0' => go!(self: error; push_tag '\u{fffd}'),
- c => go!(self: push_tag (c.to_ascii_lowercase())),
- }},
+ states::TagName => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
+ '/' => go!(self: to SelfClosingStartTag),
+ '>' => go!(self: emit_tag Data),
+ '\0' => go!(self: error; push_tag '\u{fffd}'),
+ c => go!(self: push_tag (c.to_ascii_lowercase())),
+ }
+ },
//§ script-data-escaped-less-than-sign-state
- states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop { match get_char!(self, input) {
- '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped),
- c => match lower_ascii_letter(c) {
- Some(cl) => go!(self: clear_temp; push_temp cl; emit '<'; emit c;
+ states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop {
+ match get_char!(self, input) {
+ '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped),
+ c => match lower_ascii_letter(c) {
+ Some(cl) => go!(self: clear_temp; push_temp cl; emit '<'; emit c;
to ScriptDataEscapeStart DoubleEscaped),
- None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped),
+ None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped),
+ },
}
- }},
+ },
//§ script-data-double-escaped-less-than-sign-state
- states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop { match get_char!(self, input) {
- '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd),
- _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
- }},
+ states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop {
+ match get_char!(self, input) {
+ '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd),
+ _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
+ }
+ },
//§ rcdata-less-than-sign-state rawtext-less-than-sign-state script-data-less-than-sign-state
// otherwise
- states::RawLessThanSign(kind) => loop { match get_char!(self, input) {
- '/' => go!(self: clear_temp; to RawEndTagOpen kind),
- '!' if kind == ScriptData => go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped),
- _ => go!(self: emit '<'; reconsume RawData kind),
- }},
+ states::RawLessThanSign(kind) => loop {
+ match get_char!(self, input) {
+ '/' => go!(self: clear_temp; to RawEndTagOpen kind),
+ '!' if kind == ScriptData => {
+ go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped)
+ },
+ _ => go!(self: emit '<'; reconsume RawData kind),
+ }
+ },
//§ rcdata-end-tag-open-state rawtext-end-tag-open-state script-data-end-tag-open-state script-data-escaped-end-tag-open-state
states::RawEndTagOpen(kind) => loop {
let c = get_char!(self, input);
match lower_ascii_letter(c) {
Some(cl) => go!(self: create_tag EndTag cl; push_temp c; to RawEndTagName kind),
- None => go!(self: emit '<'; emit '/'; reconsume RawData kind),
+ None => go!(self: emit '<'; emit '/'; reconsume RawData kind),
}
},
@@ -818,8 +846,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let c = get_char!(self, input);
if self.have_appropriate_end_tag() {
match c {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to BeforeAttributeName),
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
'/' => go!(self: to SelfClosingStartTag),
'>' => go!(self: emit_tag Data),
_ => (),
@@ -828,7 +855,9 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match lower_ascii_letter(c) {
Some(cl) => go!(self: push_tag cl; push_temp c),
- None => go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind),
+ None => {
+ go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind)
+ },
}
},
@@ -837,136 +866,165 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
let c = get_char!(self, input);
match c {
'\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
- let esc = if &*self.temp_buf == "script" { DoubleEscaped } else { Escaped };
+ let esc = if &*self.temp_buf == "script" {
+ DoubleEscaped
+ } else {
+ Escaped
+ };
go!(self: emit c; to RawData ScriptDataEscaped esc);
- }
+ },
_ => match lower_ascii_letter(c) {
Some(cl) => go!(self: push_temp cl; emit c),
- None => go!(self: reconsume RawData ScriptDataEscaped Escaped),
- }
+ None => go!(self: reconsume RawData ScriptDataEscaped Escaped),
+ },
}
},
//§ script-data-escape-start-state
- states::ScriptDataEscapeStart(Escaped) => loop { match get_char!(self, input) {
- '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash),
- _ => go!(self: reconsume RawData ScriptData),
- }},
+ states::ScriptDataEscapeStart(Escaped) => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash),
+ _ => go!(self: reconsume RawData ScriptData),
+ }
+ },
//§ script-data-escape-start-dash-state
- states::ScriptDataEscapeStartDash => loop { match get_char!(self, input) {
- '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped),
- _ => go!(self: reconsume RawData ScriptData),
- }},
+ states::ScriptDataEscapeStartDash => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped),
+ _ => go!(self: reconsume RawData ScriptData),
+ }
+ },
//§ script-data-escaped-dash-state script-data-double-escaped-dash-state
- states::ScriptDataEscapedDash(kind) => loop { match get_char!(self, input) {
- '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind),
- '<' => {
- if kind == DoubleEscaped { go!(self: emit '<'); }
- go!(self: to RawLessThanSign ScriptDataEscaped kind);
+ states::ScriptDataEscapedDash(kind) => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind),
+ '<' => {
+ if kind == DoubleEscaped {
+ go!(self: emit '<');
+ }
+ go!(self: to RawLessThanSign ScriptDataEscaped kind);
+ },
+ '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
+ c => go!(self: emit c; to RawData ScriptDataEscaped kind),
}
- '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
- c => go!(self: emit c; to RawData ScriptDataEscaped kind),
- }},
+ },
//§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state
- states::ScriptDataEscapedDashDash(kind) => loop { match get_char!(self, input) {
- '-' => go!(self: emit '-'),
- '<' => {
- if kind == DoubleEscaped { go!(self: emit '<'); }
- go!(self: to RawLessThanSign ScriptDataEscaped kind);
+ states::ScriptDataEscapedDashDash(kind) => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: emit '-'),
+ '<' => {
+ if kind == DoubleEscaped {
+ go!(self: emit '<');
+ }
+ go!(self: to RawLessThanSign ScriptDataEscaped kind);
+ },
+ '>' => go!(self: emit '>'; to RawData ScriptData),
+ '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
+ c => go!(self: emit c; to RawData ScriptDataEscaped kind),
}
- '>' => go!(self: emit '>'; to RawData ScriptData),
- '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
- c => go!(self: emit c; to RawData ScriptDataEscaped kind),
- }},
+ },
//§ script-data-double-escape-end-state
states::ScriptDataDoubleEscapeEnd => loop {
let c = get_char!(self, input);
match c {
'\t' | '\n' | '\x0C' | ' ' | '/' | '>' => {
- let esc = if &*self.temp_buf == "script" { Escaped } else { DoubleEscaped };
+ let esc = if &*self.temp_buf == "script" {
+ Escaped
+ } else {
+ DoubleEscaped
+ };
go!(self: emit c; to RawData ScriptDataEscaped esc);
- }
+ },
_ => match lower_ascii_letter(c) {
Some(cl) => go!(self: push_temp cl; emit c),
- None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
- }
+ None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
+ },
}
},
//§ before-attribute-name-state
- states::BeforeAttributeName => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' ' => (),
- '/' => go!(self: to SelfClosingStartTag),
- '>' => go!(self: emit_tag Data),
- '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
- c => match lower_ascii_letter(c) {
- Some(cl) => go!(self: create_attr cl; to AttributeName),
- None => {
- go_match!(self: c,
+ states::BeforeAttributeName => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => (),
+ '/' => go!(self: to SelfClosingStartTag),
+ '>' => go!(self: emit_tag Data),
+ '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
+ c => match lower_ascii_letter(c) {
+ Some(cl) => go!(self: create_attr cl; to AttributeName),
+ None => {
+ go_match!(self: c,
'"' , '\'' , '<' , '=' => error);
- go!(self: create_attr c; to AttributeName);
- }
+ go!(self: create_attr c; to AttributeName);
+ },
+ },
}
- }},
+ },
//§ attribute-name-state
- states::AttributeName => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to AfterAttributeName),
- '/' => go!(self: to SelfClosingStartTag),
- '=' => go!(self: to BeforeAttributeValue),
- '>' => go!(self: emit_tag Data),
- '\0' => go!(self: error; push_name '\u{fffd}'),
- c => match lower_ascii_letter(c) {
- Some(cl) => go!(self: push_name cl),
- None => {
- go_match!(self: c,
+ states::AttributeName => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterAttributeName),
+ '/' => go!(self: to SelfClosingStartTag),
+ '=' => go!(self: to BeforeAttributeValue),
+ '>' => go!(self: emit_tag Data),
+ '\0' => go!(self: error; push_name '\u{fffd}'),
+ c => match lower_ascii_letter(c) {
+ Some(cl) => go!(self: push_name cl),
+ None => {
+ go_match!(self: c,
'"' , '\'' , '<' => error);
- go!(self: push_name c);
- }
+ go!(self: push_name c);
+ },
+ },
}
- }},
+ },
//§ after-attribute-name-state
- states::AfterAttributeName => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' ' => (),
- '/' => go!(self: to SelfClosingStartTag),
- '=' => go!(self: to BeforeAttributeValue),
- '>' => go!(self: emit_tag Data),
- '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
- c => match lower_ascii_letter(c) {
- Some(cl) => go!(self: create_attr cl; to AttributeName),
- None => {
- go_match!(self: c,
+ states::AfterAttributeName => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => (),
+ '/' => go!(self: to SelfClosingStartTag),
+ '=' => go!(self: to BeforeAttributeValue),
+ '>' => go!(self: emit_tag Data),
+ '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName),
+ c => match lower_ascii_letter(c) {
+ Some(cl) => go!(self: create_attr cl; to AttributeName),
+ None => {
+ go_match!(self: c,
'"' , '\'' , '<' => error);
- go!(self: create_attr c; to AttributeName);
- }
+ go!(self: create_attr c; to AttributeName);
+ },
+ },
}
- }},
+ },
//§ before-attribute-value-state
// Use peek so we can handle the first attr character along with the rest,
// hopefully in the same zero-copy buffer.
- states::BeforeAttributeValue => loop { match peek!(self, input) {
- '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input),
- '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted),
- '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted),
- '\0' => go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted),
- '>' => go!(self: discard_char input; error; emit_tag Data),
- _ => go!(self: to AttributeValue Unquoted),
- }},
+ states::BeforeAttributeValue => loop {
+ match peek!(self, input) {
+ '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input),
+ '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted),
+ '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted),
+ '\0' => {
+ go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted)
+ },
+ '>' => go!(self: discard_char input; error; emit_tag Data),
+ _ => go!(self: to AttributeValue Unquoted),
+ }
+ },
//§ attribute-value-(double-quoted)-state
states::AttributeValue(DoubleQuoted) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) {
- FromSet('"') => go!(self: to AfterAttributeValueQuoted),
- FromSet('&') => go!(self: consume_char_ref '"'),
+ FromSet('"') => go!(self: to AfterAttributeValueQuoted),
+ FromSet('&') => go!(self: consume_char_ref '"'),
FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
- FromSet(c) => go!(self: push_value c),
+ FromSet(c) => go!(self: push_value c),
NotFromSet(ref b) => go!(self: append_value b),
}
},
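Aside (illustrative, not part of the patch): the attribute-value arms above use `pop_except_from!` with a `small_char_set!` so that runs of ordinary characters leave the buffer in one slice, and only the few delimiters that matter ('"', '&', '\0', line endings) get per-character treatment. A minimal stand-alone sketch of that FromSet/NotFromSet split, with invented names (`Piece`, `scan`) in place of the real macros:

    // Batch scanning: yield either one special character (FromSet) or a
    // maximal run of ordinary characters (NotFromSet), plus the remainder.
    #[derive(Debug, PartialEq)]
    enum Piece<'a> {
        FromSet(char),       // needs per-character dispatch
        NotFromSet(&'a str), // can be appended to the value wholesale
    }

    fn scan<'a>(input: &'a str, special: &[char]) -> Option<(Piece<'a>, &'a str)> {
        let first = input.chars().next()?;
        if special.contains(&first) {
            return Some((Piece::FromSet(first), &input[first.len_utf8()..]));
        }
        // Extend the run up to (but not including) the next special character.
        let end = input
            .char_indices()
            .find(|&(_, c)| special.contains(&c))
            .map_or(input.len(), |(i, _)| i);
        Some((Piece::NotFromSet(&input[..end]), &input[end..]))
    }

    fn main() {
        let mut rest = "hello&amp;\"tail";
        while let Some((piece, r)) = scan(rest, &['\r', '"', '&', '\0', '\n']) {
            println!("{:?}", piece); // NotFromSet("hello"), FromSet('&'), ...
            rest = r;
        }
    }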
@@ -975,119 +1033,145 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
states::AttributeValue(SingleQuoted) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) {
FromSet('\'') => go!(self: to AfterAttributeValueQuoted),
- FromSet('&') => go!(self: consume_char_ref '\''),
+ FromSet('&') => go!(self: consume_char_ref '\''),
FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
- FromSet(c) => go!(self: push_value c),
+ FromSet(c) => go!(self: push_value c),
NotFromSet(ref b) => go!(self: append_value b),
}
},
//§ attribute-value-(unquoted)-state
states::AttributeValue(Unquoted) => loop {
- match pop_except_from!(self, input, small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')) {
- FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ')
- => go!(self: to BeforeAttributeName),
- FromSet('&') => go!(self: consume_char_ref '>'),
- FromSet('>') => go!(self: emit_tag Data),
+ match pop_except_from!(
+ self,
+ input,
+ small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')
+ ) {
+ FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => {
+ go!(self: to BeforeAttributeName)
+ },
+ FromSet('&') => go!(self: consume_char_ref '>'),
+ FromSet('>') => go!(self: emit_tag Data),
FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
FromSet(c) => {
go_match!(self: c,
'"' , '\'' , '<' , '=' , '`' => error);
go!(self: push_value c);
- }
+ },
NotFromSet(ref b) => go!(self: append_value b),
}
},
//§ after-attribute-value-(quoted)-state
- states::AfterAttributeValueQuoted => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to BeforeAttributeName),
- '/' => go!(self: to SelfClosingStartTag),
- '>' => go!(self: emit_tag Data),
- _ => go!(self: error; reconsume BeforeAttributeName),
- }},
+ states::AfterAttributeValueQuoted => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName),
+ '/' => go!(self: to SelfClosingStartTag),
+ '>' => go!(self: emit_tag Data),
+ _ => go!(self: error; reconsume BeforeAttributeName),
+ }
+ },
//§ self-closing-start-tag-state
- states::SelfClosingStartTag => loop { match get_char!(self, input) {
- '>' => {
- self.current_tag_self_closing = true;
- go!(self: emit_tag Data);
+ states::SelfClosingStartTag => loop {
+ match get_char!(self, input) {
+ '>' => {
+ self.current_tag_self_closing = true;
+ go!(self: emit_tag Data);
+ },
+ _ => go!(self: error; reconsume BeforeAttributeName),
}
- _ => go!(self: error; reconsume BeforeAttributeName),
- }},
+ },
//§ comment-start-state
- states::CommentStart => loop { match get_char!(self, input) {
- '-' => go!(self: to CommentStartDash),
- '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment),
- '>' => go!(self: error; emit_comment; to Data),
- c => go!(self: push_comment c; to Comment),
- }},
+ states::CommentStart => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: to CommentStartDash),
+ '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment),
+ '>' => go!(self: error; emit_comment; to Data),
+ c => go!(self: push_comment c; to Comment),
+ }
+ },
//§ comment-start-dash-state
- states::CommentStartDash => loop { match get_char!(self, input) {
- '-' => go!(self: to CommentEnd),
- '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
- '>' => go!(self: error; emit_comment; to Data),
- c => go!(self: push_comment '-'; push_comment c; to Comment),
- }},
+ states::CommentStartDash => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: to CommentEnd),
+ '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
+ '>' => go!(self: error; emit_comment; to Data),
+ c => go!(self: push_comment '-'; push_comment c; to Comment),
+ }
+ },
//§ comment-state
- states::Comment => loop { match get_char!(self, input) {
- '-' => go!(self: to CommentEndDash),
- '\0' => go!(self: error; push_comment '\u{fffd}'),
- c => go!(self: push_comment c),
- }},
+ states::Comment => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: to CommentEndDash),
+ '\0' => go!(self: error; push_comment '\u{fffd}'),
+ c => go!(self: push_comment c),
+ }
+ },
//§ comment-end-dash-state
- states::CommentEndDash => loop { match get_char!(self, input) {
- '-' => go!(self: to CommentEnd),
- '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
- c => go!(self: push_comment '-'; push_comment c; to Comment),
- }},
+ states::CommentEndDash => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: to CommentEnd),
+ '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment),
+ c => go!(self: push_comment '-'; push_comment c; to Comment),
+ }
+ },
//§ comment-end-state
- states::CommentEnd => loop { match get_char!(self, input) {
- '>' => go!(self: emit_comment; to Data),
- '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment),
- '!' => go!(self: error; to CommentEndBang),
- '-' => go!(self: error; push_comment '-'),
- c => go!(self: error; append_comment "--"; push_comment c; to Comment),
- }},
+ states::CommentEnd => loop {
+ match get_char!(self, input) {
+ '>' => go!(self: emit_comment; to Data),
+ '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment),
+ '!' => go!(self: error; to CommentEndBang),
+ '-' => go!(self: error; push_comment '-'),
+ c => go!(self: error; append_comment "--"; push_comment c; to Comment),
+ }
+ },
//§ comment-end-bang-state
- states::CommentEndBang => loop { match get_char!(self, input) {
- '-' => go!(self: append_comment "--!"; to CommentEndDash),
- '>' => go!(self: emit_comment; to Data),
- '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment),
- c => go!(self: append_comment "--!"; push_comment c; to Comment),
- }},
+ states::CommentEndBang => loop {
+ match get_char!(self, input) {
+ '-' => go!(self: append_comment "--!"; to CommentEndDash),
+ '>' => go!(self: emit_comment; to Data),
+ '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment),
+ c => go!(self: append_comment "--!"; push_comment c; to Comment),
+ }
+ },
//§ doctype-state
- states::Doctype => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to BeforeDoctypeName),
- _ => go!(self: error; reconsume BeforeDoctypeName),
- }},
+ states::Doctype => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName),
+ _ => go!(self: error; reconsume BeforeDoctypeName),
+ }
+ },
//§ before-doctype-name-state
- states::BeforeDoctypeName => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' ' => (),
- '\0' => go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName),
- '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data),
- c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase());
+ states::BeforeDoctypeName => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => (),
+ '\0' => {
+ go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName)
+ },
+ '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data),
+ c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase());
to DoctypeName),
- }},
+ }
+ },
//§ doctype-name-state
- states::DoctypeName => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: clear_temp; to AfterDoctypeName),
- '>' => go!(self: emit_doctype; to Data),
- '\0' => go!(self: error; push_doctype_name '\u{fffd}'),
- c => go!(self: push_doctype_name (c.to_ascii_lowercase())),
- }},
+ states::DoctypeName => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: clear_temp; to AfterDoctypeName),
+ '>' => go!(self: emit_doctype; to Data),
+ '\0' => go!(self: error; push_doctype_name '\u{fffd}'),
+ c => go!(self: push_doctype_name (c.to_ascii_lowercase())),
+ }
+ },
//§ after-doctype-name-state
states::AfterDoctypeName => loop {
@@ -1099,84 +1183,114 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match get_char!(self, input) {
'\t' | '\n' | '\x0C' | ' ' => (),
'>' => go!(self: emit_doctype; to Data),
- _ => go!(self: error; force_quirks; to BogusDoctype),
+ _ => go!(self: error; force_quirks; to BogusDoctype),
}
}
},
//§ after-doctype-public-keyword-state after-doctype-system-keyword-state
- states::AfterDoctypeKeyword(kind) => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to BeforeDoctypeIdentifier kind),
- '"' => go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind),
- '\'' => go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind),
- '>' => go!(self: error; force_quirks; emit_doctype; to Data),
- _ => go!(self: error; force_quirks; to BogusDoctype),
- }},
+ states::AfterDoctypeKeyword(kind) => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier kind),
+ '"' => {
+ go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind)
+ },
+ '\'' => {
+ go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind)
+ },
+ '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+ _ => go!(self: error; force_quirks; to BogusDoctype),
+ }
+ },
//§ before-doctype-public-identifier-state before-doctype-system-identifier-state
- states::BeforeDoctypeIdentifier(kind) => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' ' => (),
- '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind),
- '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind),
- '>' => go!(self: error; force_quirks; emit_doctype; to Data),
- _ => go!(self: error; force_quirks; to BogusDoctype),
- }},
+ states::BeforeDoctypeIdentifier(kind) => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => (),
+ '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind),
+ '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind),
+ '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+ _ => go!(self: error; force_quirks; to BogusDoctype),
+ }
+ },
//§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state
- states::DoctypeIdentifierDoubleQuoted(kind) => loop { match get_char!(self, input) {
- '"' => go!(self: to AfterDoctypeIdentifier kind),
- '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
- '>' => go!(self: error; force_quirks; emit_doctype; to Data),
- c => go!(self: push_doctype_id kind c),
- }},
+ states::DoctypeIdentifierDoubleQuoted(kind) => loop {
+ match get_char!(self, input) {
+ '"' => go!(self: to AfterDoctypeIdentifier kind),
+ '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
+ '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+ c => go!(self: push_doctype_id kind c),
+ }
+ },
//§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state
- states::DoctypeIdentifierSingleQuoted(kind) => loop { match get_char!(self, input) {
- '\'' => go!(self: to AfterDoctypeIdentifier kind),
- '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
- '>' => go!(self: error; force_quirks; emit_doctype; to Data),
- c => go!(self: push_doctype_id kind c),
- }},
+ states::DoctypeIdentifierSingleQuoted(kind) => loop {
+ match get_char!(self, input) {
+ '\'' => go!(self: to AfterDoctypeIdentifier kind),
+ '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'),
+ '>' => go!(self: error; force_quirks; emit_doctype; to Data),
+ c => go!(self: push_doctype_id kind c),
+ }
+ },
//§ after-doctype-public-identifier-state
- states::AfterDoctypeIdentifier(Public) => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' '
- => go!(self: to BetweenDoctypePublicAndSystemIdentifiers),
- '>' => go!(self: emit_doctype; to Data),
- '"' => go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System),
- '\'' => go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System),
- _ => go!(self: error; force_quirks; to BogusDoctype),
- }},
+ states::AfterDoctypeIdentifier(Public) => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => {
+ go!(self: to BetweenDoctypePublicAndSystemIdentifiers)
+ },
+ '>' => go!(self: emit_doctype; to Data),
+ '"' => {
+ go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
+ },
+ '\'' => {
+ go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
+ },
+ _ => go!(self: error; force_quirks; to BogusDoctype),
+ }
+ },
//§ after-doctype-system-identifier-state
- states::AfterDoctypeIdentifier(System) => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' ' => (),
- '>' => go!(self: emit_doctype; to Data),
- _ => go!(self: error; to BogusDoctype),
- }},
+ states::AfterDoctypeIdentifier(System) => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => (),
+ '>' => go!(self: emit_doctype; to Data),
+ _ => go!(self: error; to BogusDoctype),
+ }
+ },
//§ between-doctype-public-and-system-identifiers-state
- states::BetweenDoctypePublicAndSystemIdentifiers => loop { match get_char!(self, input) {
- '\t' | '\n' | '\x0C' | ' ' => (),
- '>' => go!(self: emit_doctype; to Data),
- '"' => go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System),
- '\'' => go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System),
- _ => go!(self: error; force_quirks; to BogusDoctype),
- }},
+ states::BetweenDoctypePublicAndSystemIdentifiers => loop {
+ match get_char!(self, input) {
+ '\t' | '\n' | '\x0C' | ' ' => (),
+ '>' => go!(self: emit_doctype; to Data),
+ '"' => {
+ go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
+ },
+ '\'' => {
+ go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
+ },
+ _ => go!(self: error; force_quirks; to BogusDoctype),
+ }
+ },
//§ bogus-doctype-state
- states::BogusDoctype => loop { match get_char!(self, input) {
- '>' => go!(self: emit_doctype; to Data),
- _ => (),
- }},
+ states::BogusDoctype => loop {
+ match get_char!(self, input) {
+ '>' => go!(self: emit_doctype; to Data),
+ _ => (),
+ }
+ },
//§ bogus-comment-state
- states::BogusComment => loop { match get_char!(self, input) {
- '>' => go!(self: emit_comment; to Data),
- '\0' => go!(self: push_comment '\u{fffd}'),
- c => go!(self: push_comment c),
- }},
+ states::BogusComment => loop {
+ match get_char!(self, input) {
+ '>' => go!(self: emit_comment; to Data),
+ '\0' => go!(self: push_comment '\u{fffd}'),
+ c => go!(self: push_comment c),
+ }
+ },
//§ markup-declaration-open-state
states::MarkupDeclarationOpen => loop {
@@ -1185,7 +1299,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
} else if eat!(self, input, "doctype") {
go!(self: to Doctype);
} else {
- if self.sink.adjusted_current_node_present_but_not_in_html_namespace() {
+ if self
+ .sink
+ .adjusted_current_node_present_but_not_in_html_namespace()
+ {
if eat_exact!(self, input, "[CDATA[") {
go!(self: clear_temp; to CdataSection);
}
@@ -1195,11 +1312,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
},
//§ cdata-section-state
- states::CdataSection => loop { match get_char!(self, input) {
- ']' => go!(self: to CdataSectionBracket),
- '\0' => go!(self: emit_temp; emit '\0'),
- c => go!(self: push_temp c),
- }},
+ states::CdataSection => loop {
+ match get_char!(self, input) {
+ ']' => go!(self: to CdataSectionBracket),
+ '\0' => go!(self: emit_temp; emit '\0'),
+ c => go!(self: push_temp c),
+ }
+ },
//§ cdata-section-bracket
states::CdataSectionBracket => match get_char!(self, input) {
@@ -1208,12 +1327,13 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
},
//§ cdata-section-end
- states::CdataSectionEnd => loop { match get_char!(self, input) {
- ']' => go!(self: push_temp ']'),
- '>' => go!(self: emit_temp; to Data),
- _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection),
- }},
-
+ states::CdataSectionEnd => loop {
+ match get_char!(self, input) {
+ ']' => go!(self: push_temp ']'),
+ '>' => go!(self: emit_temp; to Data),
+ _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection),
+ }
+ },
//§ END
}
}
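Aside (illustrative, not part of the patch): every tokenizer state above has the same shape — `loop`, pull a character, and let the `go!` macro run side effects and switch states, where `to ...` restarts the loop in the new state. A toy two-state version of that dispatch style (the real `go!` also handles reconsume, buffered emits, and early returns):

    #[derive(Copy, Clone, Debug)]
    enum State {
        Data,
        TagOpen,
    }

    struct Toy {
        state: State,
        out: String,
    }

    // A drastically reduced stand-in for the real go! macro.
    macro_rules! go {
        ($me:ident: emit $c:expr) => {
            $me.out.push($c)
        };
        ($me:ident: to $s:ident) => {{
            $me.state = State::$s;
            continue;
        }};
    }

    impl Toy {
        fn run(&mut self, input: &str) {
            let mut chars = input.chars();
            loop {
                let c = match chars.next() {
                    Some(c) => c,
                    None => return,
                };
                match self.state {
                    State::Data => match c {
                        '<' => go!(self: to TagOpen),
                        c => go!(self: emit c),
                    },
                    State::TagOpen => match c {
                        '>' => go!(self: to Data),
                        _ => (), // this toy just swallows tag contents
                    },
                }
            }
        }
    }

    fn main() {
        let mut t = Toy { state: State::Data, out: String::new() };
        t.run("a<b>c");
        assert_eq!(t.out, "ac");
    }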
@@ -1228,7 +1348,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
char_ref::Done => {
self.process_char_ref(tok.get_result());
return ProcessResult::Continue;
- }
+ },
char_ref::Stuck => ProcessResult::Suspend,
char_ref::Progress => ProcessResult::Continue,
@@ -1239,23 +1359,27 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
fn process_char_ref(&mut self, char_ref: CharRef) {
- let CharRef { mut chars, mut num_chars } = char_ref;
+ let CharRef {
+ mut chars,
+ mut num_chars,
+ } = char_ref;
if num_chars == 0 {
chars[0] = '&';
num_chars = 1;
}
- for i in 0 .. num_chars {
+ for i in 0..num_chars {
let c = chars[i as usize];
match self.state {
- states::Data | states::RawData(states::Rcdata)
- => go!(self: emit c),
+ states::Data | states::RawData(states::Rcdata) => go!(self: emit c),
- states::AttributeValue(_)
- => go!(self: push_value c),
+ states::AttributeValue(_) => go!(self: push_value c),
- _ => panic!("state {:?} should not be reachable in process_char_ref", self.state),
+ _ => panic!(
+ "state {:?} should not be reachable in process_char_ref",
+ self.state
+ ),
}
}
}
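Aside (illustrative, not part of the patch): the destructuring above also encodes the char-ref fallback — a resolved reference is at most two chars, and `num_chars == 0` means nothing matched, so the ampersand itself is emitted. The same convention, reduced to a free function with an invented `expand` name:

    struct CharRef {
        chars: [char; 2],
        num_chars: u8,
    }

    fn expand(char_ref: CharRef) -> Vec<char> {
        let CharRef {
            mut chars,
            mut num_chars,
        } = char_ref;
        if num_chars == 0 {
            // No reference matched: emit the literal '&' instead.
            chars[0] = '&';
            num_chars = 1;
        }
        chars[..num_chars as usize].to_vec()
    }

    fn main() {
        assert_eq!(expand(CharRef { chars: ['\0'; 2], num_chars: 0 }), vec!['&']);
        assert_eq!(expand(CharRef { chars: ['<', '\0'], num_chars: 1 }), vec!['<']);
    }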
@@ -1270,7 +1394,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
Some(mut tok) => {
tok.end_of_file(self, &mut input);
self.process_char_ref(tok.get_result());
- }
+ },
}
// Process all remaining buffered input.
@@ -1295,11 +1419,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
fn dump_profile(&self) {
- let mut results: Vec<(states::State, u64)>
- = self.state_profile.iter().map(|(s, t)| (*s, *t)).collect();
+ let mut results: Vec<(states::State, u64)> =
+ self.state_profile.iter().map(|(s, t)| (*s, *t)).collect();
results.sort_by(|&(_, x), &(_, y)| y.cmp(&x));
- let total: u64 = results.iter().map(|&(_, t)| t).fold(0, ::std::ops::Add::add);
+ let total: u64 = results
+ .iter()
+ .map(|&(_, t)| t)
+ .fold(0, ::std::ops::Add::add);
println!("\nTokenizer profile, in nanoseconds");
println!("\n{:12} total in token sink", self.time_in_sink);
println!("\n{:12} total in tokenizer", total);
@@ -1313,76 +1440,81 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn eof_step(&mut self) -> ProcessResult<Sink::Handle> {
debug!("processing EOF in state {:?}", self.state);
match self.state {
- states::Data | states::RawData(Rcdata) | states::RawData(Rawtext)
- | states::RawData(ScriptData) | states::Plaintext
- => go!(self: eof),
-
- states::TagName | states::RawData(ScriptDataEscaped(_))
- | states::BeforeAttributeName | states::AttributeName
- | states::AfterAttributeName | states::BeforeAttributeValue
- | states::AttributeValue(_) | states::AfterAttributeValueQuoted
- | states::SelfClosingStartTag | states::ScriptDataEscapedDash(_)
- | states::ScriptDataEscapedDashDash(_)
- => go!(self: error_eof; to Data),
-
- states::TagOpen
- => go!(self: error_eof; emit '<'; to Data),
-
- states::EndTagOpen
- => go!(self: error_eof; emit '<'; emit '/'; to Data),
-
- states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped))
- => go!(self: to RawData ScriptDataEscaped DoubleEscaped),
+ states::Data |
+ states::RawData(Rcdata) |
+ states::RawData(Rawtext) |
+ states::RawData(ScriptData) |
+ states::Plaintext => go!(self: eof),
+
+ states::TagName |
+ states::RawData(ScriptDataEscaped(_)) |
+ states::BeforeAttributeName |
+ states::AttributeName |
+ states::AfterAttributeName |
+ states::BeforeAttributeValue |
+ states::AttributeValue(_) |
+ states::AfterAttributeValueQuoted |
+ states::SelfClosingStartTag |
+ states::ScriptDataEscapedDash(_) |
+ states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data),
+
+ states::TagOpen => go!(self: error_eof; emit '<'; to Data),
+
+ states::EndTagOpen => go!(self: error_eof; emit '<'; emit '/'; to Data),
+
+ states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => {
+ go!(self: to RawData ScriptDataEscaped DoubleEscaped)
+ },
- states::RawLessThanSign(kind)
- => go!(self: emit '<'; to RawData kind),
+ states::RawLessThanSign(kind) => go!(self: emit '<'; to RawData kind),
- states::RawEndTagOpen(kind)
- => go!(self: emit '<'; emit '/'; to RawData kind),
+ states::RawEndTagOpen(kind) => go!(self: emit '<'; emit '/'; to RawData kind),
- states::RawEndTagName(kind)
- => go!(self: emit '<'; emit '/'; emit_temp; to RawData kind),
+ states::RawEndTagName(kind) => {
+ go!(self: emit '<'; emit '/'; emit_temp; to RawData kind)
+ },
- states::ScriptDataEscapeStart(kind)
- => go!(self: to RawData ScriptDataEscaped kind),
+ states::ScriptDataEscapeStart(kind) => go!(self: to RawData ScriptDataEscaped kind),
- states::ScriptDataEscapeStartDash
- => go!(self: to RawData ScriptData),
+ states::ScriptDataEscapeStartDash => go!(self: to RawData ScriptData),
- states::ScriptDataDoubleEscapeEnd
- => go!(self: to RawData ScriptDataEscaped DoubleEscaped),
+ states::ScriptDataDoubleEscapeEnd => {
+ go!(self: to RawData ScriptDataEscaped DoubleEscaped)
+ },
- states::CommentStart | states::CommentStartDash
- | states::Comment | states::CommentEndDash
- | states::CommentEnd | states::CommentEndBang
- => go!(self: error_eof; emit_comment; to Data),
+ states::CommentStart |
+ states::CommentStartDash |
+ states::Comment |
+ states::CommentEndDash |
+ states::CommentEnd |
+ states::CommentEndBang => go!(self: error_eof; emit_comment; to Data),
- states::Doctype | states::BeforeDoctypeName
- => go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data),
+ states::Doctype | states::BeforeDoctypeName => {
+ go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data)
+ },
- states::DoctypeName | states::AfterDoctypeName | states::AfterDoctypeKeyword(_)
- | states::BeforeDoctypeIdentifier(_) | states::DoctypeIdentifierDoubleQuoted(_)
- | states::DoctypeIdentifierSingleQuoted(_) | states::AfterDoctypeIdentifier(_)
- | states::BetweenDoctypePublicAndSystemIdentifiers
- => go!(self: error_eof; force_quirks; emit_doctype; to Data),
+ states::DoctypeName |
+ states::AfterDoctypeName |
+ states::AfterDoctypeKeyword(_) |
+ states::BeforeDoctypeIdentifier(_) |
+ states::DoctypeIdentifierDoubleQuoted(_) |
+ states::DoctypeIdentifierSingleQuoted(_) |
+ states::AfterDoctypeIdentifier(_) |
+ states::BetweenDoctypePublicAndSystemIdentifiers => {
+ go!(self: error_eof; force_quirks; emit_doctype; to Data)
+ },
- states::BogusDoctype
- => go!(self: emit_doctype; to Data),
+ states::BogusDoctype => go!(self: emit_doctype; to Data),
- states::BogusComment
- => go!(self: emit_comment; to Data),
+ states::BogusComment => go!(self: emit_comment; to Data),
- states::MarkupDeclarationOpen
- => go!(self: error; to BogusComment),
+ states::MarkupDeclarationOpen => go!(self: error; to BogusComment),
- states::CdataSection
- => go!(self: emit_temp; error_eof; to Data),
+ states::CdataSection => go!(self: emit_temp; error_eof; to Data),
- states::CdataSectionBracket
- => go!(self: push_temp ']'; to CdataSection),
+ states::CdataSectionBracket => go!(self: push_temp ']'; to CdataSection),
- states::CdataSectionEnd
- => go!(self: push_temp ']'; push_temp ']'; to CdataSection),
+ states::CdataSectionEnd => go!(self: push_temp ']'; push_temp ']'; to CdataSection),
}
}
}
@@ -1391,18 +1523,18 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
#[allow(non_snake_case)]
mod test {
use super::option_push; // private items
- use tendril::{StrTendril, SliceExt};
+ use tendril::{SliceExt, StrTendril};
- use super::{TokenSink, Tokenizer, TokenizerOpts, TokenSinkResult};
+ use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
- use super::interface::{Token, TagToken};
- use super::interface::{CharacterTokens, NullCharacterToken, EOFToken, ParseError};
- use super::interface::{TagKind, StartTag, EndTag, Tag};
+ use super::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
+ use super::interface::{EndTag, StartTag, Tag, TagKind};
+ use super::interface::{TagToken, Token};
- use markup5ever::buffer_queue::{BufferQueue};
+ use markup5ever::buffer_queue::BufferQueue;
use std::mem::replace;
- use {LocalName};
+ use LocalName;
// LinesMatch implements the TokenSink trait. It is used for testing to see
// if current_line is being updated when process_token is called. The lines
@@ -1416,9 +1548,9 @@ mod test {
impl LinesMatch {
fn new() -> LinesMatch {
LinesMatch {
- tokens: vec!(),
+ tokens: vec![],
current_str: StrTendril::new(),
- lines: vec!(),
+ lines: vec![],
}
}
@@ -1433,27 +1565,28 @@ mod test {
self.tokens.push(CharacterTokens(s));
}
}
-
}
impl TokenSink for LinesMatch {
-
type Handle = ();
- fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<()> {
-
+ fn process_token(
+ &mut self,
+ token: Token,
+ line_number: u64,
+ ) -> TokenSinkResult<()> {
match token {
CharacterTokens(b) => {
self.current_str.push_slice(&b);
- }
+ },
NullCharacterToken => {
self.current_str.push_char('\0');
- }
+ },
ParseError(_) => {
panic!("unexpected parse error");
- }
+ },
TagToken(mut t) => {
// The spec seems to indicate that one can emit
@@ -1462,12 +1595,12 @@ mod test {
match t.kind {
EndTag => {
t.self_closing = false;
- t.attrs = vec!();
- }
+ t.attrs = vec![];
+ },
_ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)),
}
self.push(TagToken(t), line_number);
- }
+ },
EOFToken => (),
@@ -1494,10 +1627,11 @@ mod test {
// Create a tag token
fn create_tag(token: StrTendril, tagkind: TagKind) -> Token {
let name = LocalName::from(&*token);
- let token = TagToken(Tag { kind: tagkind,
+ let token = TagToken(Tag {
+ kind: tagkind,
name: name,
self_closing: false,
- attrs: vec!(),
+ attrs: vec![],
});
token
}
@@ -1532,12 +1666,18 @@ mod test {
initial_state: None,
last_start_tag_name: None,
};
- let vector = vec![StrTendril::from("<a>\n"), StrTendril::from("<b>\n"),
- StrTendril::from("</b>\n"), StrTendril::from("</a>\n")];
- let expected = vec![(create_tag(StrTendril::from("a"), StartTag), 1),
+ let vector = vec![
+ StrTendril::from("<a>\n"),
+ StrTendril::from("<b>\n"),
+ StrTendril::from("</b>\n"),
+ StrTendril::from("</a>\n"),
+ ];
+ let expected = vec![
+ (create_tag(StrTendril::from("a"), StartTag), 1),
(create_tag(StrTendril::from("b"), StartTag), 2),
(create_tag(StrTendril::from("b"), EndTag), 3),
- (create_tag(StrTendril::from("a"), EndTag), 4)];
+ (create_tag(StrTendril::from("a"), EndTag), 4),
+ ];
let results = tokenize(vector, opts);
assert_eq!(results, expected);
}
@@ -1551,12 +1691,18 @@ mod test {
initial_state: None,
last_start_tag_name: None,
};
- let vector = vec![StrTendril::from("<a>\r\n"), StrTendril::from("<b>\r\n"),
- StrTendril::from("</b>\r\n"), StrTendril::from("</a>\r\n")];
- let expected = vec![(create_tag(StrTendril::from("a"), StartTag), 1),
+ let vector = vec![
+ StrTendril::from("<a>\r\n"),
+ StrTendril::from("<b>\r\n"),
+ StrTendril::from("</b>\r\n"),
+ StrTendril::from("</a>\r\n"),
+ ];
+ let expected = vec![
+ (create_tag(StrTendril::from("a"), StartTag), 1),
(create_tag(StrTendril::from("b"), StartTag), 2),
(create_tag(StrTendril::from("b"), EndTag), 3),
- (create_tag(StrTendril::from("a"), EndTag), 4)];
+ (create_tag(StrTendril::from("a"), EndTag), 4),
+ ];
let results = tokenize(vector, opts);
assert_eq!(results, expected);
}
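Aside (illustrative, not part of the patch): both tests above pin down the same property — the sink's line counter advances once per newline whether the input uses LF or CRLF, which is why the expected line numbers are identical in the two cases. The property itself, as a self-contained check (this is not the tokenizer's actual implementation):

    // Count lines the way the tests expect: a CRLF pair advances once, like LF.
    fn count_lines(input: &str) -> u64 {
        let mut line = 1;
        let mut chars = input.chars().peekable();
        while let Some(c) = chars.next() {
            match c {
                '\r' => {
                    // Treat a CRLF pair as a single newline.
                    if chars.peek() == Some(&'\n') {
                        chars.next();
                    }
                    line += 1;
                },
                '\n' => line += 1,
                _ => (),
            }
        }
        line
    }

    fn main() {
        assert_eq!(count_lines("<a>\n<b>\n</b>\n</a>\n"), 5);
        assert_eq!(count_lines("<a>\r\n<b>\r\n</b>\r\n</a>\r\n"), 5);
    }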
diff --git a/html5ever/src/tokenizer/states.rs b/html5ever/src/tokenizer/states.rs
index 142b48c7..d455e9a8 100644
--- a/html5ever/src/tokenizer/states.rs
+++ b/html5ever/src/tokenizer/states.rs
@@ -12,10 +12,10 @@
//! This is public for use by the tokenizer tests. Other library
//! users should not have to care about this.
-pub use self::ScriptEscapeKind::*;
+pub use self::AttrValueKind::*;
pub use self::DoctypeIdKind::*;
pub use self::RawKind::*;
-pub use self::AttrValueKind::*;
+pub use self::ScriptEscapeKind::*;
pub use self::State::*;
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
diff --git a/html5ever/src/tree_builder/data.rs b/html5ever/src/tree_builder/data.rs
index ae181591..477e3fb2 100644
--- a/html5ever/src/tree_builder/data.rs
+++ b/html5ever/src/tree_builder/data.rs
@@ -7,7 +7,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
+use interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
use tendril::StrTendril;
use tokenizer::Doctype;
@@ -75,9 +75,8 @@ static QUIRKY_PUBLIC_MATCHES: &'static [&'static str] = &[
"html",
];
-static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] = &[
- "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd",
-];
+static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] =
+ &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"];
static LIMITED_QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[
"-//w3c//dtd xhtml 1.0 frameset//",
@@ -110,15 +109,30 @@ pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool
let system = opt_tendril_as_slice(&doctype.system_id);
let err = match (name, public, system) {
- (Some("html"), None, None)
- | (Some("html"), None, Some("about:legacy-compat"))
- | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None)
- | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), Some("http://www.w3.org/TR/REC-html40/strict.dtd"))
- | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None)
- | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), Some("http://www.w3.org/TR/html4/strict.dtd"))
- | (Some("html"), Some("-//W3C//DTD XHTML 1.0 Strict//EN"), Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"))
- | (Some("html"), Some("-//W3C//DTD XHTML 1.1//EN"), Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"))
- => false,
+ (Some("html"), None, None) |
+ (Some("html"), None, Some("about:legacy-compat")) |
+ (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) |
+ (
+ Some("html"),
+ Some("-//W3C//DTD HTML 4.0//EN"),
+ Some("http://www.w3.org/TR/REC-html40/strict.dtd"),
+ ) |
+ (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) |
+ (
+ Some("html"),
+ Some("-//W3C//DTD HTML 4.01//EN"),
+ Some("http://www.w3.org/TR/html4/strict.dtd"),
+ ) |
+ (
+ Some("html"),
+ Some("-//W3C//DTD XHTML 1.0 Strict//EN"),
+ Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
+ ) |
+ (
+ Some("html"),
+ Some("-//W3C//DTD XHTML 1.1//EN"),
+ Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"),
+ ) => false,
_ => true,
};
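Aside (illustrative, not part of the patch): the reformatted tables and the match above implement quirks-mode sniffing — exact matches and prefix matches against the lowercased public/system identifiers. A trimmed sketch of that classification (the `quirks_mode` helper is invented; the real `doctype_error_and_quirks` also returns an error flag and handles limited-quirks prefixes and the iframe-srcdoc case):

    #[derive(Debug, PartialEq)]
    enum QuirksMode {
        Quirks,
        NoQuirks,
    }

    // Table contents trimmed to a few representative entries.
    static QUIRKY_PUBLIC_MATCHES: &[&str] = &["-//w3o//dtd w3 html strict 3.0//en//", "html"];
    static QUIRKY_PUBLIC_PREFIXES: &[&str] = &["-//ietf//dtd html//", "-//w3o//dtd w3 html//"];
    static QUIRKY_SYSTEM_MATCHES: &[&str] =
        &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"];

    fn quirks_mode(public: Option<&str>, system: Option<&str>) -> QuirksMode {
        let public = public.map(|p| p.to_ascii_lowercase());
        let system = system.map(|s| s.to_ascii_lowercase());
        let public_is_quirky = |p: &str| {
            QUIRKY_PUBLIC_MATCHES.contains(&p)
                || QUIRKY_PUBLIC_PREFIXES.iter().any(|pre| p.starts_with(pre))
        };
        match (public.as_deref(), system.as_deref()) {
            (Some(p), _) if public_is_quirky(p) => QuirksMode::Quirks,
            (_, Some(s)) if QUIRKY_SYSTEM_MATCHES.contains(&s) => QuirksMode::Quirks,
            _ => QuirksMode::NoQuirks,
        }
    }

    fn main() {
        assert_eq!(quirks_mode(Some("HTML"), None), QuirksMode::Quirks);
        assert_eq!(quirks_mode(None, None), QuirksMode::NoQuirks);
    }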
diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs
index ee09e865..e1f664b3 100644
--- a/html5ever/src/tree_builder/mod.rs
+++ b/html5ever/src/tree_builder/mod.rs
@@ -11,37 +11,38 @@
//! The HTML5 tree builder.
-pub use interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
-pub use interface::{NodeOrText, AppendNode, AppendText, Attribute};
-pub use interface::{TreeSink, Tracer, NextParserState, create_element, ElementFlags};
+pub use interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink};
+pub use interface::{AppendNode, AppendText, Attribute, NodeOrText};
+pub use interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
use self::types::*;
-use {ExpandedName, QualName, LocalName, Namespace};
use tendril::StrTendril;
+use {ExpandedName, LocalName, Namespace, QualName};
use tokenizer;
-use tokenizer::{Doctype, StartTag, Tag, EndTag, TokenSink, TokenSinkResult};
use tokenizer::states as tok_state;
+use tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};
use util::str::is_ascii_whitespace;
-use std::{slice, fmt};
use std::borrow::Cow::Borrowed;
use std::collections::VecDeque;
use std::default::Default;
-use std::iter::{Rev, Enumerate};
+use std::iter::{Enumerate, Rev};
use std::mem::replace;
+use std::{fmt, slice};
+use log::Level;
use tokenizer::states::{RawData, RawKind};
-use tree_builder::types::*;
use tree_builder::tag_sets::*;
+use tree_builder::types::*;
use util::str::to_escaped_string;
-use log::Level;
pub use self::PushFlag::*;
-#[macro_use] mod tag_sets;
+#[macro_use]
+mod tag_sets;
mod data;
mod types;
@@ -124,7 +125,6 @@ pub struct TreeBuilder<Handle, Sink> {
/// Form element pointer.
form_elem: Option<Handle>,
//§ END
-
/// Frameset-ok flag.
frameset_ok: bool,
@@ -139,7 +139,6 @@ pub struct TreeBuilder<Handle, Sink> {
/// Track current line
current_line: u64,
-
// WARNING: If you add new fields that contain Handles, you
// must add them to trace_handles() below to preserve memory
// safety!
@@ -148,8 +147,9 @@ pub struct TreeBuilder {
}
impl<Handle, Sink> TreeBuilder<Handle, Sink>
- where Handle: Clone,
- Sink: TreeSink<Handle=Handle>,
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
{
/// Create a new tree builder which sends tree modifications to a particular `TreeSink`.
///
@@ -161,12 +161,12 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
sink: sink,
mode: Initial,
orig_mode: None,
- template_modes: vec!(),
- pending_table_text: vec!(),
+ template_modes: vec![],
+ pending_table_text: vec![],
quirks_mode: opts.quirks_mode,
doc_handle: doc_handle,
- open_elems: vec!(),
- active_formatting: vec!(),
+ open_elems: vec![],
+ active_formatting: vec![],
head_elem: None,
form_elem: None,
frameset_ok: true,
@@ -181,24 +181,29 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
/// This is for parsing fragments.
///
/// The tree builder is also a `TokenSink`.
- pub fn new_for_fragment(mut sink: Sink,
- context_elem: Handle,
- form_elem: Option<Handle>,
- opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
+ pub fn new_for_fragment(
+ mut sink: Sink,
+ context_elem: Handle,
+ form_elem: Option<Handle>,
+ opts: TreeBuilderOpts,
+ ) -> TreeBuilder<Handle, Sink> {
let doc_handle = sink.get_document();
- let context_is_template =
- sink.elem_name(&context_elem) == expanded_name!(html "template");
+ let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template");
let mut tb = TreeBuilder {
opts: opts,
sink: sink,
mode: Initial,
orig_mode: None,
- template_modes: if context_is_template { vec![InTemplate] } else { vec![] },
- pending_table_text: vec!(),
+ template_modes: if context_is_template {
+ vec![InTemplate]
+ } else {
+ vec![]
+ },
+ pending_table_text: vec![],
quirks_mode: opts.quirks_mode,
doc_handle: doc_handle,
- open_elems: vec!(),
- active_formatting: vec!(),
+ open_elems: vec![],
+ active_formatting: vec![],
head_elem: None,
form_elem: form_elem,
frameset_ok: true,
@@ -212,7 +217,7 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
// 5. Let root be a new html element with no attributes.
// 6. Append the element root to the Document node created above.
// 7. Set up the parser's stack of open elements so that it contains just the single element root.
- tb.create_root(vec!());
+ tb.create_root(vec![]);
// 10. Reset the parser's insertion mode appropriately.
tb.mode = tb.reset_insertion_mode();
@@ -224,32 +229,40 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
let elem = self.context_elem.as_ref().expect("no context element");
let name = match self.sink.elem_name(elem) {
- ExpandedName { ns: &ns!(html), local } => local,
- _ => return tok_state::Data
+ ExpandedName {
+ ns: &ns!(html),
+ local,
+ } => local,
+ _ => return tok_state::Data,
};
match *name {
local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata),
- local_name!("style") | local_name!("xmp") | local_name!("iframe")
- | local_name!("noembed") | local_name!("noframes") => tok_state::RawData(tok_state::Rawtext),
+ local_name!("style") |
+ local_name!("xmp") |
+ local_name!("iframe") |
+ local_name!("noembed") |
+ local_name!("noframes") => tok_state::RawData(tok_state::Rawtext),
local_name!("script") => tok_state::RawData(tok_state::ScriptData),
- local_name!("noscript") => if self.opts.scripting_enabled {
- tok_state::RawData(tok_state::Rawtext)
- } else {
- tok_state::Data
+ local_name!("noscript") => {
+ if self.opts.scripting_enabled {
+ tok_state::RawData(tok_state::Rawtext)
+ } else {
+ tok_state::Data
+ }
},
local_name!("plaintext") => tok_state::Plaintext,
- _ => tok_state::Data
+ _ => tok_state::Data,
}
}
/// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's
/// internal state. This is intended to support garbage-collected DOMs.
- pub fn trace_handles(&self, tracer: &Tracer<Handle=Handle>) {
+ pub fn trace_handles(&self, tracer: &Tracer<Handle = Handle>) {
tracer.trace_handle(&self.doc_handle);
for e in &self.open_elems {
tracer.trace_handle(e);
@@ -287,7 +300,7 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
ns!(html) => print!(" {}", name.local),
_ => panic!(),
}
- }
+ },
}
}
println!("");
@@ -295,7 +308,11 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
fn debug_step(&self, mode: InsertionMode, token: &Token) {
if log_enabled!(Level::Debug) {
- debug!("processing {} in insertion mode {:?}", to_escaped_string(token), mode);
+ debug!(
+ "processing {} in insertion mode {:?}",
+ to_escaped_string(token),
+ mode
+ );
}
}
@@ -305,8 +322,14 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
let mut more_tokens = VecDeque::new();
loop {
- let should_have_acknowledged_self_closing_flag =
- matches!(token, TagToken(Tag { self_closing: true, kind: StartTag, .. }));
+ let should_have_acknowledged_self_closing_flag = matches!(
+ token,
+ TagToken(Tag {
+ self_closing: true,
+ kind: StartTag,
+ ..
+ })
+ );
let result = if self.is_foreign(&token) {
self.step_foreign(token)
} else {
@@ -316,20 +339,27 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
match result {
Done => {
if should_have_acknowledged_self_closing_flag {
- self.sink.parse_error(Borrowed("Unacknowledged self-closing tag"));
+ self.sink
+ .parse_error(Borrowed("Unacknowledged self-closing tag"));
}
- token = unwrap_or_return!(more_tokens.pop_front(), tokenizer::TokenSinkResult::Continue);
- }
+ token = unwrap_or_return!(
+ more_tokens.pop_front(),
+ tokenizer::TokenSinkResult::Continue
+ );
+ },
DoneAckSelfClosing => {
- token = unwrap_or_return!(more_tokens.pop_front(), tokenizer::TokenSinkResult::Continue);
- }
+ token = unwrap_or_return!(
+ more_tokens.pop_front(),
+ tokenizer::TokenSinkResult::Continue
+ );
+ },
Reprocess(m, t) => {
self.mode = m;
token = t;
- }
+ },
ReprocessForeign(t) => {
token = t;
- }
+ },
SplitWhitespace(mut buf) => {
let p = buf.pop_front_char_run(is_ascii_whitespace);
let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue);
@@ -339,19 +369,19 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
if buf.len32() > 0 {
more_tokens.push_back(CharacterTokens(NotSplit, buf));
}
- }
+ },
Script(node) => {
assert!(more_tokens.is_empty());
return tokenizer::TokenSinkResult::Script(node);
- }
+ },
ToPlaintext => {
assert!(more_tokens.is_empty());
return tokenizer::TokenSinkResult::Plaintext;
- }
+ },
ToRawData(k) => {
assert!(more_tokens.is_empty());
return tokenizer::TokenSinkResult::RawData(k);
- }
+ },
}
}
}
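Aside (illustrative, not part of the patch): `process_to_completion` above is a small work loop — each step either finishes the current token (`Done`, pull the next queued one), swaps in a replacement token (`Reprocess`/`ReprocessForeign`), or splits off a prefix and queues the remainder (`SplitWhitespace`). The control shape on its own, with a toy `step` over integers instead of tokens:

    use std::collections::VecDeque;

    enum StepResult {
        Done,
        Reprocess(i32),  // run a replacement token immediately
        Split(i32, i32), // handle the first part now, queue the rest
    }

    fn step(total: &mut i32, token: i32) -> StepResult {
        match token {
            t if t < 0 => StepResult::Reprocess(-t),
            t if t > 9 => StepResult::Split(t / 10, t % 10),
            t => {
                *total += t;
                StepResult::Done
            },
        }
    }

    fn process_to_completion(total: &mut i32, mut token: i32) {
        let mut more_tokens = VecDeque::new();
        loop {
            match step(total, token) {
                StepResult::Done => match more_tokens.pop_front() {
                    Some(t) => token = t,
                    None => return,
                },
                StepResult::Reprocess(t) => token = t,
                StepResult::Split(first, rest) => {
                    more_tokens.push_back(rest);
                    token = first;
                },
            }
        }
    }

    fn main() {
        let mut total = 0;
        // 123 splits to 12 (3 queued), 12 splits to 1 (2 queued); 1+3+2 == 6.
        process_to_completion(&mut total, 123);
        assert_eq!(total, 6);
    }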
@@ -362,9 +392,10 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
}
/// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node
- fn appropriate_place_for_insertion(&mut self,
- override_target: Option<Handle>)
- -> InsertionPoint<Handle> {
+ fn appropriate_place_for_insertion(
+ &mut self,
+ override_target: Option<Handle>,
+ ) -> InsertionPoint<Handle> {
use self::tag_sets::*;
declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr");
@@ -401,22 +432,28 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
match insertion_point {
LastChild(parent) => self.sink.append(&parent, child),
BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child),
- TableFosterParenting { element, prev_element } => self.sink.append_based_on_parent_node(
- &element,
- &prev_element,
- child),
+ TableFosterParenting {
+ element,
+ prev_element,
+ } => self
+ .sink
+ .append_based_on_parent_node(&element, &prev_element, child),
}
}
}
-impl<Handle, Sink> TokenSink<Handle>
- for TreeBuilder<Handle, Sink>
- where Handle: Clone,
- Sink: TreeSink<Handle=Handle>,
+impl<Handle, Sink> TokenSink<Handle> for TreeBuilder<Handle, Sink>
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
{
type Handle = Handle;
- fn process_token(&mut self, token: tokenizer::Token, line_number: u64) -> TokenSinkResult<Handle> {
+ fn process_token(
+ &mut self,
+ token: tokenizer::Token,
+ line_number: u64,
+ ) -> TokenSinkResult<Handle> {
if line_number != self.current_line {
self.sink.set_current_line(line_number);
}
@@ -427,34 +464,45 @@ impl<Handle, Sink> TokenSink<Handle> for TreeBuilder<Handle, Sink>
tokenizer::ParseError(e) => {
self.sink.parse_error(e);
return tokenizer::TokenSinkResult::Continue;
- }
+ },
+
+ tokenizer::DoctypeToken(dt) => {
+ if self.mode == Initial {
+ let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
+ if err {
+ self.sink.parse_error(format_if!(
+ self.opts.exact_errors,
+ "Bad DOCTYPE",
+ "Bad DOCTYPE: {:?}",
+ dt
+ ));
+ }
+ let Doctype {
+ name,
+ public_id,
+ system_id,
+ force_quirks: _,
+ } = dt;
+ if !self.opts.drop_doctype {
+ self.sink.append_doctype_to_document(
+ name.unwrap_or(StrTendril::new()),
+ public_id.unwrap_or(StrTendril::new()),
+ system_id.unwrap_or(StrTendril::new()),
+ );
+ }
+ self.set_quirks_mode(quirk);
- tokenizer::DoctypeToken(dt) => if self.mode == Initial {
- let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
- if err {
+ self.mode = BeforeHtml;
+ return tokenizer::TokenSinkResult::Continue;
+ } else {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
- "Bad DOCTYPE",
- "Bad DOCTYPE: {:?}", dt));
- }
- let Doctype { name, public_id, system_id, force_quirks: _ } = dt;
- if !self.opts.drop_doctype {
- self.sink.append_doctype_to_document(
- name.unwrap_or(StrTendril::new()),
- public_id.unwrap_or(StrTendril::new()),
- system_id.unwrap_or(StrTendril::new())
- );
+ "DOCTYPE in body",
+ "DOCTYPE in insertion mode {:?}",
+ self.mode
+ ));
+ return tokenizer::TokenSinkResult::Continue;
}
- self.set_quirks_mode(quirk);
-
- self.mode = BeforeHtml;
- return tokenizer::TokenSinkResult::Continue;
- } else {
- self.sink.parse_error(format_if!(
- self.opts.exact_errors,
- "DOCTYPE in body",
- "DOCTYPE in insertion mode {:?}", self.mode));
- return tokenizer::TokenSinkResult::Continue;
},
tokenizer::TagToken(x) => TagToken(x),
@@ -470,7 +518,7 @@ impl<Handle, Sink> TokenSink<Handle> for TreeBuilder<Handle, Sink>
return tokenizer::TokenSinkResult::Continue;
}
CharacterTokens(NotSplit, x)
- }
+ },
};
self.process_to_completion(token)
@@ -484,12 +532,12 @@ impl<Handle, Sink> TokenSink<Handle> for TreeBuilder<Handle, Sink>
fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
!self.open_elems.is_empty() &&
- self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html)
+ self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html)
}
}
pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle {
- &open_elems[0]
+ &open_elems[0]
}
pub struct ActiveFormattingIter<'a, Handle: 'a> {
@@ -530,19 +578,23 @@ macro_rules! qualname {
ns: ns!($ns),
local: local_name!($local),
}
- }
+ };
}
#[doc(hidden)]
impl<Handle, Sink> TreeBuilder<Handle, Sink>
- where Handle: Clone,
- Sink: TreeSink<Handle=Handle>,
+where
+ Handle: Clone,
+ Sink: TreeSink<Handle = Handle>,
{
fn unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle> {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"Unexpected token",
- "Unexpected token {} in insertion mode {:?}", to_escaped_string(_thing), self.mode));
+ "Unexpected token {} in insertion mode {:?}",
+ to_escaped_string(_thing),
+ self.mode
+ ));
Done
}
@@ -559,14 +611,10 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
}
fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> {
- self.active_formatting
- .iter()
- .position(|n| {
- match n {
- &Marker => false,
- &Element(ref handle, _) => self.sink.same_node(handle, element)
- }
- })
+ self.active_formatting.iter().position(|n| match n {
+ &Marker => false,
+ &Element(ref handle, _) => self.sink.same_node(handle, element),
+ })
}
fn set_quirks_mode(&mut self, mode: QuirksMode) {
@@ -610,7 +658,8 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
}
fn current_node_in<TagSet>(&self, set: TagSet) -> bool
- where TagSet: Fn(ExpandedName) -> bool
+ where
+ TagSet: Fn(ExpandedName) -> bool,
{
set(self.sink.elem_name(self.current_node()))
}
@@ -624,7 +673,10 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
fn adoption_agency(&mut self, subject: LocalName) {
// 1.
if self.current_node_named(subject.clone()) {
- if self.position_in_active_formatting(self.current_node()).is_none() {
+ if self
+ .position_in_active_formatting(self.current_node())
+ .is_none()
+ {
self.pop();
return;
}
@@ -639,47 +691,49 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>