Upgrade pulldown_cmark to 0.6.1

This commit is contained in:
Marcus Klaas de Vries 2019-11-11 20:25:38 +01:00
parent 1f505c2b2e
commit 2a3088422a
6 changed files with 245 additions and 219 deletions

379
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,7 @@ lazy_static = "1.0"
log = "0.4" log = "0.4"
memchr = "2.0" memchr = "2.0"
open = "1.1" open = "1.1"
pulldown-cmark = "0.5" pulldown-cmark = "0.6"
regex = "1.0.0" regex = "1.0.0"
serde = "1.0" serde = "1.0"
serde_derive = "1.0" serde_derive = "1.0"

View File

@ -153,7 +153,8 @@ impl From<Link> for SummaryItem {
/// > match the following regex: "[^<>\n[]]+". /// > match the following regex: "[^<>\n[]]+".
struct SummaryParser<'a> { struct SummaryParser<'a> {
src: &'a str, src: &'a str,
stream: pulldown_cmark::Parser<'a>, stream: pulldown_cmark::OffsetIter<'a>,
offset: usize,
} }
/// Reads `Events` from the provided stream until the corresponding /// Reads `Events` from the provided stream until the corresponding
@ -174,7 +175,7 @@ macro_rules! collect_events {
let mut events = Vec::new(); let mut events = Vec::new();
loop { loop {
let event = $stream.next(); let event = $stream.next().map(|(ev, _range)| ev);
trace!("Next event: {:?}", event); trace!("Next event: {:?}", event);
match event { match event {
@ -196,23 +197,22 @@ macro_rules! collect_events {
impl<'a> SummaryParser<'a> { impl<'a> SummaryParser<'a> {
fn new(text: &str) -> SummaryParser<'_> { fn new(text: &str) -> SummaryParser<'_> {
let pulldown_parser = pulldown_cmark::Parser::new(text); let pulldown_parser = pulldown_cmark::Parser::new(text).into_offset_iter();
SummaryParser { SummaryParser {
src: text, src: text,
stream: pulldown_parser, stream: pulldown_parser,
offset: 0,
} }
} }
/// Get the current line and column to give the user more useful error /// Get the current line and column to give the user more useful error
/// messages. /// messages.
fn current_location(&self) -> (usize, usize) { fn current_location(&self) -> (usize, usize) {
let byte_offset = self.stream.get_offset(); let previous_text = self.src[..self.offset].as_bytes();
let previous_text = self.src[..byte_offset].as_bytes();
let line = Memchr::new(b'\n', previous_text).count() + 1; let line = Memchr::new(b'\n', previous_text).count() + 1;
let start_of_line = memchr::memrchr(b'\n', previous_text).unwrap_or(0); let start_of_line = memchr::memrchr(b'\n', previous_text).unwrap_or(0);
let col = self.src[start_of_line..byte_offset].chars().count(); let col = self.src[start_of_line..self.offset].chars().count();
(line, col) (line, col)
} }
@ -263,7 +263,7 @@ impl<'a> SummaryParser<'a> {
let link = self.parse_link(href.to_string())?; let link = self.parse_link(href.to_string())?;
items.push(SummaryItem::Link(link)); items.push(SummaryItem::Link(link));
} }
Some(Event::Start(Tag::Rule)) => items.push(SummaryItem::Separator), Some(Event::Rule) => items.push(SummaryItem::Separator),
Some(_) => {} Some(_) => {}
None => break, None => break,
} }
@ -319,9 +319,6 @@ impl<'a> SummaryParser<'a> {
break; break;
} }
Some(Event::Start(other_tag)) => { Some(Event::Start(other_tag)) => {
if other_tag == Tag::Rule {
items.push(SummaryItem::Separator);
}
trace!("Skipping contents of {:?}", other_tag); trace!("Skipping contents of {:?}", other_tag);
// Skip over the contents of this tag // Skip over the contents of this tag
@ -337,6 +334,14 @@ impl<'a> SummaryParser<'a> {
break; break;
} }
} }
Some(Event::Rule) => {
items.push(SummaryItem::Separator);
if let Some(Event::Start(Tag::List(..))) = self.next_event() {
continue;
} else {
break;
}
}
Some(_) => { Some(_) => {
// something else... ignore // something else... ignore
continue; continue;
@ -352,7 +357,10 @@ impl<'a> SummaryParser<'a> {
} }
fn next_event(&mut self) -> Option<Event<'a>> { fn next_event(&mut self) -> Option<Event<'a>> {
let next = self.stream.next(); let next = self.stream.next().map(|(ev, range)| {
self.offset = range.start;
ev
});
trace!("Next event: {:?}", next); trace!("Next event: {:?}", next);
next next
@ -431,10 +439,10 @@ impl<'a> SummaryParser<'a> {
/// Try to parse the title line. /// Try to parse the title line.
fn parse_title(&mut self) -> Option<String> { fn parse_title(&mut self) -> Option<String> {
if let Some(Event::Start(Tag::Header(1))) = self.next_event() { if let Some(Event::Start(Tag::Heading(1))) = self.next_event() {
debug!("Found a h1 in the SUMMARY"); debug!("Found a h1 in the SUMMARY");
let tags = collect_events!(self.stream, end Tag::Header(1)); let tags = collect_events!(self.stream, end Tag::Heading(1));
Some(stringify_events(tags)) Some(stringify_events(tags))
} else { } else {
None None
@ -629,7 +637,7 @@ mod tests {
let _ = parser.stream.next(); // skip past start of paragraph let _ = parser.stream.next(); // skip past start of paragraph
let href = match parser.stream.next() { let href = match parser.stream.next() {
Some(Event::Start(Tag::Link(_type, href, _title))) => href.to_string(), Some((Event::Start(Tag::Link(_type, href, _title)), _range)) => href.to_string(),
other => panic!("Unreachable, {:?}", other), other => panic!("Unreachable, {:?}", other),
}; };

View File

@ -150,7 +150,7 @@ impl HelperDef for RenderToc {
// filter all events that are not inline code blocks // filter all events that are not inline code blocks
let parser = Parser::new(name).filter(|event| match *event { let parser = Parser::new(name).filter(|event| match *event {
Event::Code(_) | Event::InlineHtml(_) | Event::Text(_) => true, Event::Code(_) | Event::Html(_) | Event::Text(_) => true,
_ => false, _ => false,
}); });

View File

@ -81,22 +81,21 @@ fn render_item(
.chain_err(|| "Could not convert HTML path to str")?; .chain_err(|| "Could not convert HTML path to str")?;
let anchor_base = utils::fs::normalize_path(filepath); let anchor_base = utils::fs::normalize_path(filepath);
let p = utils::new_cmark_parser(&chapter.content); let mut p = utils::new_cmark_parser(&chapter.content).peekable();
let mut in_header = false; let mut in_heading = false;
let max_section_depth = i32::from(search_config.heading_split_level); let max_section_depth = u32::from(search_config.heading_split_level);
let mut section_id = None; let mut section_id = None;
let mut heading = String::new(); let mut heading = String::new();
let mut body = String::new(); let mut body = String::new();
let mut html_block = String::new();
let mut breadcrumbs = chapter.parent_names.clone(); let mut breadcrumbs = chapter.parent_names.clone();
let mut footnote_numbers = HashMap::new(); let mut footnote_numbers = HashMap::new();
for event in p { while let Some(event) = p.next() {
match event { match event {
Event::Start(Tag::Header(i)) if i <= max_section_depth => { Event::Start(Tag::Heading(i)) if i <= max_section_depth => {
if !heading.is_empty() { if !heading.is_empty() {
// Section finished, the next header is following now // Section finished, the next heading is following now
// Write the data to the index, and clear it for the next section // Write the data to the index, and clear it for the next section
add_doc( add_doc(
index, index,
@ -111,10 +110,10 @@ fn render_item(
breadcrumbs.pop(); breadcrumbs.pop();
} }
in_header = true; in_heading = true;
} }
Event::End(Tag::Header(i)) if i <= max_section_depth => { Event::End(Tag::Heading(i)) if i <= max_section_depth => {
in_header = false; in_heading = false;
section_id = Some(utils::id_from_content(&heading)); section_id = Some(utils::id_from_content(&heading));
breadcrumbs.push(heading.clone()); breadcrumbs.push(heading.clone());
} }
@ -123,31 +122,34 @@ fn render_item(
footnote_numbers.entry(name).or_insert(number); footnote_numbers.entry(name).or_insert(number);
} }
Event::Html(html) => { Event::Html(html) => {
html_block.push_str(&html); let mut html_block = html.into_string();
}
Event::End(Tag::HtmlBlock) => { // As of pulldown_cmark 0.6, html events are no longer contained
// in an HtmlBlock tag. We must collect consecutive Html events
// into a block ourselves.
while let Some(Event::Html(html)) = p.peek() {
html_block.push_str(&html);
p.next();
}
body.push_str(&clean_html(&html_block)); body.push_str(&clean_html(&html_block));
html_block.clear();
} }
Event::Start(_) | Event::End(_) | Event::SoftBreak | Event::HardBreak => { Event::Start(_) | Event::End(_) | Event::Rule | Event::SoftBreak | Event::HardBreak => {
// Insert spaces where HTML output would usually separate text // Insert spaces where HTML output would usually separate text
// to ensure words don't get merged together // to ensure words don't get merged together
if in_header { if in_heading {
heading.push(' '); heading.push(' ');
} else { } else {
body.push(' '); body.push(' ');
} }
} }
Event::Text(text) | Event::Code(text) => { Event::Text(text) | Event::Code(text) => {
if in_header { if in_heading {
heading.push_str(&text); heading.push_str(&text);
} else { } else {
body.push_str(&text); body.push_str(&text);
} }
} }
Event::InlineHtml(html) => {
body.push_str(&clean_html(&html));
}
Event::FootnoteReference(name) => { Event::FootnoteReference(name) => {
let len = footnote_numbers.len() + 1; let len = footnote_numbers.len() + 1;
let number = footnote_numbers.entry(name).or_insert(len); let number = footnote_numbers.entry(name).or_insert(len);

View File

@ -158,7 +158,6 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
Event::Start(Tag::Image(link_type, fix(dest, path), title)) Event::Start(Tag::Image(link_type, fix(dest, path), title))
} }
Event::Html(html) => Event::Html(fix_html(html, path)), Event::Html(html) => Event::Html(fix_html(html, path)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
_ => event, _ => event,
} }
} }