Merge pull request #1021 from marcusklaas/pulldown0.6

Upgrade pulldown_cmark to 0.6
This commit is contained in:
Eric Huss 2019-11-11 13:27:19 -08:00 committed by GitHub
commit efdb83266a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 245 additions and 219 deletions

379
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,7 @@ lazy_static = "1.0"
log = "0.4"
memchr = "2.0"
open = "1.1"
pulldown-cmark = "0.5"
pulldown-cmark = "0.6.1"
regex = "1.0.0"
serde = "1.0"
serde_derive = "1.0"

View File

@ -153,7 +153,8 @@ impl From<Link> for SummaryItem {
/// > match the following regex: "[^<>\n[]]+".
struct SummaryParser<'a> {
src: &'a str,
stream: pulldown_cmark::Parser<'a>,
stream: pulldown_cmark::OffsetIter<'a>,
offset: usize,
}
/// Reads `Events` from the provided stream until the corresponding
@ -174,7 +175,7 @@ macro_rules! collect_events {
let mut events = Vec::new();
loop {
let event = $stream.next();
let event = $stream.next().map(|(ev, _range)| ev);
trace!("Next event: {:?}", event);
match event {
@ -196,23 +197,22 @@ macro_rules! collect_events {
impl<'a> SummaryParser<'a> {
fn new(text: &str) -> SummaryParser<'_> {
let pulldown_parser = pulldown_cmark::Parser::new(text);
let pulldown_parser = pulldown_cmark::Parser::new(text).into_offset_iter();
SummaryParser {
src: text,
stream: pulldown_parser,
offset: 0,
}
}
/// Get the current line and column to give the user more useful error
/// messages.
fn current_location(&self) -> (usize, usize) {
let byte_offset = self.stream.get_offset();
let previous_text = self.src[..byte_offset].as_bytes();
let previous_text = self.src[..self.offset].as_bytes();
let line = Memchr::new(b'\n', previous_text).count() + 1;
let start_of_line = memchr::memrchr(b'\n', previous_text).unwrap_or(0);
let col = self.src[start_of_line..byte_offset].chars().count();
let col = self.src[start_of_line..self.offset].chars().count();
(line, col)
}
@ -263,7 +263,7 @@ impl<'a> SummaryParser<'a> {
let link = self.parse_link(href.to_string())?;
items.push(SummaryItem::Link(link));
}
Some(Event::Start(Tag::Rule)) => items.push(SummaryItem::Separator),
Some(Event::Rule) => items.push(SummaryItem::Separator),
Some(_) => {}
None => break,
}
@ -319,9 +319,6 @@ impl<'a> SummaryParser<'a> {
break;
}
Some(Event::Start(other_tag)) => {
if other_tag == Tag::Rule {
items.push(SummaryItem::Separator);
}
trace!("Skipping contents of {:?}", other_tag);
// Skip over the contents of this tag
@ -337,6 +334,14 @@ impl<'a> SummaryParser<'a> {
break;
}
}
Some(Event::Rule) => {
items.push(SummaryItem::Separator);
if let Some(Event::Start(Tag::List(..))) = self.next_event() {
continue;
} else {
break;
}
}
Some(_) => {
// something else... ignore
continue;
@ -352,7 +357,10 @@ impl<'a> SummaryParser<'a> {
}
fn next_event(&mut self) -> Option<Event<'a>> {
let next = self.stream.next();
let next = self.stream.next().map(|(ev, range)| {
self.offset = range.start;
ev
});
trace!("Next event: {:?}", next);
next
@ -431,10 +439,10 @@ impl<'a> SummaryParser<'a> {
/// Try to parse the title line.
fn parse_title(&mut self) -> Option<String> {
if let Some(Event::Start(Tag::Header(1))) = self.next_event() {
if let Some(Event::Start(Tag::Heading(1))) = self.next_event() {
debug!("Found a h1 in the SUMMARY");
let tags = collect_events!(self.stream, end Tag::Header(1));
let tags = collect_events!(self.stream, end Tag::Heading(1));
Some(stringify_events(tags))
} else {
None
@ -629,7 +637,7 @@ mod tests {
let _ = parser.stream.next(); // skip past start of paragraph
let href = match parser.stream.next() {
Some(Event::Start(Tag::Link(_type, href, _title))) => href.to_string(),
Some((Event::Start(Tag::Link(_type, href, _title)), _range)) => href.to_string(),
other => panic!("Unreachable, {:?}", other),
};

View File

@ -150,7 +150,7 @@ impl HelperDef for RenderToc {
// filter all events that are not inline code blocks
let parser = Parser::new(name).filter(|event| match *event {
Event::Code(_) | Event::InlineHtml(_) | Event::Text(_) => true,
Event::Code(_) | Event::Html(_) | Event::Text(_) => true,
_ => false,
});

View File

@ -81,22 +81,21 @@ fn render_item(
.chain_err(|| "Could not convert HTML path to str")?;
let anchor_base = utils::fs::normalize_path(filepath);
let p = utils::new_cmark_parser(&chapter.content);
let mut p = utils::new_cmark_parser(&chapter.content).peekable();
let mut in_header = false;
let max_section_depth = i32::from(search_config.heading_split_level);
let mut in_heading = false;
let max_section_depth = u32::from(search_config.heading_split_level);
let mut section_id = None;
let mut heading = String::new();
let mut body = String::new();
let mut html_block = String::new();
let mut breadcrumbs = chapter.parent_names.clone();
let mut footnote_numbers = HashMap::new();
for event in p {
while let Some(event) = p.next() {
match event {
Event::Start(Tag::Header(i)) if i <= max_section_depth => {
Event::Start(Tag::Heading(i)) if i <= max_section_depth => {
if !heading.is_empty() {
// Section finished, the next header is following now
// Section finished, the next heading is following now
// Write the data to the index, and clear it for the next section
add_doc(
index,
@ -111,10 +110,10 @@ fn render_item(
breadcrumbs.pop();
}
in_header = true;
in_heading = true;
}
Event::End(Tag::Header(i)) if i <= max_section_depth => {
in_header = false;
Event::End(Tag::Heading(i)) if i <= max_section_depth => {
in_heading = false;
section_id = Some(utils::id_from_content(&heading));
breadcrumbs.push(heading.clone());
}
@ -123,31 +122,34 @@ fn render_item(
footnote_numbers.entry(name).or_insert(number);
}
Event::Html(html) => {
let mut html_block = html.into_string();
// As of pulldown_cmark 0.6, html events are no longer contained
// in an HtmlBlock tag. We must collect consecutive Html events
// into a block ourselves.
while let Some(Event::Html(html)) = p.peek() {
html_block.push_str(&html);
p.next();
}
Event::End(Tag::HtmlBlock) => {
body.push_str(&clean_html(&html_block));
html_block.clear();
}
Event::Start(_) | Event::End(_) | Event::SoftBreak | Event::HardBreak => {
Event::Start(_) | Event::End(_) | Event::Rule | Event::SoftBreak | Event::HardBreak => {
// Insert spaces where HTML output would usually separate text
// to ensure words don't get merged together
if in_header {
if in_heading {
heading.push(' ');
} else {
body.push(' ');
}
}
Event::Text(text) | Event::Code(text) => {
if in_header {
if in_heading {
heading.push_str(&text);
} else {
body.push_str(&text);
}
}
Event::InlineHtml(html) => {
body.push_str(&clean_html(&html));
}
Event::FootnoteReference(name) => {
let len = footnote_numbers.len() + 1;
let number = footnote_numbers.entry(name).or_insert(len);

View File

@ -158,7 +158,6 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
Event::Start(Tag::Image(link_type, fix(dest, path), title))
}
Event::Html(html) => Event::Html(fix_html(html, path)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
_ => event,
}
}