Did a little refactoring to clean up the summary parser

This commit is contained in:
Michael Bryan 2017-06-30 21:25:27 +08:00
parent 131a404064
commit eb59319dc3
2 changed files with 104 additions and 64 deletions

View File

@ -1,4 +1,42 @@
//! Functionality for loading the internal book representation from disk. //! Functionality for loading the internal book representation from disk.
//!
//! The typical use case is to create a `Loader` pointing at the correct
//! source directory, then call the `load()` method. Internally this will
//! search for the `SUMMARY.md` file, parse it, then use the parsed
//! `Summary` to construct an in-memory representation of the entire book.
//!
//! # Examples
//!
//! ```rust,no_run
//! # fn run() -> mdbook::errors::Result<()> {
//! use mdbook::loader::Loader;
//! let loader = Loader::new("./src/");
//! let book = loader.load()?;
//! # Ok(())
//! # }
//! # fn main() { run().unwrap() }
//! ```
//!
//! Alternatively, if you are using the `mdbook` crate as a library and
//! only want to read the `SUMMARY.md` file without having to load the
//! entire book from disk, you can use the `parse_summary()` function.
//!
//! ```rust
//! # fn run() -> mdbook::errors::Result<()> {
//! use mdbook::loader::parse_summary;
//! let src = "# Book Summary
//!
//! [Introduction](./index.md)
//! - [First Chapter](./first/index.md)
//! - [Sub-Section](./first/subsection.md)
//! - [Second Chapter](./second/index.md)
//! ";
//! let summary = parse_summary(src)?;
//! println!("{:#?}", summary);
//! # Ok(())
//! # }
//! # fn main() { run().unwrap() }
//! ```
#![deny(missing_docs)] #![deny(missing_docs)]

View File

@ -130,7 +130,22 @@ enum State {
End, End,
} }
/// A stateful parser for parsing a `SUMMARY.md` file. /// A state machine parser for parsing a `SUMMARY.md` file.
///
/// The parser has roughly 5 states:
///
/// - **Begin:** The initial state
/// - **Prefix Chapters:** Parsing the prefix chapters
/// - **Numbered Chapters:** Parsing the numbered chapters, using a `usize` to
///   indicate the nesting level (because chapters can have sub-chapters)
/// - **Suffix Chapters:** Parsing the suffix chapters, pretty much identical
///   to the Prefix Chapters state
/// - **End:** The final state
///
/// The `parse()` method then continually invokes `step()` until it reaches the
/// `End` state. Parsing is guaranteed to (eventually) finish because every call
/// to `step()` reads the next `Event` from the underlying
/// `pulldown_cmark::Parser` and passes it into the current state's associated
/// method; once that stream of events is exhausted, the parser switches to the
/// `End` state.
///
/// ///
/// # Grammar /// # Grammar
/// ///
@ -209,7 +224,7 @@ impl<'a> SummaryParser<'a> {
if let Some(ref title) = self.summary.title { if let Some(ref title) = self.summary.title {
debug!("[*] Title is {:?}", title); debug!("[*] Title is {:?}", title);
} }
while self.state != State::End { while self.state != State::End {
self.step()?; self.step()?;
} }
@ -219,15 +234,15 @@ impl<'a> SummaryParser<'a> {
fn step(&mut self) -> Result<()> { fn step(&mut self) -> Result<()> {
if let Some(next_event) = self.stream.next() { if let Some(next_event) = self.stream.next() {
trace!("[*] Current state: {:?}, next event: {:?}", self.state, next_event); trace!("[*] Current state: {:?}, next event: {:?}", self.state, next_event);
match self.state { match self.state {
State::Begin => self.step_start(next_event)?, State::Begin => self.step_start(next_event)?,
State::PrefixChapters => self.step_prefix(next_event)?, State::PrefixChapters => self.step_prefix(next_event)?,
State::NumberedChapters(n) => self.step_numbered(next_event, n)?, State::NumberedChapters(n) => self.step_numbered(next_event, n)?,
State::SuffixChapters => self.step_suffix(next_event)?, State::SuffixChapters => self.step_suffix(next_event)?,
State::End => {}, State::End => {},
} }
} else { } else {
trace!("[*] Reached end of SUMMARY.md"); trace!("[*] Reached end of SUMMARY.md");
self.state = State::End; self.state = State::End;
@ -236,7 +251,7 @@ impl<'a> SummaryParser<'a> {
Ok(()) Ok(())
} }
/// The very first state, we should see a `BeginParagraph` token or /// The very first state, we should see a `Begin Paragraph` token or
/// it's an error... /// it's an error...
fn step_start(&mut self, event: Event<'a>) -> Result<()> { fn step_start(&mut self, event: Event<'a>) -> Result<()> {
match event { match event {
@ -258,14 +273,9 @@ impl<'a> SummaryParser<'a> {
Event::Start(Tag::Link(location, _)) => { Event::Start(Tag::Link(location, _)) => {
let content = collect_events!(self.stream, Tag::Link(_, _)); let content = collect_events!(self.stream, Tag::Link(_, _));
let text = stringify_events(content); let text = stringify_events(content);
let link = Link { let link = Link::new(text, location.as_ref());
name: text,
location: PathBuf::from(location.as_ref()),
number: None,
nested_items: Vec::new(),
};
debug!("[*] Found a prefix chapter, {:?}", link.name); debug!("[*] Found a prefix chapter: {:?}", link.name);
self.summary.prefix_chapters.push(SummaryItem::Link(link)); self.summary.prefix_chapters.push(SummaryItem::Link(link));
}, },
Event::End(Tag::Rule) => { Event::End(Tag::Rule) => {
@ -292,8 +302,8 @@ impl<'a> SummaryParser<'a> {
/// ///
/// If the event is the start of a new list, bump the nesting level. /// If the event is the start of a new list, bump the nesting level.
/// ///
/// If the event is the end of a list, decrement the nesting level. When /// If the event is the end of a list, decrement the nesting level. When
/// the nesting level would go negative, we've finished the numbered /// the nesting level would go negative, we've finished the numbered
/// section and need to parse the suffix section. /// section and need to parse the suffix section.
/// ///
/// Otherwise, ignore the event. /// Otherwise, ignore the event.
@ -308,33 +318,26 @@ impl<'a> SummaryParser<'a> {
trace!("[*] Section number is {}", section_number); trace!("[*] Section number is {}", section_number);
}, },
Event::Start(Tag::List(_)) => { Event::Start(Tag::List(_)) => {
match self.state { if let State::NumberedChapters(n) = self.state {
State::NumberedChapters(n) => { self.state = State::NumberedChapters(n + 1);
let new_nest = n + 1; trace!("[*] Nesting level increased to {}", n + 1);
self.state = State::NumberedChapters(new_nest);
trace!("[*] Nesting level increased to {}", new_nest);
},
other => unreachable!(),
} }
}, },
Event::End(Tag::List(_)) => { Event::End(Tag::List(_)) => {
match self.state { if let State::NumberedChapters(n) = self.state {
State::NumberedChapters(n) => { if n == 0 {
if n == 0 { trace!("[*] Finished parsing the numbered chapters");
trace!("[*] Finished parsing the numbered chapters"); self.state = State::SuffixChapters;
self.state = State::SuffixChapters; } else {
} else { trace!("[*] Nesting level decreased to {}", n - 1);
trace!("[*] Nesting level decreased to {}", n - 1); self.state = State::NumberedChapters(n - 1);
self.state = State::NumberedChapters(n - 1); }
}
},
other => unreachable!(),
} }
}, },
other => { other => {
trace!("[*] skipping unexpected token: {:?}", other); trace!("[*] skipping unexpected token: {:?}", other);
}, },
} }
Ok(()) Ok(())
} }
@ -345,14 +348,9 @@ impl<'a> SummaryParser<'a> {
Event::Start(Tag::Link(location, _)) => { Event::Start(Tag::Link(location, _)) => {
let content = collect_events!(self.stream, Tag::Link(_, _)); let content = collect_events!(self.stream, Tag::Link(_, _));
let text = stringify_events(content); let text = stringify_events(content);
let link = Link { let link = Link::new(text, location.as_ref());
name: text,
location: PathBuf::from(location.as_ref()),
number: None,
nested_items: Vec::new(),
};
debug!("[*] Found a suffix chapter, {:?}", link.name); debug!("[*] Found a suffix chapter: {:?}", link.name);
self.summary.suffix_chapters.push(SummaryItem::Link(link)); self.summary.suffix_chapters.push(SummaryItem::Link(link));
}, },
Event::End(Tag::Rule) => { Event::End(Tag::Rule) => {
@ -375,12 +373,7 @@ impl<'a> SummaryParser<'a> {
if let Some(Event::Start(Tag::Link(dest, _))) = next { if let Some(Event::Start(Tag::Link(dest, _))) = next {
let content = collect_events!(self.stream, Tag::Link(..)); let content = collect_events!(self.stream, Tag::Link(..));
Ok(Link { Ok(Link::new(stringify_events(content), dest.as_ref()))
name: stringify_events(content),
location: PathBuf::from(dest.to_string()),
number: None,
nested_items: Vec::new(),
})
} else { } else {
bail!("Expected a link, got {:?}", next) bail!("Expected a link, got {:?}", next)
} }
@ -407,8 +400,8 @@ impl<'a> SummaryParser<'a> {
if let State::NumberedChapters(level) = self.state { if let State::NumberedChapters(level) = self.state {
push_item_at_nesting_level( push_item_at_nesting_level(
&mut self.summary.numbered_chapters, &mut self.summary.numbered_chapters,
item, item,
level as usize, level as usize,
SectionNumber::default(), SectionNumber::default(),
).chain_err(|| { ).chain_err(|| {
format!("The parser should always ensure we add the next \ format!("The parser should always ensure we add the next \
@ -449,23 +442,31 @@ fn push_item_at_nesting_level(links: &mut Vec<SummaryItem>, mut item: SummaryIte
let index_for_item = links.len() + 1; let index_for_item = links.len() + 1;
// FIXME: This bit needs simplifying! // FIXME: This bit needs simplifying!
let (index, last_link) = let (index, last_link) = get_last_link(links).chain_err(|| {
links format!("The list of links needs to be {} levels deeper (current position {})",
.iter_mut() level, section_number)
.enumerate() })?;
.filter_map(|(i, item)| item.maybe_link_mut().map(|l| (i, l)))
.rev()
.next()
.ok_or_else(|| format!("Can't recurse any further, summary needs to be {} more levels deep", level))?;
section_number.push(index as u32 + 1); section_number.push(index as u32 + 1);
push_item_at_nesting_level(&mut last_link.nested_items, item, level - 1, section_number) push_item_at_nesting_level(&mut last_link.nested_items, item, level - 1, section_number)
} }
} }
/// Finds the last `Link` in a list of `SummaryItem`s.
///
/// The list is walked from the back; the first item for which
/// `maybe_link_mut()` yields a `Link` is returned as a mutable reference,
/// together with that item's index in `links`.
///
/// # Errors
///
/// Fails if none of the items yields a `Link` (which includes an empty list).
fn get_last_link(links: &mut [SummaryItem]) -> Result<(usize, &mut Link)> {
    // Iterate back-to-front so the first hit is the last link in the list.
    for (position, entry) in links.iter_mut().enumerate().rev() {
        if let Some(link) = entry.maybe_link_mut() {
            return Ok((position, link));
        }
    }

    Err("The list of SummaryItems doesn't contain any Links".into())
}
/// Extracts the text from formatted markdown. /// Removes the styling from a list of Markdown events and returns just the
/// plain text.
fn stringify_events(events: Vec<Event>) -> String { fn stringify_events(events: Vec<Event>) -> String {
events events
.into_iter() .into_iter()
@ -476,7 +477,8 @@ fn stringify_events(events: Vec<Event>) -> String {
.collect() .collect()
} }
/// A section number like "1.2.3", basically just a newtype'd `Vec<u32>`. /// A section number like "1.2.3", basically just a newtype'd `Vec<u32>` with
/// a pretty `Display` impl.
#[derive(Debug, PartialEq, Clone, Default)] #[derive(Debug, PartialEq, Clone, Default)]
pub struct SectionNumber(pub Vec<u32>); pub struct SectionNumber(pub Vec<u32>);