use std::fmt::{self, Display, Formatter}; use std::ops::{Deref, DerefMut}; use std::path::{Path, PathBuf}; use pulldown_cmark::{self, Event, Tag}; use errors::*; /// Parse the text from a `SUMMARY.md` file into a sort of "recipe" to be /// used when loading a book from disk. /// /// # Summary Format /// /// **Title:** It's common practice to begin with a title, generally /// "# Summary". But it is not mandatory, the parser just ignores it. So you /// can too if you feel like it. /// /// **Prefix Chapter:** Before the main numbered chapters you can add a couple /// of elements that will not be numbered. This is useful for forewords, /// introductions, etc. There are however some constraints. You can not nest /// prefix chapters, they should all be on the root level. And you can not add /// prefix chapters once you have added numbered chapters. /// /// ```markdown /// [Title of prefix element](relative/path/to/markdown.md) /// ``` /// /// **Numbered Chapter:** Numbered chapters are the main content of the book, /// they /// will be numbered and can be nested, resulting in a nice hierarchy (chapters, /// sub-chapters, etc.) /// /// ```markdown /// - [Title of the Chapter](relative/path/to/markdown.md) /// ``` /// /// You can either use - or * to indicate a numbered chapter. /// /// **Suffix Chapter:** After the numbered chapters you can add a couple of /// non-numbered chapters. They are the same as prefix chapters but come after /// the numbered chapters instead of before. /// /// All other elements are unsupported and will be ignored at best or result in /// an error. pub fn parse_summary(summary: &str) -> Result { let parser = SummaryParser::new(summary); parser.parse() } /// The parsed `SUMMARY.md`, specifying how the book should be laid out. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub struct Summary { /// An optional title for the `SUMMARY.md`, currently just ignored. pub title: Option, /// Chapters before the main text (e.g. an introduction). pub prefix_chapters: Vec, /// The main chapters in the document. pub numbered_chapters: Vec, /// Items which come after the main document (e.g. a conclusion). pub suffix_chapters: Vec, } /// A struct representing an entry in the `SUMMARY.md`, possibly with nested /// entries. /// /// This is roughly the equivalent of `[Some section](./path/to/file.md)`. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Link { /// The name of the chapter. pub name: String, /// The location of the chapter's source file, taking the book's `src` /// directory as the root. pub location: PathBuf, /// The section number, if this chapter is in the numbered section. pub number: Option, /// Any nested items this chapter may contain. pub nested_items: Vec, } impl Link { /// Create a new link with no nested items. pub fn new, P: AsRef>(name: S, location: P) -> Link { Link { name: name.into(), location: location.as_ref().to_path_buf(), number: None, nested_items: Vec::new(), } } } impl Default for Link { fn default() -> Self { Link { name: String::new(), location: PathBuf::new(), number: None, nested_items: Vec::new(), } } } /// An item in `SUMMARY.md` which could be either a separator or a `Link`. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum SummaryItem { /// A link to a chapter. Link(Link), /// A separator (`---`). Separator, } impl SummaryItem { fn maybe_link_mut(&mut self) -> Option<&mut Link> { match *self { SummaryItem::Link(ref mut l) => Some(l), _ => None, } } } impl From for SummaryItem { fn from(other: Link) -> SummaryItem { SummaryItem::Link(other) } } #[derive(Debug, Copy, Clone, PartialEq)] enum State { Begin, PrefixChapters, /// Numbered chapters, including the nesting level. NumberedChapters(u32), SuffixChapters, End, } /// A state machine parser for parsing a `SUMMARY.md` file. /// /// The parser has roughly 5 states, /// /// - **Begin:** the initial state /// - **Prefix Chapters:** Parsing the prefix chapters /// - **Numbered Chapters:** Parsing the numbered chapters, using a `usize` to /// indicate the nesting level (because chapters can have sub-chapters) /// - **Suffix Chapters:** pretty much identical to the Prefix Chapters /// - **End:** The final state /// /// The `parse()` method then continually invokes `step()` until it reaches the /// `End` state. Parsing is guaranteed to (eventually) finish because the next /// `Event` is read from the underlying `pulldown_cmark::Parser` and passed /// into the current state's associated method. /// /// /// # Grammar /// /// The `SUMMARY.md` file has a grammar which looks something like this: /// /// ```text /// summary ::= title prefix_chapters numbered_chapters /// suffix_chapters /// title ::= "# " TEXT /// | EPSILON /// prefix_chapters ::= item* /// suffix_chapters ::= item* /// numbered_chapters ::= dotted_item+ /// dotted_item ::= INDENT* DOT_POINT item /// item ::= link /// | separator /// separator ::= "---" /// link ::= "[" TEXT "]" "(" TEXT ")" /// DOT_POINT ::= "-" /// | "*" /// ``` /// /// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly) /// > match the following regex: "[^<>\n[]]+". struct SummaryParser<'a> { stream: pulldown_cmark::Parser<'a>, summary: Summary, state: State, } /// Reads `Events` from the provided stream until the corresponding /// `Event::End` is encountered which matches the `$delimiter` pattern. /// /// This is the equivalent of doing /// `$stream.take_while(|e| e != $delimeter).collect()` but it allows you to /// use pattern matching and you won't get errors because `take_while()` /// moves `$stream` out of self. macro_rules! collect_events { ($stream:expr, $delimiter:pat) => { { let mut events = Vec::new(); loop { let event = $stream.next(); trace!("Next event: {:?}", event); match event { Some(Event::End($delimiter)) => break, Some(other) => events.push(other), None => { debug!("Reached end of stream without finding the closing pattern, {}", stringify!($delimiter)); break; } } } events } } } impl<'a> SummaryParser<'a> { fn new(text: &str) -> SummaryParser { let pulldown_parser = pulldown_cmark::Parser::new(text); let intermediate_summary = Summary::default(); SummaryParser { stream: pulldown_parser, summary: intermediate_summary, state: State::Begin, } } /// Parse the text the `SummaryParser` was created with. fn parse(mut self) -> Result { self.summary.title = self.parse_title(); if let Some(ref title) = self.summary.title { debug!("[*] Title is {:?}", title); } while self.state != State::End { self.step()?; } Ok(self.summary) } fn step(&mut self) -> Result<()> { if let Some(next_event) = self.stream.next() { trace!("[*] Current state: {:?}, next event: {:?}", self.state, next_event); match self.state { State::Begin => self.step_start(next_event)?, State::PrefixChapters | State::SuffixChapters => self.step_affix(next_event)?, State::NumberedChapters(_) => self.step_numbered(next_event)?, State::End => {}, } } else { trace!("[*] Reached end of SUMMARY.md"); self.state = State::End; } Ok(()) } /// The very first state, we should see a `Begin Paragraph` token or /// it's an error... fn step_start(&mut self, event: Event<'a>) -> Result<()> { match event { Event::Start(Tag::Paragraph) => self.state = State::PrefixChapters, Event::Start(Tag::List(_)) => self.state = State::NumberedChapters(0), other => bail!("Expected a start of paragraph but got {:?}", other), } Ok(()) } /// Try to step through an "affix" section (recognising prefix and suffix /// chapters). /// /// If we encounter a link or horizontal line, it'll get added to the /// section. If we encounter a list, we'll either change to /// `State::NumberedChapter` (for prefix) or throw an error (suffix chapters). /// /// Anything else will be ignored. fn step_affix(&mut self, event: Event<'a>) -> Result<()> { match event { Event::Start(tag) => self.handle_start_tag_in_affix_chapter(tag)?, Event::End(Tag::Rule) => { debug!("[*] Found an affix chapter separator"); self.affix_chapter_list().push(SummaryItem::Separator); }, other => { trace!("[*] Skipping unexpected token in summary: {:?}", other); }, } Ok(()) } /// A helper function to get the `SummaryItem` list we should add items to /// when parsing an affix chapter (i.e. prefix or suffix chapters). fn affix_chapter_list(&mut self) -> &mut Vec { match self.state { State::PrefixChapters => &mut self.summary.prefix_chapters, State::SuffixChapters => &mut self.summary.suffix_chapters, other => panic!("affix_chapter_list() called with invalid state: {:?}", other), } } fn handle_start_tag_in_affix_chapter(&mut self, tag: Tag) -> Result<()> { match tag { Tag::Link(location, _) => { let content = collect_events!(self.stream, Tag::Link(_, _)); let text = stringify_events(content); let link = Link::new(text, location.as_ref()); debug!("[*] Found an affix chapter: {:?}", link.name); self.affix_chapter_list().push(SummaryItem::Link(link)); }, Tag::List(_) => { match self.state { State::PrefixChapters => { debug!("[*] Changing from prefix chapters to numbered chapters"); self.state = State::NumberedChapters(0); }, State::SuffixChapters => bail!("Suffix chapters can't be followed by a list"), _ => unreachable!(), } }, other => trace!("[*] Skipping unknown start tag while parsing affix chapters: {:?}", other), } Ok(()) } /// Parse the numbered chapters. /// /// If the event is the start of a list item, consume the entire item and /// add a new link to the summary with `push_numbered_section`. /// /// If the event is the start of a new list, bump the nesting level. /// /// If the event is the end of a list, decrement the nesting level. When /// the nesting level would go negative, we've finished the numbered /// section and need to parse the suffix section. /// /// Otherwise, ignore the event. fn step_numbered(&mut self, event: Event) -> Result<()> { match event { Event::Start(Tag::Item) => { let it = self.parse_item().chain_err( || "List items should only contain links", )?; debug!("[*] Found a chapter: {:?} ({})", it.name, it.location.display()); let section_number = self.push_numbered_section(SummaryItem::Link(it)); trace!("[*] Section number is {}", section_number); }, Event::End(Tag::Rule) => { debug!("[*] Found a numbered chapter separator"); self.summary.numbered_chapters.push(SummaryItem::Separator); self.state = State::NumberedChapters(0); }, Event::Start(Tag::List(_)) => { if let State::NumberedChapters(n) = self.state { self.state = State::NumberedChapters(n + 1); trace!("[*] Nesting level increased to {}", n + 1); } }, Event::End(Tag::List(_)) => { if let State::NumberedChapters(n) = self.state { if n == 0 { trace!("[*] Finished parsing the numbered chapters"); self.state = State::SuffixChapters; } else { trace!("[*] Nesting level decreased to {}", n - 1); self.state = State::NumberedChapters(n - 1); } } }, Event::End(Tag::Item) => { /* Ignore */ }, other => { trace!("[*] ignoring token: {:?}", other); }, } Ok(()) } /// Parse a single item (`[Some Chapter Name](./path/to/chapter.md)`). fn parse_item(&mut self) -> Result { let next = self.stream.next(); trace!("Parsing an item, next event is {:?}", next); if let Some(Event::Start(Tag::Link(dest, _))) = next { let content = collect_events!(self.stream, Tag::Link(..)); Ok(Link::new(stringify_events(content), dest.as_ref())) } else { bail!("Expected a link, got {:?}", next) } } /// Try to parse the title line. fn parse_title(&mut self) -> Option { if let Some(Event::Start(Tag::Header(1))) = self.stream.next() { debug!("[*] Found a h1 in the SUMMARY"); let tags = collect_events!(self.stream, Tag::Header(1)); // TODO: How do we deal with headings like "# My **awesome** summary"? // for now, I'm just going to scan through and concatenate the // Event::Text tags, skipping any styling. Some(stringify_events(tags)) } else { None } } /// Push a new section at the end of the current nesting level. fn push_numbered_section(&mut self, item: SummaryItem) -> SectionNumber { if let State::NumberedChapters(level) = self.state { push_item_at_nesting_level( &mut self.summary.numbered_chapters, item, level as usize, SectionNumber::default(), ).chain_err(|| { format!("The parser should always ensure we add the next \ item at the correct level ({}:{})", module_path!(), line!()) }) .unwrap() } else { // this method should only ever be called when parsing a numbered // section, therefore if we ever get here something has gone // hideously wrong... error!("Calling push_numbered_section() when not in a numbered section"); error!("Current state: {:?}", self.state); error!("Item: {:?}", item); error!("Summary:"); error!("{:#?}", self.summary); panic!("Entered unreachable code, this is a bug"); } } } /// Given a particular level (e.g. 3), go that many levels down the `Link`'s /// nested items then append the provided item to the last `Link` in the /// list. fn push_item_at_nesting_level(links: &mut Vec, mut item: SummaryItem, level: usize, mut section_number: SectionNumber) -> Result { if level == 0 { // set the section number, if applicable section_number.push(links.len() as u32 + 1); if let SummaryItem::Link(ref mut l) = item { l.number = Some(section_number.clone()); } links.push(item); Ok(section_number) } else { let (index, last_link) = get_last_link(links).chain_err(|| { format!("The list of links needs to be {} levels deeper (current position {})", level, section_number) })?; section_number.push(index as u32 + 1); push_item_at_nesting_level(&mut last_link.nested_items, item, level - 1, section_number) } } /// Gets a pointer to the last `Link` in a list of `SummaryItem`s, and its /// index. fn get_last_link(links: &mut [SummaryItem]) -> Result<(usize, &mut Link)> { // TODO: This should probably be integrated into `Link::push_item()` links .iter_mut() .enumerate() .filter_map(|(i, item)| item.maybe_link_mut().map(|l| (i, l))) .rev() .next() .ok_or_else(|| "The list of SummaryItems doesn't contain any Links".into()) } /// Removes the styling from a list of Markdown events and returns just the /// plain text. fn stringify_events(events: Vec) -> String { events .into_iter() .filter_map(|t| match t { Event::Text(text) => Some(text.into_owned()), _ => None, }) .collect() } /// A section number like "1.2.3", basically just a newtype'd `Vec` with /// a pretty `Display` impl. #[derive(Debug, PartialEq, Clone, Default, Serialize, Deserialize)] pub struct SectionNumber(pub Vec); impl Display for SectionNumber { fn fmt(&self, f: &mut Formatter) -> fmt::Result { for item in &self.0 { write!(f, "{}.", item)?; } Ok(()) } } impl Deref for SectionNumber { type Target = Vec; fn deref(&self) -> &Self::Target { &self.0 } } impl DerefMut for SectionNumber { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } #[cfg(test)] mod tests { use super::*; #[test] fn section_number_has_correct_dotted_representation() { let inputs = vec![ (vec![0], "0."), (vec![1, 3], "1.3."), (vec![1, 2, 3], "1.2.3."), ]; for (input, should_be) in inputs { let section_number = SectionNumber(input).to_string(); assert_eq!(section_number, should_be); } } #[test] fn parse_initial_title() { let src = "# Summary"; let should_be = String::from("Summary"); let mut parser = SummaryParser::new(src); let got = parser.parse_title().unwrap(); assert_eq!(got, should_be); } #[test] fn parse_title_with_styling() { let src = "# My **Awesome** Summary"; let should_be = String::from("My Awesome Summary"); let mut parser = SummaryParser::new(src); let got = parser.parse_title().unwrap(); assert_eq!(got, should_be); } #[test] fn parse_a_single_item() { let src = "[A Chapter](./path/to/chapter)"; let should_be = Link { name: String::from("A Chapter"), location: PathBuf::from("./path/to/chapter"), number: None, nested_items: Vec::new(), }; let mut parser = SummaryParser::new(src); let _ = parser.stream.next(); // skip the opening paragraph tag let got = parser.parse_item().unwrap(); assert_eq!(got, should_be); } #[test] fn convert_markdown_events_to_a_string() { let src = "Hello *World*, `this` is some text [and a link](./path/to/link)"; let should_be = "Hello World, this is some text and a link"; let events = pulldown_cmark::Parser::new(src).collect(); let got = stringify_events(events); assert_eq!(got, should_be); } #[test] fn can_step_past_first_token() { let src = "hello world"; let should_be = State::PrefixChapters; let mut parser = SummaryParser::new(src); assert_eq!(parser.state, State::Begin); parser.step().unwrap(); assert_eq!(parser.state, should_be); } #[test] fn first_token_must_be_open_paragraph() { let src = "hello world"; let mut parser = SummaryParser::new(src); let _ = parser.stream.next(); // manually step past the Start Paragraph assert!(parser.step().is_err()); } #[test] fn can_parse_prefix_chapter_links() { let src = "[Hello World](./foo/bar/baz)"; let should_be = Link { name: String::from("Hello World"), location: PathBuf::from("./foo/bar/baz"), number: None, nested_items: Vec::new(), }; let mut parser = SummaryParser::new(src); parser.state = State::PrefixChapters; assert!(parser.summary.prefix_chapters.is_empty()); let _ = parser.stream.next(); // manually step past the Start Paragraph parser.step().unwrap(); assert_eq!(parser.summary.prefix_chapters.len(), 1); assert_eq!(parser.summary.prefix_chapters[0], SummaryItem::Link(should_be)); assert_eq!(parser.state, State::PrefixChapters); } #[test] fn can_parse_prefix_chapter_horizontal_rules() { let src = "---"; let should_be = SummaryItem::Separator; let mut parser = SummaryParser::new(src); parser.state = State::PrefixChapters; assert!(parser.summary.prefix_chapters.is_empty()); let _ = parser.stream.next(); // manually step past the Start Paragraph parser.step().unwrap(); assert_eq!(parser.summary.prefix_chapters.len(), 1); assert_eq!(parser.summary.prefix_chapters[0], should_be); assert_eq!(parser.state, State::PrefixChapters); } #[test] fn step_from_prefix_chapters_to_numbered() { let src = "- foo"; let mut parser = SummaryParser::new(src); parser.state = State::PrefixChapters; // let _ = parser.stream.next(); // manually step past the Start Paragraph parser.step().unwrap(); assert_eq!(parser.state, State::NumberedChapters(0)); } #[test] fn push_item_onto_empty_link() { let root = Link::new("First", "/"); let mut links = vec![SummaryItem::Link(root)]; assert_eq!(links[0].maybe_link_mut().unwrap().nested_items.len(), 0); let got = push_item_at_nesting_level(&mut links, SummaryItem::Separator, 1, SectionNumber::default()).unwrap(); assert_eq!(links[0].maybe_link_mut().unwrap().nested_items.len(), 1); assert_eq!(*got, vec![1, 1]); } #[test] fn push_item_onto_complex_link() { let mut root = Link::new("First", "/first"); root.nested_items.push(SummaryItem::Separator); let mut child = Link::new("Second", "/first/second"); child.nested_items.push(SummaryItem::Link( Link::new("Third", "/first/second/third"), )); root.nested_items.push(SummaryItem::Link(child)); root.nested_items.push(SummaryItem::Separator); let mut links = vec![SummaryItem::Link(root)]; // FIXME: This crap for getting a deeply nested member is just plain ugly :( assert_eq!(links[0].maybe_link_mut().unwrap() .nested_items[1].maybe_link_mut() .unwrap() .nested_items[0].maybe_link_mut() .unwrap() .nested_items.len() , 0); let got = push_item_at_nesting_level( &mut links, SummaryItem::Link(Link::new("Dummy", "")), 3, SectionNumber::default(), ).unwrap(); assert_eq!(links[0].maybe_link_mut().unwrap() .nested_items[1].maybe_link_mut() .unwrap() .nested_items[0].maybe_link_mut() .unwrap() .nested_items.len() , 1); println!("{:#?}", links); assert_eq!(*got, vec![1, 2, 1, 1]); } #[test] fn parse_a_numbered_chapter() { let src = "- [First](./second)"; let mut parser = SummaryParser::new(src); let _ = parser.stream.next(); assert_eq!(parser.summary.numbered_chapters.len(), 0); parser.state = State::NumberedChapters(0); parser.step().unwrap(); assert_eq!(parser.summary.numbered_chapters.len(), 1); } }