From dcc83685439848a560f2ef79eccb7f6474516f9d Mon Sep 17 00:00:00 2001 From: Michael Bryan Date: Tue, 27 Jun 2017 23:40:41 +0800 Subject: [PATCH] Encoded the first state in the SummaryParser --- src/loader/summary.rs | 135 ++++++++++++++++++++++++++++++------------ 1 file changed, 97 insertions(+), 38 deletions(-) diff --git a/src/loader/summary.rs b/src/loader/summary.rs index c6d74fe3..fc3dab59 100644 --- a/src/loader/summary.rs +++ b/src/loader/summary.rs @@ -11,39 +11,40 @@ use pulldown_cmark::{self, Event, Tag}; /// /// # Summary Format /// -/// **Title:** It's common practice to begin with a title, generally -/// "# Summary". But it is not mandatory, the parser just ignores it. So you +/// **Title:** It's common practice to begin with a title, generally +/// "# Summary". But it is not mandatory, the parser just ignores it. So you /// can too if you feel like it. -/// -/// **Prefix Chapter:** Before the main numbered chapters you can add a couple +/// +/// **Prefix Chapter:** Before the main numbered chapters you can add a couple /// of elements that will not be numbered. This is useful for forewords, /// introductions, etc. There are however some constraints. You can not nest /// prefix chapters, they should all be on the root level. And you can not add /// prefix chapters once you have added numbered chapters. -/// +/// /// ```markdown /// [Title of prefix element](relative/path/to/markdown.md) /// ``` -/// -/// **Numbered Chapter:** Numbered chapters are the main content of the book, they +/// +/// **Numbered Chapter:** Numbered chapters are the main content of the book, +/// they /// will be numbered and can be nested, resulting in a nice hierarchy (chapters, /// sub-chapters, etc.) -/// +/// /// ```markdown /// - [Title of the Chapter](relative/path/to/markdown.md) /// ``` -/// +/// /// You can either use - or * to indicate a numbered chapter. -/// +/// /// **Suffix Chapter:** After the numbered chapters you can add a couple of /// non-numbered chapters. They are the same as prefix chapters but come after /// the numbered chapters instead of before. -/// +/// /// All other elements are unsupported and will be ignored at best or result in /// an error. pub fn parse_summary(summary: &str) -> Result> { let parser = SummaryParser::new(summary); - parser.parse() + parser.parse() } /// The parsed `SUMMARY.md`, specifying how the book should be laid out. @@ -52,7 +53,7 @@ pub struct Summary { title: Option, } -/// A struct representing an entry in the `SUMMARY.md`, possibly with nested +/// A struct representing an entry in the `SUMMARY.md`, possibly with nested /// entries. /// /// This is roughly the equivalent of `[Some section](./path/to/file.md)`. @@ -69,41 +70,53 @@ enum SummaryItem { Separator, } +#[derive(Debug, Copy, Clone, PartialEq)] +enum State { + Begin, + PrefixChapters, + /// Numbered chapters, including the nesting level. + NumberedChapters(u32), + SuffixChapters, + End, +} + /// A stateful parser for parsing a `SUMMARY.md` file. /// /// # Grammar -/// +/// /// The `SUMMARY.md` file has a grammar which looks something like this: /// /// ```text -/// summary ::= title prefix_chapters numbered_chapters suffix_chapters +/// summary ::= title prefix_chapters numbered_chapters +/// suffix_chapters /// title ::= "# " TEXT /// | EPSILON /// prefix_chapters ::= item* /// suffix_chapters ::= item* /// numbered_chapters ::= dotted_item+ /// dotted_item ::= INDENT* DOT_POINT item -/// item ::= link +/// item ::= link /// | separator /// separator ::= "---" /// link ::= "[" TEXT "]" "(" TEXT ")" /// DOT_POINT ::= "-" /// | "*" /// ``` -/// -/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly) +/// +/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly) /// > match the following regex: "[^<>\n[]]+". struct SummaryParser<'a> { stream: pulldown_cmark::Parser<'a>, summary: Summary, + state: State, } /// Reads `Events` from the provided stream until the corresponding /// `Event::End` is encountered which matches the `$delimiter` pattern. /// -/// This is the equivalent of doing -/// `$stream.take_while(|e| e != $delimeter).collect()` but it allows you to -/// use pattern matching and you won't get errors because `take_while()` +/// This is the equivalent of doing +/// `$stream.take_while(|e| e != $delimeter).collect()` but it allows you to +/// use pattern matching and you won't get errors because `take_while()` /// moves `$stream` out of self. macro_rules! collect_events { ($stream:expr, $delimiter:pat) => { @@ -127,8 +140,7 @@ macro_rules! collect_events { } } -impl<'a> SummaryParser<'a> -{ +impl<'a> SummaryParser<'a> { fn new(text: &str) -> SummaryParser { let pulldown_parser = pulldown_cmark::Parser::new(text); let intermediate_summary = Summary::default(); @@ -136,6 +148,7 @@ impl<'a> SummaryParser<'a> SummaryParser { stream: pulldown_parser, summary: intermediate_summary, + state: State::Begin, } } @@ -143,17 +156,38 @@ impl<'a> SummaryParser<'a> fn parse(mut self) -> Result> { self.summary.title = self.parse_title(); - Ok(self.summary) + Ok(self.summary) + } + + fn step(&mut self) -> Result<(), Box> { + let next_event = self.stream.next().expect("TODO: error-chain"); + trace!("[*] Current state = {:?}, Next Event = {:?}", self.state, next_event); + + match self.state { + State::Begin => self.step_start(next_event), + other => unimplemented!() + } + } + + /// The very first state, we should see a `BeginParagraph` token or + /// it's an error... + fn step_start(&mut self, event: Event<'a>) -> Result<(), Box> { + match event { + Event::Start(Tag::Paragraph) => self.state = State::PrefixChapters, + other => panic!("Unexpected tag! {:?}", other), + } + + Ok(()) } fn parse_title(&mut self) -> Option { if let Some(Event::Start(Tag::Header(1))) = self.stream.next() { debug!("[*] Found a h1 in the SUMMARY"); - + let tags = collect_events!(self.stream, Tag::Header(1)); // TODO: How do we deal with headings like "# My **awesome** summary"? - // for now, I'm just going to scan through and concatenate the + // for now, I'm just going to scan through and concatenate the // Event::Text tags, skipping any styling. Some(stringify_events(tags)) } else { @@ -179,15 +213,15 @@ impl<'a> SummaryParser<'a> } } -/// Extract just the text from a bunch of events and concatenate it into a -/// single string. +/// Extracts the text from formatted markdown. fn stringify_events<'a>(events: Vec>) -> String { - events.into_iter() - .filter_map(|t| match t { - Event::Text(text) => Some(text), - _ => None, - }) - .collect() + events + .into_iter() + .filter_map(|t| match t { + Event::Text(text) => Some(text.into_owned()), + _ => None, + }) + .collect() } /// A section number like "1.2.3", basically just a newtype'd `Vec`. @@ -196,9 +230,11 @@ struct SectionNumber(Vec); impl Display for SectionNumber { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - let dotted_number: String = self.0.iter().map(|i| format!("{}", i)) - .collect::>() - .join("."); + let dotted_number: String = self.0 + .iter() + .map(|i| format!("{}", i)) + .collect::>() + .join("."); write!(f, "{}", dotted_number) } @@ -274,4 +310,27 @@ mod tests { assert_eq!(got, should_be); } -} \ No newline at end of file + + #[test] + fn convert_markdown_events_to_a_string() { + let src = "Hello *World*, `this` is some text [and a link](./path/to/link)"; + let should_be = "Hello World, this is some text and a link"; + + let events = pulldown_cmark::Parser::new(src).collect(); + let got = stringify_events(events); + + assert_eq!(got, should_be); + + } + + #[test] + fn can_step_past_first_token() { + let src = "hello world"; + let should_be = State::PrefixChapters; + + let mut parser = SummaryParser::new(src); + assert_eq!(parser.state, State::Begin); + parser.step().unwrap(); + assert_eq!(parser.state, should_be); + } +}