Encoded the first state in the SummaryParser

2017-06-27 23:40:41 +08:00 · 2017-06-27 23:40:41 +08:00 · dcc8368543
parent dacb3e082e
commit dcc8368543
1 changed files with 97 additions and 38 deletions
--- a/src/loader/summary.rs
+++ b/src/loader/summary.rs
@ -11,39 +11,40 @@ use pulldown_cmark::{self, Event, Tag};
 ///
 /// # Summary Format
 ///
-/// **Title:** It's common practice to begin with a title, generally 
-/// "# Summary". But it is not mandatory, the parser just ignores it. So you 
+/// **Title:** It's common practice to begin with a title, generally
+/// "# Summary". But it is not mandatory, the parser just ignores it. So you
 /// can too if you feel like it.
-/// 
-/// **Prefix Chapter:** Before the main numbered chapters you can add a couple 
+///
+/// **Prefix Chapter:** Before the main numbered chapters you can add a couple
 /// of elements that will not be numbered. This is useful for forewords,
 /// introductions, etc. There are however some constraints. You can not nest
 /// prefix chapters, they should all be on the root level. And you can not add
 /// prefix chapters once you have added numbered chapters.
-/// 
+///
 /// ```markdown
 /// [Title of prefix element](relative/path/to/markdown.md)
 /// ```
-/// 
-/// **Numbered Chapter:** Numbered chapters are the main content of the book, they
+///
+/// **Numbered Chapter:** Numbered chapters are the main content of the
+/// book; they
 /// will be numbered and can be nested, resulting in a nice hierarchy (chapters,
 /// sub-chapters, etc.)
-/// 
+///
 /// ```markdown
 /// - [Title of the Chapter](relative/path/to/markdown.md)
 /// ```
-/// 
+///
 /// You can either use - or * to indicate a numbered chapter.
-/// 
+///
 /// **Suffix Chapter:** After the numbered chapters you can add a couple of
 /// non-numbered chapters. They are the same as prefix chapters but come after
 /// the numbered chapters instead of before.
-/// 
+///
 /// All other elements are unsupported and will be ignored at best or result in
 /// an error.
 pub fn parse_summary(summary: &str) -> Result<Summary, Box<Error>> {
    let parser = SummaryParser::new(summary);
-    parser.parse()    
+    parser.parse()
 }

 /// The parsed `SUMMARY.md`, specifying how the book should be laid out.
@ -52,7 +53,7 @@ pub struct Summary {
    title: Option<String>,
 }

-/// A struct representing an entry in the `SUMMARY.md`, possibly with nested 
+/// A struct representing an entry in the `SUMMARY.md`, possibly with nested
 /// entries.
 ///
 /// This is roughly the equivalent of `[Some section](./path/to/file.md)`.
@ -69,41 +70,53 @@ enum SummaryItem {
    Separator,
 }

+#[derive(Debug, Copy, Clone, PartialEq)]
+enum State {
+    Begin,
+    PrefixChapters,
+    /// Numbered chapters, including the nesting level.
+    NumberedChapters(u32),
+    SuffixChapters,
+    End,
+}
+
 /// A stateful parser for parsing a `SUMMARY.md` file.
 ///
 /// # Grammar
-/// 
+///
 /// The `SUMMARY.md` file has a grammar which looks something like this:
 ///
 /// ```text
-/// summary           ::= title prefix_chapters numbered_chapters suffix_chapters
+/// summary           ::= title prefix_chapters numbered_chapters
+///                       suffix_chapters
 /// title             ::= "# " TEXT
 ///                     | EPSILON
 /// prefix_chapters   ::= item*
 /// suffix_chapters   ::= item*
 /// numbered_chapters ::= dotted_item+
 /// dotted_item       ::= INDENT* DOT_POINT item
-/// item              ::= link 
+/// item              ::= link
 ///                     | separator
 /// separator         ::= "---"
 /// link              ::= "[" TEXT "]" "(" TEXT ")"
 /// DOT_POINT         ::= "-"
 ///                     | "*"
 /// ```
-/// 
-/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly) 
+///
+/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly)
 /// > match the following regex: "[^<>\n[]]+".
 struct SummaryParser<'a> {
    stream: pulldown_cmark::Parser<'a>,
    summary: Summary,
+    state: State,
 }

 /// Reads `Events` from the provided stream until the corresponding
 /// `Event::End` is encountered which matches the `$delimiter` pattern.
 ///
-/// This is the equivalent of doing 
-/// `$stream.take_while(|e| e != $delimeter).collect()` but it allows you to 
-/// use pattern matching and you won't get errors because `take_while()` 
+/// This is the equivalent of doing
+/// `$stream.take_while(|e| e != $delimiter).collect()` but it allows you to
+/// use pattern matching and you won't get errors because `take_while()`
 /// moves `$stream` out of self.
 macro_rules! collect_events {
    ($stream:expr, $delimiter:pat) => {
@ -127,8 +140,7 @@ macro_rules! collect_events {
    }
 }

-impl<'a> SummaryParser<'a> 
-{
+impl<'a> SummaryParser<'a> {
    fn new(text: &str) -> SummaryParser {
        let pulldown_parser = pulldown_cmark::Parser::new(text);
        let intermediate_summary = Summary::default();
@ -136,6 +148,7 @@ impl<'a> SummaryParser<'a>
        SummaryParser {
            stream: pulldown_parser,
            summary: intermediate_summary,
+            state: State::Begin,
        }
    }

@ -143,17 +156,38 @@ impl<'a> SummaryParser<'a>
    fn parse(mut self) -> Result<Summary, Box<Error>> {
        self.summary.title = self.parse_title();

-        Ok(self.summary)        
+        Ok(self.summary)
+    }
+
+    fn step(&mut self) -> Result<(), Box<Error>> {
+        let next_event = self.stream.next().expect("TODO: error-chain");
+        trace!("[*] Current state = {:?}, Next Event = {:?}", self.state, next_event);
+    
+        match self.state {
+            State::Begin => self.step_start(next_event),
+            other => unimplemented!()
+        }
+    }
+
+    /// The very first state: the first event must be the start of a
+    /// paragraph (`Event::Start(Tag::Paragraph)`); anything else panics.
+    fn step_start(&mut self, event: Event<'a>) -> Result<(), Box<Error>> {
+        match event {
+            Event::Start(Tag::Paragraph) => self.state = State::PrefixChapters,
+            other => panic!("Unexpected tag! {:?}", other),
+        }
+
+        Ok(())
    }

    fn parse_title(&mut self) -> Option<String> {
        if let Some(Event::Start(Tag::Header(1))) = self.stream.next() {
            debug!("[*] Found a h1 in the SUMMARY");
-            
+
            let tags = collect_events!(self.stream, Tag::Header(1));

            // TODO: How do we deal with headings like "# My **awesome** summary"?
-            // for now, I'm just going to scan through and concatenate the 
+            // for now, I'm just going to scan through and concatenate the
            // Event::Text tags, skipping any styling.
            Some(stringify_events(tags))
        } else {
@ -179,15 +213,15 @@ impl<'a> SummaryParser<'a>
    }
 }

-/// Extract just the text from a bunch of events and concatenate it into a
-/// single string.
+/// Extracts the text from formatted markdown.
 fn stringify_events<'a>(events: Vec<Event<'a>>) -> String {
-    events.into_iter()
-                .filter_map(|t| match t {
-                    Event::Text(text) => Some(text),
-                    _ => None,
-                })
-                .collect()
+    events
+        .into_iter()
+        .filter_map(|t| match t {
+            Event::Text(text) => Some(text.into_owned()),
+            _ => None,
+        })
+        .collect()
 }

 /// A section number like "1.2.3", basically just a newtype'd `Vec<u32>`.
@ -196,9 +230,11 @@ struct SectionNumber(Vec<u32>);

 impl Display for SectionNumber {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        let dotted_number: String = self.0.iter().map(|i| format!("{}", i))
-        .collect::<Vec<String>>()
-        .join(".");
+        let dotted_number: String = self.0
+            .iter()
+            .map(|i| format!("{}", i))
+            .collect::<Vec<String>>()
+            .join(".");

        write!(f, "{}", dotted_number)
    }
@ -274,4 +310,27 @@ mod tests {

        assert_eq!(got, should_be);
    }
-}
+
+    #[test]
+    fn convert_markdown_events_to_a_string() {
+        let src = "Hello *World*, `this` is some text [and a link](./path/to/link)";
+        let should_be = "Hello World, this is some text and a link";
+
+        let events = pulldown_cmark::Parser::new(src).collect();
+        let got = stringify_events(events);
+
+        assert_eq!(got, should_be);
+
+    }
+
+    #[test]
+    fn can_step_past_first_token() {
+        let src = "hello world";
+        let should_be = State::PrefixChapters;
+
+        let mut parser = SummaryParser::new(src);
+        assert_eq!(parser.state, State::Begin);
+        parser.step().unwrap();
+        assert_eq!(parser.state, should_be);
+    }
+}