Encoded the first state in the SummaryParser

This commit is contained in:
Michael Bryan 2017-06-27 23:40:41 +08:00
parent dacb3e082e
commit dcc8368543
1 changed files with 97 additions and 38 deletions

View File

@ -11,39 +11,40 @@ use pulldown_cmark::{self, Event, Tag};
/// ///
/// # Summary Format /// # Summary Format
/// ///
/// **Title:** It's common practice to begin with a title, generally /// **Title:** It's common practice to begin with a title, generally
/// "# Summary". But it is not mandatory, the parser just ignores it. So you /// "# Summary". But it is not mandatory, the parser just ignores it. So you
/// can too if you feel like it. /// can too if you feel like it.
/// ///
/// **Prefix Chapter:** Before the main numbered chapters you can add a couple /// **Prefix Chapter:** Before the main numbered chapters you can add a couple
/// of elements that will not be numbered. This is useful for forewords, /// of elements that will not be numbered. This is useful for forewords,
/// introductions, etc. There are however some constraints. You can not nest /// introductions, etc. There are however some constraints. You can not nest
/// prefix chapters, they should all be on the root level. And you can not add /// prefix chapters, they should all be on the root level. And you can not add
/// prefix chapters once you have added numbered chapters. /// prefix chapters once you have added numbered chapters.
/// ///
/// ```markdown /// ```markdown
/// [Title of prefix element](relative/path/to/markdown.md) /// [Title of prefix element](relative/path/to/markdown.md)
/// ``` /// ```
/// ///
/// **Numbered Chapter:** Numbered chapters are the main content of the book, they /// **Numbered Chapter:** Numbered chapters are the main content of the book,
/// they
/// will be numbered and can be nested, resulting in a nice hierarchy (chapters, /// will be numbered and can be nested, resulting in a nice hierarchy (chapters,
/// sub-chapters, etc.) /// sub-chapters, etc.)
/// ///
/// ```markdown /// ```markdown
/// - [Title of the Chapter](relative/path/to/markdown.md) /// - [Title of the Chapter](relative/path/to/markdown.md)
/// ``` /// ```
/// ///
/// You can either use - or * to indicate a numbered chapter. /// You can either use - or * to indicate a numbered chapter.
/// ///
/// **Suffix Chapter:** After the numbered chapters you can add a couple of /// **Suffix Chapter:** After the numbered chapters you can add a couple of
/// non-numbered chapters. They are the same as prefix chapters but come after /// non-numbered chapters. They are the same as prefix chapters but come after
/// the numbered chapters instead of before. /// the numbered chapters instead of before.
/// ///
/// All other elements are unsupported and will be ignored at best or result in /// All other elements are unsupported and will be ignored at best or result in
/// an error. /// an error.
pub fn parse_summary(summary: &str) -> Result<Summary, Box<Error>> { pub fn parse_summary(summary: &str) -> Result<Summary, Box<Error>> {
let parser = SummaryParser::new(summary); let parser = SummaryParser::new(summary);
parser.parse() parser.parse()
} }
/// The parsed `SUMMARY.md`, specifying how the book should be laid out. /// The parsed `SUMMARY.md`, specifying how the book should be laid out.
@ -52,7 +53,7 @@ pub struct Summary {
title: Option<String>, title: Option<String>,
} }
/// A struct representing an entry in the `SUMMARY.md`, possibly with nested /// A struct representing an entry in the `SUMMARY.md`, possibly with nested
/// entries. /// entries.
/// ///
/// This is roughly the equivalent of `[Some section](./path/to/file.md)`. /// This is roughly the equivalent of `[Some section](./path/to/file.md)`.
@ -69,41 +70,53 @@ enum SummaryItem {
Separator, Separator,
} }
/// The states the `SummaryParser` state machine can be in.
///
/// The variants mirror the sections of the `SUMMARY.md` grammar (prefix
/// chapters, numbered chapters, suffix chapters).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum State {
    /// The initial state of a freshly constructed parser.
    Begin,
    /// Un-numbered chapters which come before the numbered chapters.
    PrefixChapters,
    /// Numbered chapters, including the nesting level.
    NumberedChapters(u32),
    /// Un-numbered chapters which come after the numbered chapters.
    SuffixChapters,
    /// Presumably reached once the whole summary has been consumed; no
    /// transition into this state exists yet (TODO confirm once implemented).
    End,
}
/// A stateful parser for parsing a `SUMMARY.md` file. /// A stateful parser for parsing a `SUMMARY.md` file.
/// ///
/// # Grammar /// # Grammar
/// ///
/// The `SUMMARY.md` file has a grammar which looks something like this: /// The `SUMMARY.md` file has a grammar which looks something like this:
/// ///
/// ```text /// ```text
/// summary ::= title prefix_chapters numbered_chapters suffix_chapters /// summary ::= title prefix_chapters numbered_chapters
/// suffix_chapters
/// title ::= "# " TEXT /// title ::= "# " TEXT
/// | EPSILON /// | EPSILON
/// prefix_chapters ::= item* /// prefix_chapters ::= item*
/// suffix_chapters ::= item* /// suffix_chapters ::= item*
/// numbered_chapters ::= dotted_item+ /// numbered_chapters ::= dotted_item+
/// dotted_item ::= INDENT* DOT_POINT item /// dotted_item ::= INDENT* DOT_POINT item
/// item ::= link /// item ::= link
/// | separator /// | separator
/// separator ::= "---" /// separator ::= "---"
/// link ::= "[" TEXT "]" "(" TEXT ")" /// link ::= "[" TEXT "]" "(" TEXT ")"
/// DOT_POINT ::= "-" /// DOT_POINT ::= "-"
/// | "*" /// | "*"
/// ``` /// ```
/// ///
/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly) /// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly)
/// > match the following regex: "[^<>\n\[\]]+". /// > match the following regex: "[^<>\n\[\]]+".
struct SummaryParser<'a> { struct SummaryParser<'a> {
stream: pulldown_cmark::Parser<'a>, stream: pulldown_cmark::Parser<'a>,
summary: Summary, summary: Summary,
state: State,
} }
/// Reads `Events` from the provided stream until the corresponding /// Reads `Events` from the provided stream until the corresponding
/// `Event::End` is encountered which matches the `$delimiter` pattern. /// `Event::End` is encountered which matches the `$delimiter` pattern.
/// ///
/// This is the equivalent of doing /// This is the equivalent of doing
/// `$stream.take_while(|e| e != $delimiter).collect()` but it allows you to /// `$stream.take_while(|e| e != $delimiter).collect()` but it allows you to
/// use pattern matching and you won't get errors because `take_while()` /// use pattern matching and you won't get errors because `take_while()`
/// moves `$stream` out of self. /// moves `$stream` out of self.
macro_rules! collect_events { macro_rules! collect_events {
($stream:expr, $delimiter:pat) => { ($stream:expr, $delimiter:pat) => {
@ -127,8 +140,7 @@ macro_rules! collect_events {
} }
} }
impl<'a> SummaryParser<'a> impl<'a> SummaryParser<'a> {
{
fn new(text: &str) -> SummaryParser { fn new(text: &str) -> SummaryParser {
let pulldown_parser = pulldown_cmark::Parser::new(text); let pulldown_parser = pulldown_cmark::Parser::new(text);
let intermediate_summary = Summary::default(); let intermediate_summary = Summary::default();
@ -136,6 +148,7 @@ impl<'a> SummaryParser<'a>
SummaryParser { SummaryParser {
stream: pulldown_parser, stream: pulldown_parser,
summary: intermediate_summary, summary: intermediate_summary,
state: State::Begin,
} }
} }
@ -143,17 +156,38 @@ impl<'a> SummaryParser<'a>
fn parse(mut self) -> Result<Summary, Box<Error>> { fn parse(mut self) -> Result<Summary, Box<Error>> {
self.summary.title = self.parse_title(); self.summary.title = self.parse_title();
Ok(self.summary) Ok(self.summary)
}
/// Advances the state machine by a single markdown event.
///
/// The next event is pulled off the underlying stream and handed to the
/// handler for the current state.
///
/// # Errors
///
/// Returns an error if the event stream has already been exhausted, or if
/// the state handler rejects the event.
///
/// # Panics
///
/// Only `State::Begin` is handled so far; stepping from any other state
/// hits `unimplemented!()`.
fn step(&mut self) -> Result<(), Box<Error>> {
    // Running out of input is a recoverable condition for the caller, so
    // report it through the `Result` instead of panicking.
    let next_event = match self.stream.next() {
        Some(event) => event,
        None => return Err("Unexpected end of the SUMMARY.md event stream".into()),
    };
    trace!("[*] Current state = {:?}, Next Event = {:?}", self.state, next_event);

    match self.state {
        State::Begin => self.step_start(next_event),
        // Listed explicitly (rather than a catch-all) so that adding a new
        // `State` variant forces this match to be revisited.
        State::PrefixChapters |
        State::NumberedChapters(_) |
        State::SuffixChapters |
        State::End => unimplemented!(),
    }
}
/// The very first state, we should see a `BeginParagraph` token or
/// it's an error...
fn step_start(&mut self, event: Event<'a>) -> Result<(), Box<Error>> {
match event {
Event::Start(Tag::Paragraph) => self.state = State::PrefixChapters,
other => panic!("Unexpected tag! {:?}", other),
}
Ok(())
} }
fn parse_title(&mut self) -> Option<String> { fn parse_title(&mut self) -> Option<String> {
if let Some(Event::Start(Tag::Header(1))) = self.stream.next() { if let Some(Event::Start(Tag::Header(1))) = self.stream.next() {
debug!("[*] Found a h1 in the SUMMARY"); debug!("[*] Found a h1 in the SUMMARY");
let tags = collect_events!(self.stream, Tag::Header(1)); let tags = collect_events!(self.stream, Tag::Header(1));
// TODO: How do we deal with headings like "# My **awesome** summary"? // TODO: How do we deal with headings like "# My **awesome** summary"?
// for now, I'm just going to scan through and concatenate the // for now, I'm just going to scan through and concatenate the
// Event::Text tags, skipping any styling. // Event::Text tags, skipping any styling.
Some(stringify_events(tags)) Some(stringify_events(tags))
} else { } else {
@ -179,15 +213,15 @@ impl<'a> SummaryParser<'a>
} }
} }
/// Extract just the text from a bunch of events and concatenate it into a /// Extracts the text from formatted markdown.
/// single string.
fn stringify_events<'a>(events: Vec<Event<'a>>) -> String { fn stringify_events<'a>(events: Vec<Event<'a>>) -> String {
events.into_iter() events
.filter_map(|t| match t { .into_iter()
Event::Text(text) => Some(text), .filter_map(|t| match t {
_ => None, Event::Text(text) => Some(text.into_owned()),
}) _ => None,
.collect() })
.collect()
} }
/// A section number like "1.2.3", basically just a newtype'd `Vec<u32>`. /// A section number like "1.2.3", basically just a newtype'd `Vec<u32>`.
@ -196,9 +230,11 @@ struct SectionNumber(Vec<u32>);
impl Display for SectionNumber { impl Display for SectionNumber {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let dotted_number: String = self.0.iter().map(|i| format!("{}", i)) let dotted_number: String = self.0
.collect::<Vec<String>>() .iter()
.join("."); .map(|i| format!("{}", i))
.collect::<Vec<String>>()
.join(".");
write!(f, "{}", dotted_number) write!(f, "{}", dotted_number)
} }
@ -274,4 +310,27 @@ mod tests {
assert_eq!(got, should_be); assert_eq!(got, should_be);
} }
}
#[test]
fn convert_markdown_events_to_a_string() {
    // Emphasis, inline code and link markup should all be dropped, leaving
    // only the text content behind.
    let markdown = "Hello *World*, `this` is some text [and a link](./path/to/link)";
    let expected = "Hello World, this is some text and a link";

    let events = pulldown_cmark::Parser::new(markdown).collect::<Vec<_>>();

    assert_eq!(stringify_events(events), expected);
}
#[test]
fn can_step_past_first_token() {
    // A freshly constructed parser always starts out in `State::Begin`.
    let mut parser = SummaryParser::new("hello world");
    assert_eq!(parser.state, State::Begin);

    // One step over plain text is expected to move us on to the prefix
    // chapters.
    parser.step().unwrap();

    assert_eq!(parser.state, State::PrefixChapters);
}
}