Encoded the first state in the SummaryParser
This commit is contained in:
parent
dacb3e082e
commit
dcc8368543
|
@ -11,39 +11,40 @@ use pulldown_cmark::{self, Event, Tag};
|
||||||
///
|
///
|
||||||
/// # Summary Format
|
/// # Summary Format
|
||||||
///
|
///
|
||||||
/// **Title:** It's common practice to begin with a title, generally
|
/// **Title:** It's common practice to begin with a title, generally
|
||||||
/// "# Summary". But it is not mandatory, the parser just ignores it. So you
|
/// "# Summary". But it is not mandatory, the parser just ignores it. So you
|
||||||
/// can too if you feel like it.
|
/// can too if you feel like it.
|
||||||
///
|
///
|
||||||
/// **Prefix Chapter:** Before the main numbered chapters you can add a couple
|
/// **Prefix Chapter:** Before the main numbered chapters you can add a couple
|
||||||
/// of elements that will not be numbered. This is useful for forewords,
|
/// of elements that will not be numbered. This is useful for forewords,
|
||||||
/// introductions, etc. There are however some constraints. You can not nest
|
/// introductions, etc. There are however some constraints. You can not nest
|
||||||
/// prefix chapters, they should all be on the root level. And you can not add
|
/// prefix chapters, they should all be on the root level. And you can not add
|
||||||
/// prefix chapters once you have added numbered chapters.
|
/// prefix chapters once you have added numbered chapters.
|
||||||
///
|
///
|
||||||
/// ```markdown
|
/// ```markdown
|
||||||
/// [Title of prefix element](relative/path/to/markdown.md)
|
/// [Title of prefix element](relative/path/to/markdown.md)
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// **Numbered Chapter:** Numbered chapters are the main content of the book, they
|
/// **Numbered Chapter:** Numbered chapters are the main content of the book,
|
||||||
|
/// they
|
||||||
/// will be numbered and can be nested, resulting in a nice hierarchy (chapters,
|
/// will be numbered and can be nested, resulting in a nice hierarchy (chapters,
|
||||||
/// sub-chapters, etc.)
|
/// sub-chapters, etc.)
|
||||||
///
|
///
|
||||||
/// ```markdown
|
/// ```markdown
|
||||||
/// - [Title of the Chapter](relative/path/to/markdown.md)
|
/// - [Title of the Chapter](relative/path/to/markdown.md)
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// You can either use - or * to indicate a numbered chapter.
|
/// You can either use - or * to indicate a numbered chapter.
|
||||||
///
|
///
|
||||||
/// **Suffix Chapter:** After the numbered chapters you can add a couple of
|
/// **Suffix Chapter:** After the numbered chapters you can add a couple of
|
||||||
/// non-numbered chapters. They are the same as prefix chapters but come after
|
/// non-numbered chapters. They are the same as prefix chapters but come after
|
||||||
/// the numbered chapters instead of before.
|
/// the numbered chapters instead of before.
|
||||||
///
|
///
|
||||||
/// All other elements are unsupported and will be ignored at best or result in
|
/// All other elements are unsupported and will be ignored at best or result in
|
||||||
/// an error.
|
/// an error.
|
||||||
pub fn parse_summary(summary: &str) -> Result<Summary, Box<Error>> {
|
pub fn parse_summary(summary: &str) -> Result<Summary, Box<Error>> {
|
||||||
let parser = SummaryParser::new(summary);
|
let parser = SummaryParser::new(summary);
|
||||||
parser.parse()
|
parser.parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The parsed `SUMMARY.md`, specifying how the book should be laid out.
|
/// The parsed `SUMMARY.md`, specifying how the book should be laid out.
|
||||||
|
@ -52,7 +53,7 @@ pub struct Summary {
|
||||||
title: Option<String>,
|
title: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A struct representing an entry in the `SUMMARY.md`, possibly with nested
|
/// A struct representing an entry in the `SUMMARY.md`, possibly with nested
|
||||||
/// entries.
|
/// entries.
|
||||||
///
|
///
|
||||||
/// This is roughly the equivalent of `[Some section](./path/to/file.md)`.
|
/// This is roughly the equivalent of `[Some section](./path/to/file.md)`.
|
||||||
|
@ -69,41 +70,53 @@ enum SummaryItem {
|
||||||
Separator,
|
Separator,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||||
|
enum State {
|
||||||
|
Begin,
|
||||||
|
PrefixChapters,
|
||||||
|
/// Numbered chapters, including the nesting level.
|
||||||
|
NumberedChapters(u32),
|
||||||
|
SuffixChapters,
|
||||||
|
End,
|
||||||
|
}
|
||||||
|
|
||||||
/// A stateful parser for parsing a `SUMMARY.md` file.
|
/// A stateful parser for parsing a `SUMMARY.md` file.
|
||||||
///
|
///
|
||||||
/// # Grammar
|
/// # Grammar
|
||||||
///
|
///
|
||||||
/// The `SUMMARY.md` file has a grammar which looks something like this:
|
/// The `SUMMARY.md` file has a grammar which looks something like this:
|
||||||
///
|
///
|
||||||
/// ```text
|
/// ```text
|
||||||
/// summary ::= title prefix_chapters numbered_chapters suffix_chapters
|
/// summary ::= title prefix_chapters numbered_chapters
|
||||||
|
/// suffix_chapters
|
||||||
/// title ::= "# " TEXT
|
/// title ::= "# " TEXT
|
||||||
/// | EPSILON
|
/// | EPSILON
|
||||||
/// prefix_chapters ::= item*
|
/// prefix_chapters ::= item*
|
||||||
/// suffix_chapters ::= item*
|
/// suffix_chapters ::= item*
|
||||||
/// numbered_chapters ::= dotted_item+
|
/// numbered_chapters ::= dotted_item+
|
||||||
/// dotted_item ::= INDENT* DOT_POINT item
|
/// dotted_item ::= INDENT* DOT_POINT item
|
||||||
/// item ::= link
|
/// item ::= link
|
||||||
/// | separator
|
/// | separator
|
||||||
/// separator ::= "---"
|
/// separator ::= "---"
|
||||||
/// link ::= "[" TEXT "]" "(" TEXT ")"
|
/// link ::= "[" TEXT "]" "(" TEXT ")"
|
||||||
/// DOT_POINT ::= "-"
|
/// DOT_POINT ::= "-"
|
||||||
/// | "*"
|
/// | "*"
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly)
|
/// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly)
|
||||||
/// > match the following regex: "[^<>\n\[\]]+".
|
/// > match the following regex: "[^<>\n\[\]]+".
|
||||||
struct SummaryParser<'a> {
|
struct SummaryParser<'a> {
|
||||||
stream: pulldown_cmark::Parser<'a>,
|
stream: pulldown_cmark::Parser<'a>,
|
||||||
summary: Summary,
|
summary: Summary,
|
||||||
|
state: State,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reads `Events` from the provided stream until the corresponding
|
/// Reads `Events` from the provided stream until the corresponding
|
||||||
/// `Event::End` is encountered which matches the `$delimiter` pattern.
|
/// `Event::End` is encountered which matches the `$delimiter` pattern.
|
||||||
///
|
///
|
||||||
/// This is the equivalent of doing
|
/// This is the equivalent of doing
|
||||||
/// `$stream.take_while(|e| e != $delimiter).collect()` but it allows you to
|
/// `$stream.take_while(|e| e != $delimiter).collect()` but it allows you to
|
||||||
/// use pattern matching and you won't get errors because `take_while()`
|
/// use pattern matching and you won't get errors because `take_while()`
|
||||||
/// moves `$stream` out of self.
|
/// moves `$stream` out of self.
|
||||||
macro_rules! collect_events {
|
macro_rules! collect_events {
|
||||||
($stream:expr, $delimiter:pat) => {
|
($stream:expr, $delimiter:pat) => {
|
||||||
|
@ -127,8 +140,7 @@ macro_rules! collect_events {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SummaryParser<'a>
|
impl<'a> SummaryParser<'a> {
|
||||||
{
|
|
||||||
fn new(text: &str) -> SummaryParser {
|
fn new(text: &str) -> SummaryParser {
|
||||||
let pulldown_parser = pulldown_cmark::Parser::new(text);
|
let pulldown_parser = pulldown_cmark::Parser::new(text);
|
||||||
let intermediate_summary = Summary::default();
|
let intermediate_summary = Summary::default();
|
||||||
|
@ -136,6 +148,7 @@ impl<'a> SummaryParser<'a>
|
||||||
SummaryParser {
|
SummaryParser {
|
||||||
stream: pulldown_parser,
|
stream: pulldown_parser,
|
||||||
summary: intermediate_summary,
|
summary: intermediate_summary,
|
||||||
|
state: State::Begin,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,17 +156,38 @@ impl<'a> SummaryParser<'a>
|
||||||
fn parse(mut self) -> Result<Summary, Box<Error>> {
|
fn parse(mut self) -> Result<Summary, Box<Error>> {
|
||||||
self.summary.title = self.parse_title();
|
self.summary.title = self.parse_title();
|
||||||
|
|
||||||
Ok(self.summary)
|
Ok(self.summary)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn step(&mut self) -> Result<(), Box<Error>> {
|
||||||
|
let next_event = self.stream.next().expect("TODO: error-chain");
|
||||||
|
trace!("[*] Current state = {:?}, Next Event = {:?}", self.state, next_event);
|
||||||
|
|
||||||
|
match self.state {
|
||||||
|
State::Begin => self.step_start(next_event),
|
||||||
|
other => unimplemented!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The very first state: we expect an `Event::Start(Tag::Paragraph)` event;
|
||||||
|
/// any other event is an error (currently a panic).
|
||||||
|
fn step_start(&mut self, event: Event<'a>) -> Result<(), Box<Error>> {
|
||||||
|
match event {
|
||||||
|
Event::Start(Tag::Paragraph) => self.state = State::PrefixChapters,
|
||||||
|
other => panic!("Unexpected tag! {:?}", other),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_title(&mut self) -> Option<String> {
|
fn parse_title(&mut self) -> Option<String> {
|
||||||
if let Some(Event::Start(Tag::Header(1))) = self.stream.next() {
|
if let Some(Event::Start(Tag::Header(1))) = self.stream.next() {
|
||||||
debug!("[*] Found a h1 in the SUMMARY");
|
debug!("[*] Found a h1 in the SUMMARY");
|
||||||
|
|
||||||
let tags = collect_events!(self.stream, Tag::Header(1));
|
let tags = collect_events!(self.stream, Tag::Header(1));
|
||||||
|
|
||||||
// TODO: How do we deal with headings like "# My **awesome** summary"?
|
// TODO: How do we deal with headings like "# My **awesome** summary"?
|
||||||
// for now, I'm just going to scan through and concatenate the
|
// for now, I'm just going to scan through and concatenate the
|
||||||
// Event::Text tags, skipping any styling.
|
// Event::Text tags, skipping any styling.
|
||||||
Some(stringify_events(tags))
|
Some(stringify_events(tags))
|
||||||
} else {
|
} else {
|
||||||
|
@ -179,15 +213,15 @@ impl<'a> SummaryParser<'a>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract just the text from a bunch of events and concatenate it into a
|
/// Extracts the text from formatted markdown.
|
||||||
/// single string.
|
|
||||||
fn stringify_events<'a>(events: Vec<Event<'a>>) -> String {
|
fn stringify_events<'a>(events: Vec<Event<'a>>) -> String {
|
||||||
events.into_iter()
|
events
|
||||||
.filter_map(|t| match t {
|
.into_iter()
|
||||||
Event::Text(text) => Some(text),
|
.filter_map(|t| match t {
|
||||||
_ => None,
|
Event::Text(text) => Some(text.into_owned()),
|
||||||
})
|
_ => None,
|
||||||
.collect()
|
})
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A section number like "1.2.3", basically just a newtype'd `Vec<u32>`.
|
/// A section number like "1.2.3", basically just a newtype'd `Vec<u32>`.
|
||||||
|
@ -196,9 +230,11 @@ struct SectionNumber(Vec<u32>);
|
||||||
|
|
||||||
impl Display for SectionNumber {
|
impl Display for SectionNumber {
|
||||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||||
let dotted_number: String = self.0.iter().map(|i| format!("{}", i))
|
let dotted_number: String = self.0
|
||||||
.collect::<Vec<String>>()
|
.iter()
|
||||||
.join(".");
|
.map(|i| format!("{}", i))
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(".");
|
||||||
|
|
||||||
write!(f, "{}", dotted_number)
|
write!(f, "{}", dotted_number)
|
||||||
}
|
}
|
||||||
|
@ -274,4 +310,27 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(got, should_be);
|
assert_eq!(got, should_be);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
#[test]
|
||||||
|
fn convert_markdown_events_to_a_string() {
|
||||||
|
let src = "Hello *World*, `this` is some text [and a link](./path/to/link)";
|
||||||
|
let should_be = "Hello World, this is some text and a link";
|
||||||
|
|
||||||
|
let events = pulldown_cmark::Parser::new(src).collect();
|
||||||
|
let got = stringify_events(events);
|
||||||
|
|
||||||
|
assert_eq!(got, should_be);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn can_step_past_first_token() {
|
||||||
|
let src = "hello world";
|
||||||
|
let should_be = State::PrefixChapters;
|
||||||
|
|
||||||
|
let mut parser = SummaryParser::new(src);
|
||||||
|
assert_eq!(parser.state, State::Begin);
|
||||||
|
parser.step().unwrap();
|
||||||
|
assert_eq!(parser.state, should_be);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue