simplified the preprocessor and changed some wording about the

preprocessor in the book
2021-02-16 03:29:22 +00:00 · 2021-02-16 03:29:22 +00:00 · 2ebee79d64
parent ca3e426f4a
commit 2ebee79d64
3 changed files with 79 additions and 185 deletions
--- a/guide/src/format/config.md
+++ b/guide/src/format/config.md
@ -83,14 +83,14 @@ This controls the build process of your book.
  will be created when the book is built (i.e. `create-missing = true`). If this
  is `false` then the build process will instead exit with an error if any files
  do not exist.
- **use-default-preprocessors:** Disable the default preprocessors of (`links` &
+- **use-default-preprocessors:** Disable the default preprocessors (`links`,
-  `index`) by setting this option to `false`.
+  `index` & `metadata`) by setting this option to `false`.
  If you have the same, and/or other preprocessors declared via their table
  of configuration, they will run instead.
-  - For clarity, with no preprocessor configuration, the default `links` and
+  - For clarity, with no preprocessor configuration, the default `links`,
-    `index` will run.
+    `index` and `metadata` will run.
  - Setting `use-default-preprocessors = false` will disable these
    default preprocessors from running.
  - Adding `[preprocessor.links]`, for example, will ensure, regardless of
@ -105,24 +105,23 @@ The following preprocessors are available and included by default:
 - `index`: Convert all chapter files named `README.md` into `index.md`. That is
  to say, all `README.md` would be rendered to an index file `index.html` in the
  rendered book.
- `metadata`: Reads an optional TOML header from the markdown chapter sources
+- `metadata`: Strips an optional TOML header from the markdown chapter sources
  to provide chapter specific information. This data is then made available to
-  handlebars.js. The supported fields are `author`, `title`, `description`, `keywords`,
+  handlebars.js as a collection of properties. The fields with special handling are `author`, `title`, `description`, `keywords`,
  `date` and `modified`.
-**Sample Chapter**
+**Sample Chapter With Default "index.hbs"**
 ```toml
 ---
-author = "Jane Doe" # this is written to the author meta tag
+author = "Jane Doe"     # this is written to the author meta tag
-title = "Blog Post #1" # this overwrites the default title handlebar
+title = "Blog Post #1"  # this overwrites the default title handlebar
 date = "2021/02/14"
 keywords = [
  "Rust",
  "Blog",
-] # this sets the keywords meta tag
+]                       # this sets the keywords meta tag
 description = "A blog about rust-lang" # this sets the description meta tag
 date = "2021/02/14"     # this exposes date as a property for use in the handlebars template
 ---
-This is my blog about rust.
+This is my blog about rust. # only from this point on remains after preprocessing
 ```
--- a/guide/src/format/theme/index-hbs.md
+++ b/guide/src/format/theme/index-hbs.md
@ -19,7 +19,7 @@ Here is a list of the properties that are exposed:
 - ***language*** Language of the book in the form `en`, as specified in `book.toml` (if not specified, defaults to `en`). To use in <code
  class="language-html">\<html lang="{{ language }}"></code> for example.
- ***title*** Title used for the current page. This is identical to `{{ chapter_title }} - {{ book_title }}` unless `book_title` is not set in which case it just defaults to the `chapter_title`.
+- ***title*** Title used for the current page. This is identical to `{{ chapter_title }} - {{ book_title }}` unless `book_title` is not set in which case it just defaults to the `chapter_title`. This property can be overwritten by the TOML front matter of a chapter's source.
 - ***book_title*** Title of the book, as specified in `book.toml`
 - ***chapter_title*** Title of the current chapter, as listed in `SUMMARY.md`
--- a/src/preprocess/metadata.rs
+++ b/src/preprocess/metadata.rs
@ -1,17 +1,17 @@
 use crate::errors::*;
-use regex::{CaptureMatches, Captures, Regex};
+use regex::Regex;
 use std::ops::Range;
 use super::{Preprocessor, PreprocessorContext};
 use crate::book::{Book, BookItem};
-/// A preprocessor for reading TOML front matter from a markdown file. The supported
+/// A preprocessor for reading TOML front matter from a markdown file. Special
-/// fields are:
+/// fields are included in the `index.hbs` file for handlebars.js templating and
 /// are:
 /// - `author` - For setting the author meta tag.
 /// - `title` - For overwriting the title tag.
 /// - `description` - For setting the description meta tag.
 /// - `keywords` - For setting the keywords meta tag.
 /// - `date` - The date the file was created, creates a handlebars.js variable {{date}}.
 /// - `modified` - The date the file was modified, creates a handlebars.js variable {{modified}}.
 #[derive(Default)]
 pub struct MetadataPreprocessor;
@ -32,142 +32,57 @@ impl Preprocessor for MetadataPreprocessor {
    fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
        book.for_each_mut(|section: &mut BookItem| {
            if let BookItem::Chapter(ref mut ch) = *section {
-                let (metadata, content) = collect(&ch.content);
+                if let Some(m) = Match::find_metadata(&ch.content) {
-                ch.content = content;
+                    if let Ok(mut meta) = toml::from_str(&ch.content[m.range]) {
-                ch.chapter_config.append(&mut metadata.to_map());
+                        ch.chapter_config.append(&mut meta);
                        ch.content = String::from(&ch.content[m.end..]);
                    };
                }
            }
        });
        Ok(book)
    }
 }
-fn collect(s: &str) -> (Metadata, String) {
+struct Match {
-    let mut end_index = 0;
+    range: Range<usize>,
-    let mut replaced = String::new();
+    end: usize,
    let metadata: Metadata = if let Some(metadata) = find_metadata(s).next() {
        match toml::from_str(metadata.text) {
            Ok(meta) => {
                end_index += metadata.end_index;
                meta
            }
            _ => Metadata::default(),
        }
    } else {
        Metadata::default()
    };
    replaced.push_str(&s[end_index..]);
    (metadata, replaced)
 }
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+impl Match {
-#[serde(default, rename_all = "kebab-case")]
+    fn find_metadata(contents: &str) -> Option<Match> {
-struct Metadata {
+        // lazily compute following regex
-    author: Option<String>,
+        // r"\A-{3,}\n(?P<metadata>.*?)^-{3,}\n"
-    title: Option<String>,
+        lazy_static! {
-    date: Option<String>,
+            static ref RE: Regex = Regex::new(
-    keywords: Option<Vec<String>>,
+                r"(?xms)          # insignificant whitespace mode and multiline
-    description: Option<String>,
+                \A-{3,}\n         # match a horizontal rule at the start of the content
-    modified: Option<String>,
+                (?P<metadata>.*?) # name the match between horizontal rules metadata
-}
+                ^-{3,}\n          # match a horizontal rule
-
+                "
-impl Metadata {
+            )
-    fn to_map(self) -> serde_json::Map<String, serde_json::Value> {
+            .unwrap();
-        let mut map = serde_json::Map::new();
+        };
-        if let Some(author) = self.author {
+        if let Some(mat) = RE.captures(contents) {
-            map.insert("author".to_string(), json!(author));
+            // safe to unwrap as we know there is a match
-        }
+            let metadata = mat.name("metadata").unwrap();
-        if let Some(title) = self.title {
+            Some( Match {
-            map.insert("title".to_string(), json!(title));
+                range: metadata.start()..metadata.end(),
-        }
+                end: mat.get(0).unwrap().end(),
-        if let Some(date) = self.date {
+            })
-            map.insert("date".to_string(), json!(date));
+        } else {
-        }
+            None
        if let Some(keywords) = self.keywords {
            map.insert("keywords".to_string(), json!(keywords));
        }
        if let Some(modified) = self.modified {
            map.insert("modified".to_string(), json!(modified));
        }
        if let Some(description) = self.description {
            map.insert("description".to_string(), json!(description));
        }
        map
    }
 }
 impl Default for Metadata {
    fn default() -> Metadata {
        Metadata {
            author: None,
            title: None,
            date: None,
            keywords: None,
            modified: None,
            description: None,
        }
    }
 }
 #[derive(PartialEq, Debug, Clone)]
 struct MetadataItem<'a> {
    end_index: usize,
    text: &'a str,
 }
 impl<'a> MetadataItem<'a> {
    fn from_capture(cap: Captures<'a>) -> Option<MetadataItem<'a>> {
        if let Some(mat) = cap.name("metadata") {
            let full_match = cap.get(0).unwrap();
            if full_match.start() == 0 {
                return Some(MetadataItem {
                    end_index: full_match.end(),
                    text: mat.as_str(),
                });
            }
        }
        None
    }
 }
 struct MetadataIter<'a>(CaptureMatches<'a, 'a>);
 impl<'a> Iterator for MetadataIter<'a> {
    type Item = MetadataItem<'a>;
    fn next(&mut self) -> Option<MetadataItem<'a>> {
        for cap in &mut self.0 {
            if let Some(inc) = MetadataItem::from_capture(cap) {
                return Some(inc);
            }
        }
        None
    }
 }
 fn find_metadata(contents: &str) -> MetadataIter<'_> {
    // lazily compute following regex
    // r"^-{3,}\n(?P<metadata>.*?)^-{3,}\n"
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r"(?xms)          # insignificant whitespace mode and multiline
            ^-{3,}\n          # match a horizontal rule
            (?P<metadata>.*?) # name the match between horizontal rules metadata
            ^-{3,}\n          # match a horizontal rule
            "
        )
        .unwrap();
    }
    MetadataIter(RE.captures_iter(contents))
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
-    fn test_collect_not_at_start() {
+    fn test_find_metadata_not_at_start() {
-        let start = "\
+        let s = "\
        content\n\
        ---
        author = \"Adam\"
@ -181,12 +96,14 @@ mod tests {
        ---
        content
        ";
-        assert_eq!(collect(start).1, start);
+        if let Some(_) = Match::find_metadata(s) {
            panic!()
        }
    }
    #[test]
-    fn test_collect_at_start() {
+    fn test_find_metadata_at_start() {
-        let start = "\
+        let s = "\
        ---
        author = \"Adam\"
        title = \"Blog Post #1\"
@ -200,60 +117,43 @@ mod tests {
        ---\n\
        content
        ";
-        let end = "\
+        if let None = Match::find_metadata(s) {
-        content
+            panic!()
-        ";
+        }
        assert_eq!(collect(start).1, end);
    }
    #[test]
-    fn test_collect_partial_metadata() {
+    fn test_find_metadata_partial_metadata() {
-        let start = "\
+        let s = "\
        ---
-        author = \"Adam\"\n\
+        author = \"Adam\n\
        ---\n\
        content
        ";
-        let end = "\
+        if let Some(_) = Match::find_metadata(s) {
-        content
+            panic!()
-        ";
+        }
        assert_eq!(collect(start).1, end);
        assert_eq!(
            collect(start).0,
            Metadata {
                author: Some("Adam".to_string()),
                ..Default::default()
            }
        );
    }
    #[test]
-    fn test_collect_unsupported_metadata() {
+    fn test_find_metadata_not_metadata() {
-        let start = "\
+        type Map = serde_json::Map<String, serde_json::Value>;
-        ---
+        let s = "\
        author: \"Adam\"
        unsupported_field: \"text\"\n\
        ---
        followed by more content
        ";
        assert_eq!(collect(start).1, start);
    }
    #[test]
    fn test_collect_not_metadata() {
        let start = "\
        ---
        This is just standard content that happens to start with a line break
        and has a second line break in the text.\n\
        ---
        followed by more content
        ";
-        assert_eq!(collect(start).1, start);
+        if let Some(m) = Match::find_metadata(s) {
            if let Ok(_) = toml::from_str::<Map>(&s[m.range]) {
                panic!()
            }
        }
    }
    #[test]
-    fn test_metadata_to_map() {
+    fn test_parse_metadata() {
-        let metadata: Metadata = toml::from_str(
+        let metadata: serde_json::Map<String, serde_json::Value> = toml::from_str(
            "author = \"Adam\"
        title = \"Blog Post #1\"
        keywords = [
@ -261,17 +161,12 @@ mod tests {
            \"Blog\",
        ]
        date = \"2021/02/15\"
-        description = \"My rust blog.\"
+        ").unwrap();
-        modified = \"2021/02/16\" ",
+        let mut map = serde_json::Map::<String, serde_json::Value>::new();
        )
        .unwrap();
        let mut map = serde_json::Map::new();
        map.insert("author".to_string(), json!("Adam"));
        map.insert("title".to_string(), json!("Blog Post #1"));
        map.insert("keywords".to_string(), json!(vec!["Rust", "Blog"]));
        map.insert("date".to_string(), json!("2021/02/15"));
-        map.insert("description".to_string(), json!("My rust blog."));
+        assert_eq!(metadata, map)
        map.insert("modified".to_string(), json!("2021/02/16"));
        assert_eq!(metadata.to_map(), map)
    }
 }