Search: Fine tuning

* remove searchindex feature (nightly requirement of elasticlunr-rs dropped)
* some documentation
* refactor BookItems iterator
* add iterator for parents
* Include paragraph structure in hierarchy
* Fix URL and special-character handling
* Use complete index
This commit is contained in:
Phaiax 2017-10-09 13:03:21 +02:00
parent aa1f02f7b2
commit a198e99fa9
7 changed files with 228 additions and 99 deletions

View File

@ -29,7 +29,7 @@ toml = "0.4"
open = "1.1"
regex = "0.2.1"
tempdir = "0.3.4"
elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true}
elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs" }
# Watch feature
notify = { version = "4.0", optional = true }
@ -56,7 +56,6 @@ output = []
regenerate-css = []
watch = ["notify", "time", "crossbeam"]
serve = ["iron", "staticfile", "ws"]
searchindex = ["elasticlunr"]
[[bin]]
doc = false

View File

@ -2,7 +2,12 @@ use serde::{Serialize, Serializer};
use serde::ser::SerializeStruct;
use std::path::PathBuf;
/// A BookItem corresponds to one entry of the table of contents file SUMMARY.md.
/// A line in that file can either be a numbered chapter with a section number like 2.1.3 or a
/// prefix or suffix chapter without such a section number.
/// The `String` field in the `Chapter` variant contains the section number as `2.1.3`.
/// The `Chapter` type contains the child elements (which can only be other `BookItem::Chapters`).
/// `BookItem::Affix` and `BookItem::Spacer` are only allowed within the root level.
#[derive(Debug, Clone)]
pub enum BookItem {
Chapter(String, Chapter), // String = section
@ -10,6 +15,9 @@ pub enum BookItem {
Spacer,
}
/// A chapter is a `.md` file that is referenced by some line in the `SUMMARY.md` table of
/// contents. It also has references to its sub chapters via `sub_items`. These items can
/// only be of the variant `BookItem::Chapter`.
#[derive(Debug, Clone)]
pub struct Chapter {
pub name: String,
@ -17,13 +25,21 @@ pub struct Chapter {
pub sub_items: Vec<BookItem>,
}
/// A flattening, depth-first iterator over `BookItem`s and their children.
/// It can be obtained by calling `MDBook::iter()`.
#[derive(Debug, Clone)]
pub struct BookItems<'a> {
pub items: &'a [BookItem],
pub current_index: usize,
pub stack: Vec<(&'a [BookItem], usize)>,
/// The remaining items in the iterator in the current, deepest level of the iterator
items: &'a [BookItem],
/// The higher levels of the hierarchy. The parents of the current level are still
/// in the list and accessible as `[stack[0][0], stack[1][0], stack[2][0], ...]`.
stack: Vec<&'a [BookItem]>,
}
/// Iterator for the parent `BookItem`s of a `BookItem`.
pub struct BookItemParents<'a> {
stack: &'a [ &'a [BookItem] ]
}
impl Chapter {
pub fn new(name: String, path: PathBuf) -> Self {
@ -48,39 +64,78 @@ impl Serialize for Chapter {
}
}
// Shamelessly copied from Rustbook
// (https://github.com/rust-lang/rust/blob/master/src/rustbook/book.rs)
impl<'a> Iterator for BookItems<'a> {
type Item = &'a BookItem;
fn next(&mut self) -> Option<&'a BookItem> {
loop {
if self.current_index >= self.items.len() {
match self.stack.pop() {
None => return None,
Some((parent_items, parent_idx)) => {
self.items = parent_items;
self.current_index = parent_idx + 1;
}
}
if let Some((first, rest)) = self.items.split_first() {
// Return the first element in `items` and optionally dive into afterwards.
match first {
&BookItem::Spacer => {
self.items = rest;
},
&BookItem::Chapter(_, ref ch) |
&BookItem::Affix(ref ch) => {
if ch.sub_items.is_empty() {
self.items = rest;
} else {
let cur = &self.items[self.current_index];
match *cur {
BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => {
self.stack.push((self.items, self.current_index));
// Don't remove `first` for now. (Because of Parent Iterator)
self.stack.push(self.items);
self.items = &ch.sub_items[..];
self.current_index = 0;
}
BookItem::Spacer => {
self.current_index += 1;
}
}
return Some(cur);
},
};
Some(first)
} else {
// Current level is drained => pop from `stack` or return `None`
if let Some(stacked_items) = self.stack.pop() {
// The first item of the popped slice is the bookitem we previously dived into.
self.items = &stacked_items[1..];
self.next()
} else {
None
}
}
}
}
impl<'a> BookItems<'a> {
    /// Creates a depth-first iterator over the given root-level `BookItem`s.
    pub fn new(items: &'a [BookItem]) -> BookItems<'a> {
        BookItems {
            items: items,
            stack: vec![],
        }
    }

    /// Returns an iterator to iterate the parents of the last yielded `BookItem`.
    /// Starts with the root item.
    pub fn current_parents(&'a self) -> BookItemParents<'a> {
        BookItemParents { stack: &self.stack }
    }

    /// Collects the names of the parent `BookItem`s of the last yielded `BookItem` into a list.
    pub fn collect_current_parents_names(&self) -> Vec<String> {
        let mut names = Vec::new();
        for parent in self.current_parents() {
            match *parent {
                BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => {
                    names.push(ch.name.clone());
                }
                _ => {}
            }
        }
        names
    }

    /// Get the level of the last yielded `BookItem`. Root level = 0.
    pub fn current_depth(&'a self) -> usize {
        self.stack.len()
    }
}
impl<'a> Iterator for BookItemParents<'a> {
    type Item = &'a BookItem;

    /// Yields the first element of each stacked level, walking from the root
    /// towards the current item's immediate parent.
    fn next(&mut self) -> Option<&'a BookItem> {
        match self.stack.split_first() {
            Some((level, remaining)) => {
                self.stack = remaining;
                Some(&level[0])
            }
            None => None,
        }
    }
}

View File

@ -105,11 +105,7 @@ impl MDBook {
/// ```
pub fn iter(&self) -> BookItems {
BookItems {
items: &self.content[..],
current_index: 0,
stack: Vec::new(),
}
BookItems::new(&self.content[..])
}
/// `init()` creates some boilerplate files and directories

View File

@ -88,7 +88,6 @@ extern crate serde_derive;
extern crate serde_json;
extern crate tempdir;
extern crate toml;
#[cfg(feature = "searchindex")]
extern crate elasticlunr;
mod parse;

View File

@ -9,7 +9,6 @@ use theme::{Theme, playpen_editor};
use errors::*;
use regex::{Captures, Regex};
#[cfg(feature = "searchindex")]
use elasticlunr;
use std::ascii::AsciiExt;
@ -35,13 +34,15 @@ impl HtmlHandlebars {
item: &BookItem,
mut ctx: RenderItemContext,
print_content: &mut String,
search_documents : &mut Vec<utils::SearchDocument>)
search_documents : &mut Vec<utils::SearchDocument>,
mut parents_names : Vec<String>)
-> Result<()> {
// FIXME: This should be made DRY-er and rely less on mutable state
match *item {
BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch)
if !ch.path.as_os_str().is_empty() =>
{
BookItem::Chapter(_, ref ch) |
BookItem::Affix(ref ch) if !ch.path.as_os_str().is_empty() => {
let path = ctx.book.get_source().join(&ch.path);
let content = utils::fs::file_to_string(&path)?;
let base = path.parent()
@ -49,11 +50,20 @@ impl HtmlHandlebars {
let path = ch.path.to_str().ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "Could not convert path to str")
})?;
let filepath = Path::new(&ch.path).with_extension("html");
let filepath = filepath.to_str().ok_or_else(|| {
Error::from(format!("Bad file name: {}", filepath.display()))
})?;
if ! parents_names.last().map(String::as_ref).unwrap_or("")
.eq_ignore_ascii_case(&ch.name) {
parents_names.push(ch.name.clone());
}
utils::render_markdown_into_searchindex(search_documents,
&content,
path,
&vec![],
filepath,
parents_names,
id_from_content);
// Parse and expand links
@ -84,17 +94,15 @@ impl HtmlHandlebars {
debug!("[*]: Render template");
let rendered = ctx.handlebars.render("index", &ctx.data)?;
let filepath = Path::new(&ch.path).with_extension("html");
let rendered = self.post_process(
rendered,
&normalize_path(filepath.to_str().ok_or_else(|| Error::from(
format!("Bad file name: {}", filepath.display()),
))?),
&normalize_path(filepath),
&ctx.book.config.html_config().unwrap_or_default().playpen,
);
// Write to file
info!("[*] Creating {:?} ✓", filepath.display());
info!("[*] Creating {:?} ✓", filepath);
ctx.book.write_file(filepath, &rendered.into_bytes())?;
if ctx.is_index {
@ -282,20 +290,28 @@ impl Renderer for HtmlHandlebars {
fs::create_dir_all(&destination)
.chain_err(|| "Unexpected error when constructing destination path")?;
for (i, item) in book.iter().enumerate() {
let mut depthfirstiterator = book.iter();
let mut is_index = true;
while let Some(item) = depthfirstiterator.next() {
let ctx = RenderItemContext {
book: book,
handlebars: &handlebars,
destination: destination.to_path_buf(),
data: data.clone(),
is_index: i == 0,
is_index: is_index,
html_config: html_config.clone(),
};
self.render_item(item, ctx, &mut print_content, &mut search_documents)?;
self.render_item(item,
ctx,
&mut print_content,
&mut search_documents,
depthfirstiterator.collect_current_parents_names())?;
is_index = false;
}
// Search index
make_searchindex(book, &search_documents)?;
make_searchindex(book, search_documents)?;
// Print version
self.configure_print_version(&mut data, &print_content);
@ -633,21 +649,29 @@ pub fn normalize_id(content: &str) -> String {
.collect::<String>()
}
#[cfg(not(feature = "searchindex"))]
fn make_searchindex(_book: &MDBook, _search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
Ok(())
}
/// Uses elasticlunr to create a search index and exports that into `searchindex.json`.
fn make_searchindex(book: &MDBook, search_documents : Vec<utils::SearchDocument>) -> Result<()> {
let mut index = elasticlunr::index::Index::new("id",
&["title".into(), "body".into(), "breadcrumbs".into()]);
#[cfg(feature = "searchindex")]
fn make_searchindex(book: &MDBook, search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
let mut index = elasticlunr::IndexBuilder::new();
for sd in search_documents {
index.add_document(&sd.title, &sd.body);
let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new();
map.insert("id".into(), anchor.clone());
map.insert("title".into(), sd.title);
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
}
book.write_file(
Path::new("searchindex").with_extension("json"),
&index.to_json().as_bytes(),
&serde_json::to_string(&index).unwrap().as_bytes(),
)?;
info!("[*] Creating \"searchindex.json\"");

View File

@ -144,6 +144,20 @@ $( document ).ready(function() {
return url;
}
,
escapeHTML: (function() {
var MAP = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
'"': '&#34;',
"'": '&#39;'
};
var repl = function(c) { return MAP[c]; };
return function(s) {
return s.replace(/[&<>'"]/g, repl);
};
})()
,
formatSearchResult : function (result, searchterms) {
// Show text around first occurrence of first search term.
var firstoccurence = result.doc.body.search(searchterms[0]);
@ -173,9 +187,9 @@ $( document ).ready(function() {
return $('<li><a href="'
+ url[0] + '?' + this.MARK_PARAM + '=' + searchterms + '#' + url[1]
+ '">' + result.doc.title + '</a>'
+ '<span class="breadcrumbs">' + result.doc.breadcrumbs + '</span>'
+ '<span class="teaser">' + teaser + '</span>'
+ '">' + result.doc.breadcrumbs + '</a>' // doc.title
+ '<span class="breadcrumbs">' + '</span>'
+ '<span class="teaser">' + this.escapeHTML(teaser) + '</span>'
+ '</li>');
}
,
@ -213,7 +227,8 @@ $( document ).ready(function() {
if (url.params.hasOwnProperty(this.SEARCH_PARAM)
&& url.params[this.SEARCH_PARAM] != "") {
this.searchbar_outer.slideDown();
this.searchbar[0].value = url.params[this.SEARCH_PARAM];
this.searchbar[0].value = decodeURIComponent(
(url.params[this.SEARCH_PARAM]+'').replace(/\+/g, '%20'));
this.searchbarKeyUpHandler();
} else {
this.searchbar_outer.slideUp();
@ -229,19 +244,42 @@ $( document ).ready(function() {
}
,
init : function () {
var this_ = this;
window.md = this;
// For testing purposes: Index current page
this.create_test_searchindex();
//this.create_test_searchindex();
$.getJSON("searchindex.json", function(json) {
//this_.searchindex = elasticlunr.Index.load(json);
// TODO: Workaround: reindex everything
var searchindex = elasticlunr(function () {
this.addField('body');
this.addField('title');
this.addField('breadcrumbs')
this.setRef('id');
});
window.mjs = json;
var docs = json.documentStore.docs;
for (var key in docs) {
searchindex.addDoc(docs[key]);
}
this_.searchindex = searchindex;
// Set up events
var this_ = this;
this.searchicon.click( function(e) { this_.searchIconClickHandler(); } );
this.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } );
this_.searchicon.click( function(e) { this_.searchIconClickHandler(); } );
this_.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } );
$(document).on('keydown', function (e) { this_.globalKeyHandler(e); });
// If the user uses the browser buttons, do the same as if a reload happened
window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); };
// If reloaded, do the search or mark again, depending on the current url parameters
this.doSearchOrMarkFromUrl();
this_.doSearchOrMarkFromUrl();
});
}
,
hasFocus : function () {

View File

@ -2,10 +2,10 @@ pub mod fs;
use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
OPTION_ENABLE_TABLES};
use std::ascii::AsciiExt;
use std::borrow::Cow;
use std::fmt::Write;
use regex::Regex;
use std::rc::Rc;
/// A heading together with the successive content until the next heading will
/// make up one `SearchDocument`. It represents some independently searchable part of the book.
@ -16,22 +16,22 @@ pub struct SearchDocument {
// Content: Flatted paragraphs, lists, code
pub body : String,
/// Needed information to generate a link to the corresponding title anchor
/// First part is the `reference_base` that should be the same for all documents that
/// First part is the `anchor_base` that should be the same for all documents that
/// came from the same `.md` file. The second part is derived from the heading of the search
/// document.
pub sref : (Rc<String>, Option<String>),
// Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]
pub anchor : (String, Option<String>),
// Hierarchy like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]
// as a human understandable path to the search document.
pub breadcrumbs : Vec<Rc<String>>,
pub hierarchy : Vec<String>,
}
impl SearchDocument {
fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument {
fn new(anchor_base : &str, hierarchy : &Vec<String>) -> SearchDocument {
SearchDocument {
title : "".to_owned(),
body : "".to_owned(),
sref : (sref0.clone(), None),
breadcrumbs : bcs.clone()
anchor : (anchor_base.to_owned(), None),
hierarchy : (*hierarchy).clone()
}
}
@ -47,19 +47,29 @@ impl SearchDocument {
self.body.write_str(&" ").unwrap();
}
}
/// Appends the non-empty entries of `more` to `self.hierarchy`, skipping any
/// entry that case-insensitively repeats the element that was last in the
/// hierarchy before this call (to avoid e.g. a chapter title followed by an
/// identical H1 heading).
fn extend_hierarchy(&mut self, more : &Vec<String>) {
    let previous_last = self.hierarchy.last().cloned().unwrap_or_default();
    for heading in more {
        if !heading.is_empty() && !heading.eq_ignore_ascii_case(&previous_last) {
            self.hierarchy.push(heading.clone());
        }
    }
}
}
/// Renders markdown into flat unformatted text for usage in the search index.
/// Refer to the struct `SearchDocument`.
///
/// The field `sref` in the `SearchDocument` struct becomes
/// `(reference_base, Some(heading_to_sref("The Section Heading")))`
/// The field `anchor` in the `SearchDocument` struct becomes
/// `(anchor_base, Some(heading_to_anchor("The Section Heading")))`
pub fn render_markdown_into_searchindex<F>(
search_documents: &mut Vec<SearchDocument>,
text: &str,
reference_base: &str,
breadcrumbs : &Vec<Rc<String>>,
heading_to_sref : F)
anchor_base: &str,
hierarchy : Vec<String>,
heading_to_anchor : F)
where F : Fn(&str) -> String {
let mut opts = Options::empty();
@ -67,24 +77,31 @@ pub fn render_markdown_into_searchindex<F>(
opts.insert(OPTION_ENABLE_FOOTNOTES);
let p = Parser::new_ext(text, opts);
let reference_base = Rc::new(reference_base.to_owned());
let mut current = SearchDocument::new(&reference_base, breadcrumbs);
let mut current = SearchDocument::new(&anchor_base, &hierarchy);
let mut in_header = false;
let max_paragraph_level = 3;
let mut header_hierarchy = vec!["".to_owned(); max_paragraph_level as usize];
for event in p {
match event {
Event::Start(Tag::Header(i)) if i <= 3 => {
Event::Start(Tag::Header(i)) if i <= max_paragraph_level => {
// Paragraph finished, the next header is following now
if current.has_content() {
// Push header_hierarchy to the search documents chapter hierarchy
current.extend_hierarchy(&header_hierarchy);
search_documents.push(current);
}
current = SearchDocument::new(&reference_base, breadcrumbs);
current = SearchDocument::new(&anchor_base, &hierarchy);
in_header = true;
}
Event::End(Tag::Header(_)) => {
// Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs
current.breadcrumbs.push(Rc::new(current.title.clone()));
current.sref.1 = Some(heading_to_sref(&current.title));
Event::End(Tag::Header(i)) if i <= max_paragraph_level => {
in_header = false;
current.anchor.1 = Some(heading_to_anchor(&current.title));
header_hierarchy[i as usize -1] = current.title.clone();
for h in &mut header_hierarchy[i as usize ..] {
*h = "".to_owned();
}
}
Event::Start(_) | Event::End(_) => {}
Event::Text(text) => {
@ -97,6 +114,7 @@ pub fn render_markdown_into_searchindex<F>(
Event::SoftBreak | Event::HardBreak => {}
}
}
current.extend_hierarchy(&header_hierarchy);
search_documents.push(current);
}