From a198e99fa9fad7d16d3db12a372bf26805db279a Mon Sep 17 00:00:00 2001 From: Phaiax Date: Mon, 9 Oct 2017 13:03:21 +0200 Subject: [PATCH] Search: Fine tuning * remove searchindex feature (nightly requirement of elasticlunr-rs dropped) * some documentation * refactor BookItems iterator * add iterator for parents * Include paragraph structure in hierarchy * Fix url and specialchar handling * Use complete index --- Cargo.toml | 3 +- src/book/bookitem.rs | 115 ++++++++++++++----- src/book/mod.rs | 6 +- src/lib.rs | 1 - src/renderer/html_handlebars/hbs_renderer.rs | 74 ++++++++---- src/theme/book.js | 68 ++++++++--- src/utils/mod.rs | 60 ++++++---- 7 files changed, 228 insertions(+), 99 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fbdeaff8..7d7f048d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ toml = "0.4" open = "1.1" regex = "0.2.1" tempdir = "0.3.4" -elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true} +elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs" } # Watch feature notify = { version = "4.0", optional = true } @@ -56,7 +56,6 @@ output = [] regenerate-css = [] watch = ["notify", "time", "crossbeam"] serve = ["iron", "staticfile", "ws"] -searchindex = ["elasticlunr"] [[bin]] doc = false diff --git a/src/book/bookitem.rs b/src/book/bookitem.rs index a2ec2cb0..4d16cc19 100644 --- a/src/book/bookitem.rs +++ b/src/book/bookitem.rs @@ -2,7 +2,12 @@ use serde::{Serialize, Serializer}; use serde::ser::SerializeStruct; use std::path::PathBuf; - +/// A BookItem corresponds to one entry of the table of contents file SUMMARY.md. +/// A line in that file can either be a numbered chapter with a section number like 2.1.3 or a +/// suffix or postfix chapter without such a section number. +/// The `String` field in the `Chapter` variant contains the section number as `2.1.3`. +/// The `Chapter` type contains the child elements (which can only be other `BookItem::Chapters`). 
+/// `BookItem::Affix` and `BookItem::Spacer` are only allowed within the root level. #[derive(Debug, Clone)] pub enum BookItem { Chapter(String, Chapter), // String = section @@ -10,6 +15,9 @@ pub enum BookItem { Spacer, } +/// A chapter is a `.md` file that is referenced by some line in the `SUMMARY.md` table of +/// contents. It also has references to its sub chapters via `sub_items`. These items can +/// only be of the variant `BookItem::Chapter`. #[derive(Debug, Clone)] pub struct Chapter { pub name: String, @@ -17,13 +25,21 @@ pub struct Chapter { pub sub_items: Vec, } +/// A flattening, depth-first iterator over Bookitems and it's children. +/// It can be obtained by calling `MDBook::iter()`. #[derive(Debug, Clone)] pub struct BookItems<'a> { - pub items: &'a [BookItem], - pub current_index: usize, - pub stack: Vec<(&'a [BookItem], usize)>, + /// The remaining items in the iterator in the current, deepest level of the iterator + items: &'a [BookItem], + /// The higher levels of the hierarchy. The parents of the current level are still + /// in the list and accessible as `[stack[0][0], stack[1][0], stack[2][0], ...]`. + stack: Vec<&'a [BookItem]>, } +/// Iterator for the parent `BookItem`s of a `BookItem`. 
+pub struct BookItemParents<'a> { + stack: &'a [ &'a [BookItem] ] +} impl Chapter { pub fn new(name: String, path: PathBuf) -> Self { @@ -48,39 +64,78 @@ impl Serialize for Chapter { } } - - -// Shamelessly copied from Rustbook -// (https://github.com/rust-lang/rust/blob/master/src/rustbook/book.rs) impl<'a> Iterator for BookItems<'a> { type Item = &'a BookItem; fn next(&mut self) -> Option<&'a BookItem> { - loop { - if self.current_index >= self.items.len() { - match self.stack.pop() { - None => return None, - Some((parent_items, parent_idx)) => { - self.items = parent_items; - self.current_index = parent_idx + 1; - } - } - } else { - let cur = &self.items[self.current_index]; - - match *cur { - BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => { - self.stack.push((self.items, self.current_index)); + if let Some((first, rest)) = self.items.split_first() { + // Return the first element in `items` and optionally dive into afterwards. + match first { + &BookItem::Spacer => { + self.items = rest; + }, + &BookItem::Chapter(_, ref ch) | + &BookItem::Affix(ref ch) => { + if ch.sub_items.is_empty() { + self.items = rest; + } else { + // Don't remove `first` for now. (Because of Parent Iterator) + self.stack.push(self.items); self.items = &ch.sub_items[..]; - self.current_index = 0; } - BookItem::Spacer => { - self.current_index += 1; - } - } - - return Some(cur); + }, + }; + Some(first) + } else { + // Current level is drained => pop from `stack` or return `None` + if let Some(stacked_items) = self.stack.pop() { + // The first item of the popped slice is the bookitem we previously dived into. + self.items = &stacked_items[1..]; + self.next() + } else { + None } } } } + +impl<'a> BookItems<'a> { + pub fn new(items : &'a[BookItem]) -> BookItems<'a> { + BookItems { + items : items, + stack : vec![], + } + } + + /// Returns an iterator to iterate the parents of the last yielded `BookItem`. + /// Starts with the root item. 
+ pub fn current_parents(&'a self) -> BookItemParents<'a> { + BookItemParents { stack : &self.stack } + } + + /// Collects the names of the parent `BookItem`s of the last yielded `Bookitem` into a list. + pub fn collect_current_parents_names(&self) -> Vec { + self.current_parents().filter_map(|i| match i { + &BookItem::Chapter(_, ref ch) | &BookItem::Affix(ref ch) => Some(ch.name.clone()), + _ => None, + }).collect() + } + + /// Get the level of the last yielded `BookItem`. Root level = 0 + pub fn current_depth(&'a self) -> usize { + self.stack.len() + } +} + +impl<'a> Iterator for BookItemParents<'a> { + type Item = &'a BookItem; + + fn next(&mut self) -> Option<&'a BookItem> { + if let Some((first, rest)) = self.stack.split_first() { + self.stack = rest; + Some (&first[0]) + } else { + None + } + } +} \ No newline at end of file diff --git a/src/book/mod.rs b/src/book/mod.rs index fc757a90..9a1cf95f 100644 --- a/src/book/mod.rs +++ b/src/book/mod.rs @@ -105,11 +105,7 @@ impl MDBook { /// ``` pub fn iter(&self) -> BookItems { - BookItems { - items: &self.content[..], - current_index: 0, - stack: Vec::new(), - } + BookItems::new(&self.content[..]) } /// `init()` creates some boilerplate files and directories diff --git a/src/lib.rs b/src/lib.rs index cc2c7771..00e5cabe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,7 +88,6 @@ extern crate serde_derive; extern crate serde_json; extern crate tempdir; extern crate toml; -#[cfg(feature = "searchindex")] extern crate elasticlunr; mod parse; diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index d0e2fc32..59ba412a 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -9,7 +9,6 @@ use theme::{Theme, playpen_editor}; use errors::*; use regex::{Captures, Regex}; -#[cfg(feature = "searchindex")] use elasticlunr; use std::ascii::AsciiExt; @@ -35,13 +34,15 @@ impl HtmlHandlebars { item: &BookItem, mut ctx: 
RenderItemContext, print_content: &mut String, - search_documents : &mut Vec) + search_documents : &mut Vec, + mut parents_names : Vec) -> Result<()> { + // FIXME: This should be made DRY-er and rely less on mutable state match *item { - BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) - if !ch.path.as_os_str().is_empty() => - { + BookItem::Chapter(_, ref ch) | + BookItem::Affix(ref ch) if !ch.path.as_os_str().is_empty() => { + let path = ctx.book.get_source().join(&ch.path); let content = utils::fs::file_to_string(&path)?; let base = path.parent() @@ -49,11 +50,20 @@ impl HtmlHandlebars { let path = ch.path.to_str().ok_or_else(|| { io::Error::new(io::ErrorKind::Other, "Could not convert path to str") })?; + let filepath = Path::new(&ch.path).with_extension("html"); + let filepath = filepath.to_str().ok_or_else(|| { + Error::from(format!("Bad file name: {}", filepath.display())) + })?; + + if ! parents_names.last().map(String::as_ref).unwrap_or("") + .eq_ignore_ascii_case(&ch.name) { + parents_names.push(ch.name.clone()); + } utils::render_markdown_into_searchindex(search_documents, &content, - path, - &vec![], + filepath, + parents_names, id_from_content); // Parse and expand links @@ -84,17 +94,15 @@ impl HtmlHandlebars { debug!("[*]: Render template"); let rendered = ctx.handlebars.render("index", &ctx.data)?; - let filepath = Path::new(&ch.path).with_extension("html"); + let rendered = self.post_process( rendered, - &normalize_path(filepath.to_str().ok_or_else(|| Error::from( - format!("Bad file name: {}", filepath.display()), - ))?), + &normalize_path(filepath), &ctx.book.config.html_config().unwrap_or_default().playpen, ); // Write to file - info!("[*] Creating {:?} โœ“", filepath.display()); + info!("[*] Creating {:?} โœ“", filepath); ctx.book.write_file(filepath, &rendered.into_bytes())?; if ctx.is_index { @@ -282,20 +290,28 @@ impl Renderer for HtmlHandlebars { fs::create_dir_all(&destination) .chain_err(|| "Unexpected error when constructing 
destination path")?; - for (i, item) in book.iter().enumerate() { + + let mut depthfirstiterator = book.iter(); + let mut is_index = true; + while let Some(item) = depthfirstiterator.next() { let ctx = RenderItemContext { book: book, handlebars: &handlebars, destination: destination.to_path_buf(), data: data.clone(), - is_index: i == 0, + is_index: is_index, html_config: html_config.clone(), }; - self.render_item(item, ctx, &mut print_content, &mut search_documents)?; + self.render_item(item, + ctx, + &mut print_content, + &mut search_documents, + depthfirstiterator.collect_current_parents_names())?; + is_index = false; } // Search index - make_searchindex(book, &search_documents)?; + make_searchindex(book, search_documents)?; // Print version self.configure_print_version(&mut data, &print_content); @@ -633,21 +649,29 @@ pub fn normalize_id(content: &str) -> String { .collect::() } -#[cfg(not(feature = "searchindex"))] -fn make_searchindex(_book: &MDBook, _search_documents : &Vec) -> Result<()> { - Ok(()) -} +/// Uses elasticlunr to create a search index and exports that into `searchindex.json`. 
+fn make_searchindex(book: &MDBook, search_documents : Vec) -> Result<()> { + let mut index = elasticlunr::index::Index::new("id", + &["title".into(), "body".into(), "breadcrumbs".into()]); -#[cfg(feature = "searchindex")] -fn make_searchindex(book: &MDBook, search_documents : &Vec) -> Result<()> { - let mut index = elasticlunr::IndexBuilder::new(); for sd in search_documents { - index.add_document(&sd.title, &sd.body); + let anchor = if let Some(s) = sd.anchor.1 { + format!("{}#{}", sd.anchor.0, &s) + } else { + sd.anchor.0 + }; + + let mut map = HashMap::new(); + map.insert("id".into(), anchor.clone()); + map.insert("title".into(), sd.title); + map.insert("body".into(), sd.body); + map.insert("breadcrumbs".into(), sd.hierarchy.join(" ยป ")); + index.add_doc(&anchor, map); } book.write_file( Path::new("searchindex").with_extension("json"), - &index.to_json().as_bytes(), + &serde_json::to_string(&index).unwrap().as_bytes(), )?; info!("[*] Creating \"searchindex.json\" โœ“"); diff --git a/src/theme/book.js b/src/theme/book.js index 679532f9..ceaa14b5 100644 --- a/src/theme/book.js +++ b/src/theme/book.js @@ -144,6 +144,20 @@ $( document ).ready(function() { return url; } , + escapeHTML: (function() { + var MAP = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + "'": ''' + }; + var repl = function(c) { return MAP[c]; }; + return function(s) { + return s.replace(/[&<>'"]/g, repl); + }; + })() + , formatSearchResult : function (result, searchterms) { // Show text around first occurrence of first search term. var firstoccurence = result.doc.body.search(searchterms[0]); @@ -173,9 +187,9 @@ $( document ).ready(function() { return $('
  • ' + result.doc.title + '' - + '' + result.doc.breadcrumbs + '' - + '' + teaser + '' + + '">' + result.doc.breadcrumbs + '' // doc.title + + '' + '' + + '' + this.escapeHTML(teaser) + '' + '
  • '); } , @@ -213,7 +227,8 @@ $( document ).ready(function() { if (url.params.hasOwnProperty(this.SEARCH_PARAM) && url.params[this.SEARCH_PARAM] != "") { this.searchbar_outer.slideDown(); - this.searchbar[0].value = url.params[this.SEARCH_PARAM]; + this.searchbar[0].value = decodeURIComponent( + (url.params[this.SEARCH_PARAM]+'').replace(/\+/g, '%20')); this.searchbarKeyUpHandler(); } else { this.searchbar_outer.slideUp(); @@ -229,19 +244,42 @@ $( document ).ready(function() { } , init : function () { - // For testing purposes: Index current page - this.create_test_searchindex(); - - // Set up events var this_ = this; - this.searchicon.click( function(e) { this_.searchIconClickHandler(); } ); - this.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } ); - $(document).on('keydown', function (e) { this_.globalKeyHandler(e); }); - // If the user uses the browser buttons, do the same as if a reload happened - window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); }; + window.md = this; + + // For testing purposes: Index current page + //this.create_test_searchindex(); + + $.getJSON("searchindex.json", function(json) { + //this_.searchindex = elasticlunr.Index.load(json); + + // TODO: Workaround: reindex everything + var searchindex = elasticlunr(function () { + this.addField('body'); + this.addField('title'); + this.addField('breadcrumbs') + this.setRef('id'); + }); + window.mjs = json; + var docs = json.documentStore.docs; + for (var key in docs) { + searchindex.addDoc(docs[key]); + } + this_.searchindex = searchindex; + + + // Set up events + this_.searchicon.click( function(e) { this_.searchIconClickHandler(); } ); + this_.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } ); + $(document).on('keydown', function (e) { this_.globalKeyHandler(e); }); + // If the user uses the browser buttons, do the same as if a reload happened + window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); }; + + // If reloaded, do 
the search or mark again, depending on the current url parameters + this_.doSearchOrMarkFromUrl(); + + }); - // If reloaded, do the search or mark again, depending on the current url parameters - this.doSearchOrMarkFromUrl(); } , hasFocus : function () { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index fa77e0f5..dfdb620f 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -2,10 +2,10 @@ pub mod fs; use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES, OPTION_ENABLE_TABLES}; +use std::ascii::AsciiExt; use std::borrow::Cow; use std::fmt::Write; use regex::Regex; -use std::rc::Rc; /// A heading together with the successive content until the next heading will /// make up one `SearchDocument`. It represents some independently searchable part of the book. @@ -16,22 +16,22 @@ pub struct SearchDocument { // Content: Flatted paragraphs, lists, code pub body : String, /// Needed information to generate a link to the corresponding title anchor - /// First part is the `reference_base` that should be the same for all documents that + /// First part is the `anchor_base` that should be the same for all documents that /// came from the same `.md` file. The second part is derived from the heading of the search /// document. - pub sref : (Rc, Option), - // Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"] + pub anchor : (String, Option), + // Hierarchy like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"] // as a human understandable path to the search document. 
pub breadcrumbs : Vec<Rc<String>>, + pub hierarchy : Vec<String>, } impl SearchDocument { - fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument { + fn new(anchor_base : &str, hierarchy : &Vec<String>) -> SearchDocument {
Event::Start(Tag::Header(i)) if i <= max_paragraph_level => { + // Paragraph finished, the next header is following now if current.has_content() { + // Push header_hierarchy to the search documents chapter hierarchy + current.extend_hierarchy(&header_hierarchy); search_documents.push(current); } - current = SearchDocument::new(&reference_base, breadcrumbs); + current = SearchDocument::new(&anchor_base, &hierarchy); in_header = true; } - Event::End(Tag::Header(_)) => { - // Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs - current.breadcrumbs.push(Rc::new(current.title.clone())); - current.sref.1 = Some(heading_to_sref(¤t.title)); + Event::End(Tag::Header(i)) if i <= max_paragraph_level => { in_header = false; + current.anchor.1 = Some(heading_to_anchor(¤t.title)); + + header_hierarchy[i as usize -1] = current.title.clone(); + for h in &mut header_hierarchy[i as usize ..] { + *h = "".to_owned(); + } } Event::Start(_) | Event::End(_) => {} Event::Text(text) => { @@ -97,6 +114,7 @@ pub fn render_markdown_into_searchindex( Event::SoftBreak | Event::HardBreak => {} } } + current.extend_hierarchy(&header_hierarchy); search_documents.push(current); }