From a198e99fa9fad7d16d3db12a372bf26805db279a Mon Sep 17 00:00:00 2001 From: Phaiax Date: Mon, 9 Oct 2017 13:03:21 +0200 Subject: [PATCH] Search: Fine tuning * remove searchindex feature (nightly requirement of elasticlunr-rs dropped) * some documentation * refactor BookItems iterator * add iterator for parents * Include paragraph structure in hierarchy * Fix url and specialchar handling * Use complete index --- Cargo.toml | 3 +- src/book/bookitem.rs | 115 ++++++++++++++----- src/book/mod.rs | 6 +- src/lib.rs | 1 - src/renderer/html_handlebars/hbs_renderer.rs | 74 ++++++++---- src/theme/book.js | 68 ++++++++--- src/utils/mod.rs | 60 ++++++---- 7 files changed, 228 insertions(+), 99 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fbdeaff8..7d7f048d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ toml = "0.4" open = "1.1" regex = "0.2.1" tempdir = "0.3.4" -elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true} +elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs" } # Watch feature notify = { version = "4.0", optional = true } @@ -56,7 +56,6 @@ output = [] regenerate-css = [] watch = ["notify", "time", "crossbeam"] serve = ["iron", "staticfile", "ws"] -searchindex = ["elasticlunr"] [[bin]] doc = false diff --git a/src/book/bookitem.rs b/src/book/bookitem.rs index a2ec2cb0..4d16cc19 100644 --- a/src/book/bookitem.rs +++ b/src/book/bookitem.rs @@ -2,7 +2,12 @@ use serde::{Serialize, Serializer}; use serde::ser::SerializeStruct; use std::path::PathBuf; - +/// A BookItem corresponds to one entry of the table of contents file SUMMARY.md. +/// A line in that file can either be a numbered chapter with a section number like 2.1.3 or a +/// suffix or postfix chapter without such a section number. +/// The `String` field in the `Chapter` variant contains the section number as `2.1.3`. +/// The `Chapter` type contains the child elements (which can only be other `BookItem::Chapters`). 
+/// `BookItem::Affix` and `BookItem::Spacer` are only allowed within the root level. #[derive(Debug, Clone)] pub enum BookItem { Chapter(String, Chapter), // String = section @@ -10,6 +15,9 @@ pub enum BookItem { Spacer, } +/// A chapter is a `.md` file that is referenced by some line in the `SUMMARY.md` table of +/// contents. It also has references to its sub chapters via `sub_items`. These items can +/// only be of the variant `BookItem::Chapter`. #[derive(Debug, Clone)] pub struct Chapter { pub name: String, @@ -17,13 +25,21 @@ pub struct Chapter { pub sub_items: Vec, } +/// A flattening, depth-first iterator over Bookitems and it's children. +/// It can be obtained by calling `MDBook::iter()`. #[derive(Debug, Clone)] pub struct BookItems<'a> { - pub items: &'a [BookItem], - pub current_index: usize, - pub stack: Vec<(&'a [BookItem], usize)>, + /// The remaining items in the iterator in the current, deepest level of the iterator + items: &'a [BookItem], + /// The higher levels of the hierarchy. The parents of the current level are still + /// in the list and accessible as `[stack[0][0], stack[1][0], stack[2][0], ...]`. + stack: Vec<&'a [BookItem]>, } +/// Iterator for the parent `BookItem`s of a `BookItem`. 
+pub struct BookItemParents<'a> { + stack: &'a [ &'a [BookItem] ] +} impl Chapter { pub fn new(name: String, path: PathBuf) -> Self { @@ -48,39 +64,78 @@ impl Serialize for Chapter { } } - - -// Shamelessly copied from Rustbook -// (https://github.com/rust-lang/rust/blob/master/src/rustbook/book.rs) impl<'a> Iterator for BookItems<'a> { type Item = &'a BookItem; fn next(&mut self) -> Option<&'a BookItem> { - loop { - if self.current_index >= self.items.len() { - match self.stack.pop() { - None => return None, - Some((parent_items, parent_idx)) => { - self.items = parent_items; - self.current_index = parent_idx + 1; - } - } - } else { - let cur = &self.items[self.current_index]; - - match *cur { - BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => { - self.stack.push((self.items, self.current_index)); + if let Some((first, rest)) = self.items.split_first() { + // Return the first element in `items` and optionally dive into afterwards. + match first { + &BookItem::Spacer => { + self.items = rest; + }, + &BookItem::Chapter(_, ref ch) | + &BookItem::Affix(ref ch) => { + if ch.sub_items.is_empty() { + self.items = rest; + } else { + // Don't remove `first` for now. (Because of Parent Iterator) + self.stack.push(self.items); self.items = &ch.sub_items[..]; - self.current_index = 0; } - BookItem::Spacer => { - self.current_index += 1; - } - } - - return Some(cur); + }, + }; + Some(first) + } else { + // Current level is drained => pop from `stack` or return `None` + if let Some(stacked_items) = self.stack.pop() { + // The first item of the popped slice is the bookitem we previously dived into. + self.items = &stacked_items[1..]; + self.next() + } else { + None } } } } + +impl<'a> BookItems<'a> { + pub fn new(items : &'a[BookItem]) -> BookItems<'a> { + BookItems { + items : items, + stack : vec![], + } + } + + /// Returns an iterator to iterate the parents of the last yielded `BookItem`. + /// Starts with the root item. 
+ pub fn current_parents(&'a self) -> BookItemParents<'a> { + BookItemParents { stack : &self.stack } + } + + /// Collects the names of the parent `BookItem`s of the last yielded `Bookitem` into a list. + pub fn collect_current_parents_names(&self) -> Vec { + self.current_parents().filter_map(|i| match i { + &BookItem::Chapter(_, ref ch) | &BookItem::Affix(ref ch) => Some(ch.name.clone()), + _ => None, + }).collect() + } + + /// Get the level of the last yielded `BookItem`. Root level = 0 + pub fn current_depth(&'a self) -> usize { + self.stack.len() + } +} + +impl<'a> Iterator for BookItemParents<'a> { + type Item = &'a BookItem; + + fn next(&mut self) -> Option<&'a BookItem> { + if let Some((first, rest)) = self.stack.split_first() { + self.stack = rest; + Some (&first[0]) + } else { + None + } + } +} \ No newline at end of file diff --git a/src/book/mod.rs b/src/book/mod.rs index fc757a90..9a1cf95f 100644 --- a/src/book/mod.rs +++ b/src/book/mod.rs @@ -105,11 +105,7 @@ impl MDBook { /// ``` pub fn iter(&self) -> BookItems { - BookItems { - items: &self.content[..], - current_index: 0, - stack: Vec::new(), - } + BookItems::new(&self.content[..]) } /// `init()` creates some boilerplate files and directories diff --git a/src/lib.rs b/src/lib.rs index cc2c7771..00e5cabe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,7 +88,6 @@ extern crate serde_derive; extern crate serde_json; extern crate tempdir; extern crate toml; -#[cfg(feature = "searchindex")] extern crate elasticlunr; mod parse; diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index d0e2fc32..59ba412a 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -9,7 +9,6 @@ use theme::{Theme, playpen_editor}; use errors::*; use regex::{Captures, Regex}; -#[cfg(feature = "searchindex")] use elasticlunr; use std::ascii::AsciiExt; @@ -35,13 +34,15 @@ impl HtmlHandlebars { item: &BookItem, mut ctx: 
RenderItemContext, print_content: &mut String, - search_documents : &mut Vec) + search_documents : &mut Vec, + mut parents_names : Vec) -> Result<()> { + // FIXME: This should be made DRY-er and rely less on mutable state match *item { - BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) - if !ch.path.as_os_str().is_empty() => - { + BookItem::Chapter(_, ref ch) | + BookItem::Affix(ref ch) if !ch.path.as_os_str().is_empty() => { + let path = ctx.book.get_source().join(&ch.path); let content = utils::fs::file_to_string(&path)?; let base = path.parent() @@ -49,11 +50,20 @@ impl HtmlHandlebars { let path = ch.path.to_str().ok_or_else(|| { io::Error::new(io::ErrorKind::Other, "Could not convert path to str") })?; + let filepath = Path::new(&ch.path).with_extension("html"); + let filepath = filepath.to_str().ok_or_else(|| { + Error::from(format!("Bad file name: {}", filepath.display())) + })?; + + if ! parents_names.last().map(String::as_ref).unwrap_or("") + .eq_ignore_ascii_case(&ch.name) { + parents_names.push(ch.name.clone()); + } utils::render_markdown_into_searchindex(search_documents, &content, - path, - &vec![], + filepath, + parents_names, id_from_content); // Parse and expand links @@ -84,17 +94,15 @@ impl HtmlHandlebars { debug!("[*]: Render template"); let rendered = ctx.handlebars.render("index", &ctx.data)?; - let filepath = Path::new(&ch.path).with_extension("html"); + let rendered = self.post_process( rendered, - &normalize_path(filepath.to_str().ok_or_else(|| Error::from( - format!("Bad file name: {}", filepath.display()), - ))?), + &normalize_path(filepath), &ctx.book.config.html_config().unwrap_or_default().playpen, ); // Write to file - info!("[*] Creating {:?} โœ“", filepath.display()); + info!("[*] Creating {:?} โœ“", filepath); ctx.book.write_file(filepath, &rendered.into_bytes())?; if ctx.is_index { @@ -282,20 +290,28 @@ impl Renderer for HtmlHandlebars { fs::create_dir_all(&destination) .chain_err(|| "Unexpected error when constructing 
destination path")?; - for (i, item) in book.iter().enumerate() { + + let mut depthfirstiterator = book.iter(); + let mut is_index = true; + while let Some(item) = depthfirstiterator.next() { let ctx = RenderItemContext { book: book, handlebars: &handlebars, destination: destination.to_path_buf(), data: data.clone(), - is_index: i == 0, + is_index: is_index, html_config: html_config.clone(), }; - self.render_item(item, ctx, &mut print_content, &mut search_documents)?; + self.render_item(item, + ctx, + &mut print_content, + &mut search_documents, + depthfirstiterator.collect_current_parents_names())?; + is_index = false; } // Search index - make_searchindex(book, &search_documents)?; + make_searchindex(book, search_documents)?; // Print version self.configure_print_version(&mut data, &print_content); @@ -633,21 +649,29 @@ pub fn normalize_id(content: &str) -> String { .collect::() } -#[cfg(not(feature = "searchindex"))] -fn make_searchindex(_book: &MDBook, _search_documents : &Vec) -> Result<()> { - Ok(()) -} +/// Uses elasticlunr to create a search index and exports that into `searchindex.json`. 
+fn make_searchindex(book: &MDBook, search_documents : Vec) -> Result<()> { + let mut index = elasticlunr::index::Index::new("id", + &["title".into(), "body".into(), "breadcrumbs".into()]); -#[cfg(feature = "searchindex")] -fn make_searchindex(book: &MDBook, search_documents : &Vec) -> Result<()> { - let mut index = elasticlunr::IndexBuilder::new(); for sd in search_documents { - index.add_document(&sd.title, &sd.body); + let anchor = if let Some(s) = sd.anchor.1 { + format!("{}#{}", sd.anchor.0, &s) + } else { + sd.anchor.0 + }; + + let mut map = HashMap::new(); + map.insert("id".into(), anchor.clone()); + map.insert("title".into(), sd.title); + map.insert("body".into(), sd.body); + map.insert("breadcrumbs".into(), sd.hierarchy.join(" ยป ")); + index.add_doc(&anchor, map); } book.write_file( Path::new("searchindex").with_extension("json"), - &index.to_json().as_bytes(), + &serde_json::to_string(&index).unwrap().as_bytes(), )?; info!("[*] Creating \"searchindex.json\" โœ“"); diff --git a/src/theme/book.js b/src/theme/book.js index 679532f9..ceaa14b5 100644 --- a/src/theme/book.js +++ b/src/theme/book.js @@ -144,6 +144,20 @@ $( document ).ready(function() { return url; } , + escapeHTML: (function() { + var MAP = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + "'": ''' + }; + var repl = function(c) { return MAP[c]; }; + return function(s) { + return s.replace(/[&<>'"]/g, repl); + }; + })() + , formatSearchResult : function (result, searchterms) { // Show text around first occurrence of first search term. var firstoccurence = result.doc.body.search(searchterms[0]); @@ -173,9 +187,9 @@ $( document ).ready(function() { return $('
  • ' + result.doc.title + '' - + '' + result.doc.breadcrumbs + '' - + '' + teaser + '' + + '">' + result.doc.breadcrumbs + '' // doc.title + + '' + '' + + '' + this.escapeHTML(teaser) + '' + '
  • '); } , @@ -213,7 +227,8 @@ $( document ).ready(function() { if (url.params.hasOwnProperty(this.SEARCH_PARAM) && url.params[this.SEARCH_PARAM] != "") { this.searchbar_outer.slideDown(); - this.searchbar[0].value = url.params[this.SEARCH_PARAM]; + this.searchbar[0].value = decodeURIComponent( + (url.params[this.SEARCH_PARAM]+'').replace(/\+/g, '%20')); this.searchbarKeyUpHandler(); } else { this.searchbar_outer.slideUp(); @@ -229,19 +244,42 @@ $( document ).ready(function() { } , init : function () { - // For testing purposes: Index current page - this.create_test_searchindex(); - - // Set up events var this_ = this; - this.searchicon.click( function(e) { this_.searchIconClickHandler(); } ); - this.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } ); - $(document).on('keydown', function (e) { this_.globalKeyHandler(e); }); - // If the user uses the browser buttons, do the same as if a reload happened - window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); }; + window.md = this; + + // For testing purposes: Index current page + //this.create_test_searchindex(); + + $.getJSON("searchindex.json", function(json) { + //this_.searchindex = elasticlunr.Index.load(json); + + // TODO: Workaround: reindex everything + var searchindex = elasticlunr(function () { + this.addField('body'); + this.addField('title'); + this.addField('breadcrumbs') + this.setRef('id'); + }); + window.mjs = json; + var docs = json.documentStore.docs; + for (var key in docs) { + searchindex.addDoc(docs[key]); + } + this_.searchindex = searchindex; + + + // Set up events + this_.searchicon.click( function(e) { this_.searchIconClickHandler(); } ); + this_.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } ); + $(document).on('keydown', function (e) { this_.globalKeyHandler(e); }); + // If the user uses the browser buttons, do the same as if a reload happened + window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); }; + + // If reloaded, do 
the search or mark again, depending on the current url parameters + this_.doSearchOrMarkFromUrl(); + + }); - // If reloaded, do the search or mark again, depending on the current url parameters - this.doSearchOrMarkFromUrl(); } , hasFocus : function () { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index fa77e0f5..dfdb620f 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -2,10 +2,10 @@ pub mod fs; use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES, OPTION_ENABLE_TABLES}; +use std::ascii::AsciiExt; use std::borrow::Cow; use std::fmt::Write; use regex::Regex; -use std::rc::Rc; /// A heading together with the successive content until the next heading will /// make up one `SearchDocument`. It represents some independently searchable part of the book. @@ -16,22 +16,22 @@ pub struct SearchDocument { // Content: Flatted paragraphs, lists, code pub body : String, /// Needed information to generate a link to the corresponding title anchor - /// First part is the `reference_base` that should be the same for all documents that + /// First part is the `anchor_base` that should be the same for all documents that /// came from the same `.md` file. The second part is derived from the heading of the search /// document. - pub sref : (Rc, Option), - // Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"] + pub anchor : (String, Option), + // Hierarchy like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"] // as a human understandable path to the search document. 
pub breadcrumbs : Vec<Rc<String>>, + pub hierarchy : Vec<String>, } impl SearchDocument { - fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument { + fn new(anchor_base : &str, hierarchy : &Vec<String>) -> SearchDocument {
Event::Start(Tag::Header(i)) if i <= max_paragraph_level => { + // Paragraph finished, the next header is following now if current.has_content() { + // Push header_hierarchy to the search documents chapter hierarchy + current.extend_hierarchy(&header_hierarchy); search_documents.push(current); } - current = SearchDocument::new(&reference_base, breadcrumbs); + current = SearchDocument::new(&anchor_base, &hierarchy); in_header = true; } - Event::End(Tag::Header(_)) => { - // Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs - current.breadcrumbs.push(Rc::new(current.title.clone())); - current.sref.1 = Some(heading_to_sref(¤t.title)); + Event::End(Tag::Header(i)) if i <= max_paragraph_level => { in_header = false; + current.anchor.1 = Some(heading_to_anchor(¤t.title)); + + header_hierarchy[i as usize -1] = current.title.clone(); + for h in &mut header_hierarchy[i as usize ..] { + *h = "".to_owned(); + } } Event::Start(_) | Event::End(_) => {} Event::Text(text) => { @@ -97,6 +114,7 @@ pub fn render_markdown_into_searchindex( Event::SoftBreak | Event::HardBreak => {} } } + current.extend_hierarchy(&header_hierarchy); search_documents.push(current); }