Search: Fine tuning

* remove searchindex feature (nightly requirement of elasticlunr-rs dropped)
* some documentation
* refactor BookItems iterator
* add iterator for parents
* Include paragraph structure in hierarchy
* Fix URL and special-character handling
* Use complete index
This commit is contained in:
Phaiax 2017-10-09 13:03:21 +02:00
parent aa1f02f7b2
commit a198e99fa9
7 changed files with 228 additions and 99 deletions

View File

@ -29,7 +29,7 @@ toml = "0.4"
open = "1.1" open = "1.1"
regex = "0.2.1" regex = "0.2.1"
tempdir = "0.3.4" tempdir = "0.3.4"
elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true} elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs" }
# Watch feature # Watch feature
notify = { version = "4.0", optional = true } notify = { version = "4.0", optional = true }
@ -56,7 +56,6 @@ output = []
regenerate-css = [] regenerate-css = []
watch = ["notify", "time", "crossbeam"] watch = ["notify", "time", "crossbeam"]
serve = ["iron", "staticfile", "ws"] serve = ["iron", "staticfile", "ws"]
searchindex = ["elasticlunr"]
[[bin]] [[bin]]
doc = false doc = false

View File

@ -2,7 +2,12 @@ use serde::{Serialize, Serializer};
use serde::ser::SerializeStruct; use serde::ser::SerializeStruct;
use std::path::PathBuf; use std::path::PathBuf;
/// A BookItem corresponds to one entry of the table of contents file SUMMARY.md.
/// A line in that file can either be a numbered chapter with a section number like 2.1.3 or a
/// prefix or suffix chapter without such a section number.
/// The `String` field in the `Chapter` variant contains the section number as `2.1.3`.
/// The `Chapter` type contains the child elements (which can only be other `BookItem::Chapters`).
/// `BookItem::Affix` and `BookItem::Spacer` are only allowed within the root level.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum BookItem { pub enum BookItem {
Chapter(String, Chapter), // String = section Chapter(String, Chapter), // String = section
@ -10,6 +15,9 @@ pub enum BookItem {
Spacer, Spacer,
} }
/// A chapter is a `.md` file that is referenced by some line in the `SUMMARY.md` table of
/// contents. It also has references to its sub chapters via `sub_items`. These items can
/// only be of the variant `BookItem::Chapter`.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Chapter { pub struct Chapter {
pub name: String, pub name: String,
@ -17,13 +25,21 @@ pub struct Chapter {
pub sub_items: Vec<BookItem>, pub sub_items: Vec<BookItem>,
} }
/// A flattening, depth-first iterator over `BookItem`s and their children.
/// It can be obtained by calling `MDBook::iter()`.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct BookItems<'a> { pub struct BookItems<'a> {
pub items: &'a [BookItem], /// The remaining items in the iterator in the current, deepest level of the iterator
pub current_index: usize, items: &'a [BookItem],
pub stack: Vec<(&'a [BookItem], usize)>, /// The higher levels of the hierarchy. The parents of the current level are still
/// in the list and accessible as `[stack[0][0], stack[1][0], stack[2][0], ...]`.
stack: Vec<&'a [BookItem]>,
} }
/// Iterator for the parent `BookItem`s of a `BookItem`.
pub struct BookItemParents<'a> {
stack: &'a [ &'a [BookItem] ]
}
impl Chapter { impl Chapter {
pub fn new(name: String, path: PathBuf) -> Self { pub fn new(name: String, path: PathBuf) -> Self {
@ -48,39 +64,78 @@ impl Serialize for Chapter {
} }
} }
// Shamelessly copied from Rustbook
// (https://github.com/rust-lang/rust/blob/master/src/rustbook/book.rs)
impl<'a> Iterator for BookItems<'a> { impl<'a> Iterator for BookItems<'a> {
type Item = &'a BookItem; type Item = &'a BookItem;
fn next(&mut self) -> Option<&'a BookItem> { fn next(&mut self) -> Option<&'a BookItem> {
loop { if let Some((first, rest)) = self.items.split_first() {
if self.current_index >= self.items.len() { // Return the first element in `items` and optionally dive into afterwards.
match self.stack.pop() { match first {
None => return None, &BookItem::Spacer => {
Some((parent_items, parent_idx)) => { self.items = rest;
self.items = parent_items; },
self.current_index = parent_idx + 1; &BookItem::Chapter(_, ref ch) |
} &BookItem::Affix(ref ch) => {
} if ch.sub_items.is_empty() {
self.items = rest;
} else { } else {
let cur = &self.items[self.current_index]; // Don't remove `first` for now. (Because of Parent Iterator)
self.stack.push(self.items);
match *cur {
BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => {
self.stack.push((self.items, self.current_index));
self.items = &ch.sub_items[..]; self.items = &ch.sub_items[..];
self.current_index = 0;
} }
BookItem::Spacer => { },
self.current_index += 1; };
Some(first)
} else {
// Current level is drained => pop from `stack` or return `None`
if let Some(stacked_items) = self.stack.pop() {
// The first item of the popped slice is the `BookItem` we previously dived into.
self.items = &stacked_items[1..];
self.next()
} else {
None
}
}
} }
} }
return Some(cur); impl<'a> BookItems<'a> {
} pub fn new(items : &'a[BookItem]) -> BookItems<'a> {
BookItems {
items : items,
stack : vec![],
}
}
/// Returns an iterator to iterate the parents of the last yielded `BookItem`.
/// Starts with the root item.
pub fn current_parents(&'a self) -> BookItemParents<'a> {
BookItemParents { stack : &self.stack }
}
/// Collects the names of the parent `BookItem`s of the last yielded `BookItem` into a list.
pub fn collect_current_parents_names(&self) -> Vec<String> {
self.current_parents().filter_map(|i| match i {
&BookItem::Chapter(_, ref ch) | &BookItem::Affix(ref ch) => Some(ch.name.clone()),
_ => None,
}).collect()
}
/// Get the level of the last yielded `BookItem`. Root level = 0
pub fn current_depth(&'a self) -> usize {
self.stack.len()
}
}
impl<'a> Iterator for BookItemParents<'a> {
type Item = &'a BookItem;
fn next(&mut self) -> Option<&'a BookItem> {
if let Some((first, rest)) = self.stack.split_first() {
self.stack = rest;
Some (&first[0])
} else {
None
} }
} }
} }

View File

@ -105,11 +105,7 @@ impl MDBook {
/// ``` /// ```
pub fn iter(&self) -> BookItems { pub fn iter(&self) -> BookItems {
BookItems { BookItems::new(&self.content[..])
items: &self.content[..],
current_index: 0,
stack: Vec::new(),
}
} }
/// `init()` creates some boilerplate files and directories /// `init()` creates some boilerplate files and directories

View File

@ -88,7 +88,6 @@ extern crate serde_derive;
extern crate serde_json; extern crate serde_json;
extern crate tempdir; extern crate tempdir;
extern crate toml; extern crate toml;
#[cfg(feature = "searchindex")]
extern crate elasticlunr; extern crate elasticlunr;
mod parse; mod parse;

View File

@ -9,7 +9,6 @@ use theme::{Theme, playpen_editor};
use errors::*; use errors::*;
use regex::{Captures, Regex}; use regex::{Captures, Regex};
#[cfg(feature = "searchindex")]
use elasticlunr; use elasticlunr;
use std::ascii::AsciiExt; use std::ascii::AsciiExt;
@ -35,13 +34,15 @@ impl HtmlHandlebars {
item: &BookItem, item: &BookItem,
mut ctx: RenderItemContext, mut ctx: RenderItemContext,
print_content: &mut String, print_content: &mut String,
search_documents : &mut Vec<utils::SearchDocument>) search_documents : &mut Vec<utils::SearchDocument>,
mut parents_names : Vec<String>)
-> Result<()> { -> Result<()> {
// FIXME: This should be made DRY-er and rely less on mutable state // FIXME: This should be made DRY-er and rely less on mutable state
match *item { match *item {
BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) BookItem::Chapter(_, ref ch) |
if !ch.path.as_os_str().is_empty() => BookItem::Affix(ref ch) if !ch.path.as_os_str().is_empty() => {
{
let path = ctx.book.get_source().join(&ch.path); let path = ctx.book.get_source().join(&ch.path);
let content = utils::fs::file_to_string(&path)?; let content = utils::fs::file_to_string(&path)?;
let base = path.parent() let base = path.parent()
@ -49,11 +50,20 @@ impl HtmlHandlebars {
let path = ch.path.to_str().ok_or_else(|| { let path = ch.path.to_str().ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "Could not convert path to str") io::Error::new(io::ErrorKind::Other, "Could not convert path to str")
})?; })?;
let filepath = Path::new(&ch.path).with_extension("html");
let filepath = filepath.to_str().ok_or_else(|| {
Error::from(format!("Bad file name: {}", filepath.display()))
})?;
if ! parents_names.last().map(String::as_ref).unwrap_or("")
.eq_ignore_ascii_case(&ch.name) {
parents_names.push(ch.name.clone());
}
utils::render_markdown_into_searchindex(search_documents, utils::render_markdown_into_searchindex(search_documents,
&content, &content,
path, filepath,
&vec![], parents_names,
id_from_content); id_from_content);
// Parse and expand links // Parse and expand links
@ -84,17 +94,15 @@ impl HtmlHandlebars {
debug!("[*]: Render template"); debug!("[*]: Render template");
let rendered = ctx.handlebars.render("index", &ctx.data)?; let rendered = ctx.handlebars.render("index", &ctx.data)?;
let filepath = Path::new(&ch.path).with_extension("html");
let rendered = self.post_process( let rendered = self.post_process(
rendered, rendered,
&normalize_path(filepath.to_str().ok_or_else(|| Error::from( &normalize_path(filepath),
format!("Bad file name: {}", filepath.display()),
))?),
&ctx.book.config.html_config().unwrap_or_default().playpen, &ctx.book.config.html_config().unwrap_or_default().playpen,
); );
// Write to file // Write to file
info!("[*] Creating {:?} ✓", filepath.display()); info!("[*] Creating {:?} ✓", filepath);
ctx.book.write_file(filepath, &rendered.into_bytes())?; ctx.book.write_file(filepath, &rendered.into_bytes())?;
if ctx.is_index { if ctx.is_index {
@ -282,20 +290,28 @@ impl Renderer for HtmlHandlebars {
fs::create_dir_all(&destination) fs::create_dir_all(&destination)
.chain_err(|| "Unexpected error when constructing destination path")?; .chain_err(|| "Unexpected error when constructing destination path")?;
for (i, item) in book.iter().enumerate() {
let mut depthfirstiterator = book.iter();
let mut is_index = true;
while let Some(item) = depthfirstiterator.next() {
let ctx = RenderItemContext { let ctx = RenderItemContext {
book: book, book: book,
handlebars: &handlebars, handlebars: &handlebars,
destination: destination.to_path_buf(), destination: destination.to_path_buf(),
data: data.clone(), data: data.clone(),
is_index: i == 0, is_index: is_index,
html_config: html_config.clone(), html_config: html_config.clone(),
}; };
self.render_item(item, ctx, &mut print_content, &mut search_documents)?; self.render_item(item,
ctx,
&mut print_content,
&mut search_documents,
depthfirstiterator.collect_current_parents_names())?;
is_index = false;
} }
// Search index // Search index
make_searchindex(book, &search_documents)?; make_searchindex(book, search_documents)?;
// Print version // Print version
self.configure_print_version(&mut data, &print_content); self.configure_print_version(&mut data, &print_content);
@ -633,21 +649,29 @@ pub fn normalize_id(content: &str) -> String {
.collect::<String>() .collect::<String>()
} }
#[cfg(not(feature = "searchindex"))] /// Uses elasticlunr to create a search index and exports that into `searchindex.json`.
fn make_searchindex(_book: &MDBook, _search_documents : &Vec<utils::SearchDocument>) -> Result<()> { fn make_searchindex(book: &MDBook, search_documents : Vec<utils::SearchDocument>) -> Result<()> {
Ok(()) let mut index = elasticlunr::index::Index::new("id",
} &["title".into(), "body".into(), "breadcrumbs".into()]);
#[cfg(feature = "searchindex")]
fn make_searchindex(book: &MDBook, search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
let mut index = elasticlunr::IndexBuilder::new();
for sd in search_documents { for sd in search_documents {
index.add_document(&sd.title, &sd.body); let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new();
map.insert("id".into(), anchor.clone());
map.insert("title".into(), sd.title);
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
} }
book.write_file( book.write_file(
Path::new("searchindex").with_extension("json"), Path::new("searchindex").with_extension("json"),
&index.to_json().as_bytes(), &serde_json::to_string(&index).unwrap().as_bytes(),
)?; )?;
info!("[*] Creating \"searchindex.json\""); info!("[*] Creating \"searchindex.json\"");

View File

@ -144,6 +144,20 @@ $( document ).ready(function() {
return url; return url;
} }
, ,
escapeHTML: (function() {
var MAP = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
'"': '&#34;',
"'": '&#39;'
};
var repl = function(c) { return MAP[c]; };
return function(s) {
return s.replace(/[&<>'"]/g, repl);
};
})()
,
formatSearchResult : function (result, searchterms) { formatSearchResult : function (result, searchterms) {
// Show text around first occurrence of first search term. // Show text around first occurrence of first search term.
var firstoccurence = result.doc.body.search(searchterms[0]); var firstoccurence = result.doc.body.search(searchterms[0]);
@ -173,9 +187,9 @@ $( document ).ready(function() {
return $('<li><a href="' return $('<li><a href="'
+ url[0] + '?' + this.MARK_PARAM + '=' + searchterms + '#' + url[1] + url[0] + '?' + this.MARK_PARAM + '=' + searchterms + '#' + url[1]
+ '">' + result.doc.title + '</a>' + '">' + result.doc.breadcrumbs + '</a>' // doc.title
+ '<span class="breadcrumbs">' + result.doc.breadcrumbs + '</span>' + '<span class="breadcrumbs">' + '</span>'
+ '<span class="teaser">' + teaser + '</span>' + '<span class="teaser">' + this.escapeHTML(teaser) + '</span>'
+ '</li>'); + '</li>');
} }
, ,
@ -213,7 +227,8 @@ $( document ).ready(function() {
if (url.params.hasOwnProperty(this.SEARCH_PARAM) if (url.params.hasOwnProperty(this.SEARCH_PARAM)
&& url.params[this.SEARCH_PARAM] != "") { && url.params[this.SEARCH_PARAM] != "") {
this.searchbar_outer.slideDown(); this.searchbar_outer.slideDown();
this.searchbar[0].value = url.params[this.SEARCH_PARAM]; this.searchbar[0].value = decodeURIComponent(
(url.params[this.SEARCH_PARAM]+'').replace(/\+/g, '%20'));
this.searchbarKeyUpHandler(); this.searchbarKeyUpHandler();
} else { } else {
this.searchbar_outer.slideUp(); this.searchbar_outer.slideUp();
@ -229,19 +244,42 @@ $( document ).ready(function() {
} }
, ,
init : function () { init : function () {
var this_ = this;
window.md = this;
// For testing purposes: Index current page // For testing purposes: Index current page
this.create_test_searchindex(); //this.create_test_searchindex();
$.getJSON("searchindex.json", function(json) {
//this_.searchindex = elasticlunr.Index.load(json);
// TODO: Workaround: reindex everything
var searchindex = elasticlunr(function () {
this.addField('body');
this.addField('title');
this.addField('breadcrumbs')
this.setRef('id');
});
window.mjs = json;
var docs = json.documentStore.docs;
for (var key in docs) {
searchindex.addDoc(docs[key]);
}
this_.searchindex = searchindex;
// Set up events // Set up events
var this_ = this; this_.searchicon.click( function(e) { this_.searchIconClickHandler(); } );
this.searchicon.click( function(e) { this_.searchIconClickHandler(); } ); this_.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } );
this.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } );
$(document).on('keydown', function (e) { this_.globalKeyHandler(e); }); $(document).on('keydown', function (e) { this_.globalKeyHandler(e); });
// If the user uses the browser buttons, do the same as if a reload happened // If the user uses the browser buttons, do the same as if a reload happened
window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); }; window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); };
// If reloaded, do the search or mark again, depending on the current url parameters // If reloaded, do the search or mark again, depending on the current url parameters
this.doSearchOrMarkFromUrl(); this_.doSearchOrMarkFromUrl();
});
} }
, ,
hasFocus : function () { hasFocus : function () {

View File

@ -2,10 +2,10 @@ pub mod fs;
use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES, use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
OPTION_ENABLE_TABLES}; OPTION_ENABLE_TABLES};
use std::ascii::AsciiExt;
use std::borrow::Cow; use std::borrow::Cow;
use std::fmt::Write; use std::fmt::Write;
use regex::Regex; use regex::Regex;
use std::rc::Rc;
/// A heading together with the successive content until the next heading will /// A heading together with the successive content until the next heading will
/// make up one `SearchDocument`. It represents some independently searchable part of the book. /// make up one `SearchDocument`. It represents some independently searchable part of the book.
@ -16,22 +16,22 @@ pub struct SearchDocument {
// Content: Flatted paragraphs, lists, code // Content: Flatted paragraphs, lists, code
pub body : String, pub body : String,
/// Needed information to generate a link to the corresponding title anchor /// Needed information to generate a link to the corresponding title anchor
/// First part is the `reference_base` that should be the same for all documents that /// First part is the `anchor_base` that should be the same for all documents that
/// came from the same `.md` file. The second part is derived from the heading of the search /// came from the same `.md` file. The second part is derived from the heading of the search
/// document. /// document.
pub sref : (Rc<String>, Option<String>), pub anchor : (String, Option<String>),
// Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"] // Hierarchy like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]
// as a human understandable path to the search document. // as a human understandable path to the search document.
pub breadcrumbs : Vec<Rc<String>>, pub hierarchy : Vec<String>,
} }
impl SearchDocument { impl SearchDocument {
fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument { fn new(anchor_base : &str, hierarchy : &Vec<String>) -> SearchDocument {
SearchDocument { SearchDocument {
title : "".to_owned(), title : "".to_owned(),
body : "".to_owned(), body : "".to_owned(),
sref : (sref0.clone(), None), anchor : (anchor_base.to_owned(), None),
breadcrumbs : bcs.clone() hierarchy : (*hierarchy).clone()
} }
} }
@ -47,19 +47,29 @@ impl SearchDocument {
self.body.write_str(&" ").unwrap(); self.body.write_str(&" ").unwrap();
} }
} }
fn extend_hierarchy(&mut self, more : &Vec<String>) {
let last = self.hierarchy.last().map(String::as_ref).unwrap_or("").to_owned();
self.hierarchy.extend(more.iter().filter(|h|
h.as_str() != ""
&& ! h.as_str().eq_ignore_ascii_case(&last))
.map(|h| h.to_owned()));
}
} }
/// Renders markdown into flat unformatted text for usage in the search index. /// Renders markdown into flat unformatted text for usage in the search index.
/// Refer to the struct `SearchDocument`. /// Refer to the struct `SearchDocument`.
/// ///
/// The field `sref` in the `SearchDocument` struct becomes /// The field `anchor` in the `SearchDocument` struct becomes
/// `(reference_base, Some(heading_to_sref("The Section Heading")))` /// `(anchor_base, Some(heading_to_anchor("The Section Heading")))`
pub fn render_markdown_into_searchindex<F>( pub fn render_markdown_into_searchindex<F>(
search_documents: &mut Vec<SearchDocument>, search_documents: &mut Vec<SearchDocument>,
text: &str, text: &str,
reference_base: &str, anchor_base: &str,
breadcrumbs : &Vec<Rc<String>>, hierarchy : Vec<String>,
heading_to_sref : F) heading_to_anchor : F)
where F : Fn(&str) -> String { where F : Fn(&str) -> String {
let mut opts = Options::empty(); let mut opts = Options::empty();
@ -67,24 +77,31 @@ pub fn render_markdown_into_searchindex<F>(
opts.insert(OPTION_ENABLE_FOOTNOTES); opts.insert(OPTION_ENABLE_FOOTNOTES);
let p = Parser::new_ext(text, opts); let p = Parser::new_ext(text, opts);
let reference_base = Rc::new(reference_base.to_owned()); let mut current = SearchDocument::new(&anchor_base, &hierarchy);
let mut current = SearchDocument::new(&reference_base, breadcrumbs);
let mut in_header = false; let mut in_header = false;
let max_paragraph_level = 3;
let mut header_hierarchy = vec!["".to_owned(); max_paragraph_level as usize];
for event in p { for event in p {
match event { match event {
Event::Start(Tag::Header(i)) if i <= 3 => { Event::Start(Tag::Header(i)) if i <= max_paragraph_level => {
// Paragraph finished, the next header is following now
if current.has_content() { if current.has_content() {
// Push header_hierarchy to the search documents chapter hierarchy
current.extend_hierarchy(&header_hierarchy);
search_documents.push(current); search_documents.push(current);
} }
current = SearchDocument::new(&reference_base, breadcrumbs); current = SearchDocument::new(&anchor_base, &hierarchy);
in_header = true; in_header = true;
} }
Event::End(Tag::Header(_)) => { Event::End(Tag::Header(i)) if i <= max_paragraph_level => {
// Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs
current.breadcrumbs.push(Rc::new(current.title.clone()));
current.sref.1 = Some(heading_to_sref(&current.title));
in_header = false; in_header = false;
current.anchor.1 = Some(heading_to_anchor(&current.title));
header_hierarchy[i as usize -1] = current.title.clone();
for h in &mut header_hierarchy[i as usize ..] {
*h = "".to_owned();
}
} }
Event::Start(_) | Event::End(_) => {} Event::Start(_) | Event::End(_) => {}
Event::Text(text) => { Event::Text(text) => {
@ -97,6 +114,7 @@ pub fn render_markdown_into_searchindex<F>(
Event::SoftBreak | Event::HardBreak => {} Event::SoftBreak | Event::HardBreak => {}
} }
} }
current.extend_hierarchy(&header_hierarchy);
search_documents.push(current); search_documents.push(current);
} }