Search: Fine tuning

* remove searchindex feature (nightly requirement of elasticlunr-rs dropped)
* some documentation
* refactor BookItems iterator
* add iterator for parents
* Include paragraph structure in hierarchy
* Fix URL and special-character handling
* Use complete index
This commit is contained in:
Phaiax 2017-10-09 13:03:21 +02:00
parent aa1f02f7b2
commit a198e99fa9
7 changed files with 228 additions and 99 deletions

View File

@ -29,7 +29,7 @@ toml = "0.4"
open = "1.1"
regex = "0.2.1"
tempdir = "0.3.4"
elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true}
elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs" }
# Watch feature
notify = { version = "4.0", optional = true }
@ -56,7 +56,6 @@ output = []
regenerate-css = []
watch = ["notify", "time", "crossbeam"]
serve = ["iron", "staticfile", "ws"]
searchindex = ["elasticlunr"]
[[bin]]
doc = false

View File

@ -2,7 +2,12 @@ use serde::{Serialize, Serializer};
use serde::ser::SerializeStruct;
use std::path::PathBuf;
/// A BookItem corresponds to one entry of the table of contents file SUMMARY.md.
/// A line in that file can either be a numbered chapter with a section number like 2.1.3 or a
/// prefix or suffix chapter without such a section number.
/// The `String` field in the `Chapter` variant contains the section number as `2.1.3`.
/// The `Chapter` type contains the child elements (which can only be other `BookItem::Chapters`).
/// `BookItem::Affix` and `BookItem::Spacer` are only allowed within the root level.
#[derive(Debug, Clone)]
pub enum BookItem {
Chapter(String, Chapter), // String = section
@ -10,6 +15,9 @@ pub enum BookItem {
Spacer,
}
/// A chapter is a `.md` file that is referenced by some line in the `SUMMARY.md` table of
/// contents. It also has references to its sub chapters via `sub_items`. These items can
/// only be of the variant `BookItem::Chapter`.
#[derive(Debug, Clone)]
pub struct Chapter {
pub name: String,
@ -17,13 +25,21 @@ pub struct Chapter {
pub sub_items: Vec<BookItem>,
}
/// A flattening, depth-first iterator over `BookItem`s and their children.
/// It can be obtained by calling `MDBook::iter()`.
#[derive(Debug, Clone)]
pub struct BookItems<'a> {
pub items: &'a [BookItem],
pub current_index: usize,
pub stack: Vec<(&'a [BookItem], usize)>,
/// The remaining items in the iterator in the current, deepest level of the iterator
items: &'a [BookItem],
/// The higher levels of the hierarchy. The parents of the current level are still
/// in the list and accessible as `[stack[0][0], stack[1][0], stack[2][0], ...]`.
stack: Vec<&'a [BookItem]>,
}
/// Iterator for the parent `BookItem`s of a `BookItem`.
pub struct BookItemParents<'a> {
stack: &'a [ &'a [BookItem] ]
}
impl Chapter {
pub fn new(name: String, path: PathBuf) -> Self {
@ -48,39 +64,78 @@ impl Serialize for Chapter {
}
}
// Shamelessly copied from Rustbook
// (https://github.com/rust-lang/rust/blob/master/src/rustbook/book.rs)
impl<'a> Iterator for BookItems<'a> {
type Item = &'a BookItem;
fn next(&mut self) -> Option<&'a BookItem> {
loop {
if self.current_index >= self.items.len() {
match self.stack.pop() {
None => return None,
Some((parent_items, parent_idx)) => {
self.items = parent_items;
self.current_index = parent_idx + 1;
}
}
if let Some((first, rest)) = self.items.split_first() {
// Return the first element in `items` and optionally dive into afterwards.
match first {
&BookItem::Spacer => {
self.items = rest;
},
&BookItem::Chapter(_, ref ch) |
&BookItem::Affix(ref ch) => {
if ch.sub_items.is_empty() {
self.items = rest;
} else {
let cur = &self.items[self.current_index];
match *cur {
BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => {
self.stack.push((self.items, self.current_index));
// Don't remove `first` for now. (Because of Parent Iterator)
self.stack.push(self.items);
self.items = &ch.sub_items[..];
self.current_index = 0;
}
BookItem::Spacer => {
self.current_index += 1;
}
}
return Some(cur);
},
};
Some(first)
} else {
// Current level is drained => pop from `stack` or return `None`
if let Some(stacked_items) = self.stack.pop() {
// The first item of the popped slice is the bookitem we previously dived into.
self.items = &stacked_items[1..];
self.next()
} else {
None
}
}
}
}
impl<'a> BookItems<'a> {
    /// Creates a depth-first iterator over the given root-level `BookItem`s.
    pub fn new(items: &'a [BookItem]) -> BookItems<'a> {
        BookItems {
            items: items,
            stack: vec![],
        }
    }

    /// Returns an iterator to iterate the parents of the last yielded `BookItem`.
    /// Starts with the root item.
    pub fn current_parents(&'a self) -> BookItemParents<'a> {
        BookItemParents { stack: &self.stack }
    }

    /// Collects the names of the parent `BookItem`s of the last yielded `BookItem` into a list.
    pub fn collect_current_parents_names(&self) -> Vec<String> {
        let mut names = Vec::new();
        for parent in self.current_parents() {
            match *parent {
                BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch) => {
                    names.push(ch.name.clone());
                }
                _ => {}
            }
        }
        names
    }

    /// Get the level of the last yielded `BookItem`. Root level = 0.
    pub fn current_depth(&'a self) -> usize {
        self.stack.len()
    }
}
impl<'a> Iterator for BookItemParents<'a> {
    type Item = &'a BookItem;

    /// Yields the first element of each stacked level, walking from the root
    /// towards the current item's immediate parent.
    fn next(&mut self) -> Option<&'a BookItem> {
        match self.stack.split_first() {
            Some((level, remaining)) => {
                self.stack = remaining;
                Some(&level[0])
            }
            None => None,
        }
    }
}

View File

@ -105,11 +105,7 @@ impl MDBook {
/// ```
pub fn iter(&self) -> BookItems {
BookItems {
items: &self.content[..],
current_index: 0,
stack: Vec::new(),
}
BookItems::new(&self.content[..])
}
/// `init()` creates some boilerplate files and directories

View File

@ -88,7 +88,6 @@ extern crate serde_derive;
extern crate serde_json;
extern crate tempdir;
extern crate toml;
#[cfg(feature = "searchindex")]
extern crate elasticlunr;
mod parse;

View File

@ -9,7 +9,6 @@ use theme::{Theme, playpen_editor};
use errors::*;
use regex::{Captures, Regex};
#[cfg(feature = "searchindex")]
use elasticlunr;
use std::ascii::AsciiExt;
@ -35,13 +34,15 @@ impl HtmlHandlebars {
item: &BookItem,
mut ctx: RenderItemContext,
print_content: &mut String,
search_documents : &mut Vec<utils::SearchDocument>)
search_documents : &mut Vec<utils::SearchDocument>,
mut parents_names : Vec<String>)
-> Result<()> {
// FIXME: This should be made DRY-er and rely less on mutable state
match *item {
BookItem::Chapter(_, ref ch) | BookItem::Affix(ref ch)
if !ch.path.as_os_str().is_empty() =>
{
BookItem::Chapter(_, ref ch) |
BookItem::Affix(ref ch) if !ch.path.as_os_str().is_empty() => {
let path = ctx.book.get_source().join(&ch.path);
let content = utils::fs::file_to_string(&path)?;
let base = path.parent()
@ -49,11 +50,20 @@ impl HtmlHandlebars {
let path = ch.path.to_str().ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, "Could not convert path to str")
})?;
let filepath = Path::new(&ch.path).with_extension("html");
let filepath = filepath.to_str().ok_or_else(|| {
Error::from(format!("Bad file name: {}", filepath.display()))
})?;
if ! parents_names.last().map(String::as_ref).unwrap_or("")
.eq_ignore_ascii_case(&ch.name) {
parents_names.push(ch.name.clone());
}
utils::render_markdown_into_searchindex(search_documents,
&content,
path,
&vec![],
filepath,
parents_names,
id_from_content);
// Parse and expand links
@ -84,17 +94,15 @@ impl HtmlHandlebars {
debug!("[*]: Render template");
let rendered = ctx.handlebars.render("index", &ctx.data)?;
let filepath = Path::new(&ch.path).with_extension("html");
let rendered = self.post_process(
rendered,
&normalize_path(filepath.to_str().ok_or_else(|| Error::from(
format!("Bad file name: {}", filepath.display()),
))?),
&normalize_path(filepath),
&ctx.book.config.html_config().unwrap_or_default().playpen,
);
// Write to file
info!("[*] Creating {:?} ✓", filepath.display());
info!("[*] Creating {:?} ✓", filepath);
ctx.book.write_file(filepath, &rendered.into_bytes())?;
if ctx.is_index {
@ -282,20 +290,28 @@ impl Renderer for HtmlHandlebars {
fs::create_dir_all(&destination)
.chain_err(|| "Unexpected error when constructing destination path")?;
for (i, item) in book.iter().enumerate() {
let mut depthfirstiterator = book.iter();
let mut is_index = true;
while let Some(item) = depthfirstiterator.next() {
let ctx = RenderItemContext {
book: book,
handlebars: &handlebars,
destination: destination.to_path_buf(),
data: data.clone(),
is_index: i == 0,
is_index: is_index,
html_config: html_config.clone(),
};
self.render_item(item, ctx, &mut print_content, &mut search_documents)?;
self.render_item(item,
ctx,
&mut print_content,
&mut search_documents,
depthfirstiterator.collect_current_parents_names())?;
is_index = false;
}
// Search index
make_searchindex(book, &search_documents)?;
make_searchindex(book, search_documents)?;
// Print version
self.configure_print_version(&mut data, &print_content);
@ -633,21 +649,29 @@ pub fn normalize_id(content: &str) -> String {
.collect::<String>()
}
#[cfg(not(feature = "searchindex"))]
fn make_searchindex(_book: &MDBook, _search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
Ok(())
}
/// Uses elasticlunr to create a search index and exports that into `searchindex.json`.
fn make_searchindex(book: &MDBook, search_documents : Vec<utils::SearchDocument>) -> Result<()> {
let mut index = elasticlunr::index::Index::new("id",
&["title".into(), "body".into(), "breadcrumbs".into()]);
#[cfg(feature = "searchindex")]
fn make_searchindex(book: &MDBook, search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
let mut index = elasticlunr::IndexBuilder::new();
for sd in search_documents {
index.add_document(&sd.title, &sd.body);
let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new();
map.insert("id".into(), anchor.clone());
map.insert("title".into(), sd.title);
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
}
book.write_file(
Path::new("searchindex").with_extension("json"),
&index.to_json().as_bytes(),
&serde_json::to_string(&index).unwrap().as_bytes(),
)?;
info!("[*] Creating \"searchindex.json\"");

View File

@ -144,6 +144,20 @@ $( document ).ready(function() {
return url;
}
,
escapeHTML: (function() {
var MAP = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
'"': '&#34;',
"'": '&#39;'
};
var repl = function(c) { return MAP[c]; };
return function(s) {
return s.replace(/[&<>'"]/g, repl);
};
})()
,
formatSearchResult : function (result, searchterms) {
// Show text around first occurrence of first search term.
var firstoccurence = result.doc.body.search(searchterms[0]);
@ -173,9 +187,9 @@ $( document ).ready(function() {
return $('<li><a href="'
+ url[0] + '?' + this.MARK_PARAM + '=' + searchterms + '#' + url[1]
+ '">' + result.doc.title + '</a>'
+ '<span class="breadcrumbs">' + result.doc.breadcrumbs + '</span>'
+ '<span class="teaser">' + teaser + '</span>'
+ '">' + result.doc.breadcrumbs + '</a>' // doc.title
+ '<span class="breadcrumbs">' + '</span>'
+ '<span class="teaser">' + this.escapeHTML(teaser) + '</span>'
+ '</li>');
}
,
@ -213,7 +227,8 @@ $( document ).ready(function() {
if (url.params.hasOwnProperty(this.SEARCH_PARAM)
&& url.params[this.SEARCH_PARAM] != "") {
this.searchbar_outer.slideDown();
this.searchbar[0].value = url.params[this.SEARCH_PARAM];
this.searchbar[0].value = decodeURIComponent(
(url.params[this.SEARCH_PARAM]+'').replace(/\+/g, '%20'));
this.searchbarKeyUpHandler();
} else {
this.searchbar_outer.slideUp();
@ -229,19 +244,42 @@ $( document ).ready(function() {
}
,
init : function () {
var this_ = this;
window.md = this;
// For testing purposes: Index current page
this.create_test_searchindex();
//this.create_test_searchindex();
$.getJSON("searchindex.json", function(json) {
//this_.searchindex = elasticlunr.Index.load(json);
// TODO: Workaround: reindex everything
var searchindex = elasticlunr(function () {
this.addField('body');
this.addField('title');
this.addField('breadcrumbs')
this.setRef('id');
});
window.mjs = json;
var docs = json.documentStore.docs;
for (var key in docs) {
searchindex.addDoc(docs[key]);
}
this_.searchindex = searchindex;
// Set up events
var this_ = this;
this.searchicon.click( function(e) { this_.searchIconClickHandler(); } );
this.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } );
this_.searchicon.click( function(e) { this_.searchIconClickHandler(); } );
this_.searchbar.on('keyup', function(e) { this_.searchbarKeyUpHandler(); } );
$(document).on('keydown', function (e) { this_.globalKeyHandler(e); });
// If the user uses the browser buttons, do the same as if a reload happened
window.onpopstate = function(e) { this_.doSearchOrMarkFromUrl(); };
// If reloaded, do the search or mark again, depending on the current url parameters
this.doSearchOrMarkFromUrl();
this_.doSearchOrMarkFromUrl();
});
}
,
hasFocus : function () {

View File

@ -2,10 +2,10 @@ pub mod fs;
use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
OPTION_ENABLE_TABLES};
use std::ascii::AsciiExt;
use std::borrow::Cow;
use std::fmt::Write;
use regex::Regex;
use std::rc::Rc;
/// A heading together with the successive content until the next heading will
/// make up one `SearchDocument`. It represents some independently searchable part of the book.
@ -16,22 +16,22 @@ pub struct SearchDocument {
// Content: Flatted paragraphs, lists, code
pub body : String,
/// Needed information to generate a link to the corresponding title anchor
/// First part is the `reference_base` that should be the same for all documents that
/// First part is the `anchor_base` that should be the same for all documents that
/// came from the same `.md` file. The second part is derived from the heading of the search
/// document.
pub sref : (Rc<String>, Option<String>),
// Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]
pub anchor : (String, Option<String>),
// Hierarchy like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]
// as a human understandable path to the search document.
pub breadcrumbs : Vec<Rc<String>>,
pub hierarchy : Vec<String>,
}
impl SearchDocument {
fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument {
fn new(anchor_base : &str, hierarchy : &Vec<String>) -> SearchDocument {
SearchDocument {
title : "".to_owned(),
body : "".to_owned(),
sref : (sref0.clone(), None),
breadcrumbs : bcs.clone()
anchor : (anchor_base.to_owned(), None),
hierarchy : (*hierarchy).clone()
}
}
@ -47,19 +47,29 @@ impl SearchDocument {
self.body.write_str(&" ").unwrap();
}
}
/// Appends the non-empty entries of `more` to `self.hierarchy`, skipping any
/// entry that case-insensitively repeats the element that was last in the
/// hierarchy before this call (to avoid e.g. a chapter title followed by an
/// identical H1 heading).
fn extend_hierarchy(&mut self, more : &Vec<String>) {
    let previous_last = self.hierarchy.last().cloned().unwrap_or_default();
    for heading in more {
        if !heading.is_empty() && !heading.eq_ignore_ascii_case(&previous_last) {
            self.hierarchy.push(heading.clone());
        }
    }
}
}
/// Renders markdown into flat unformatted text for usage in the search index.
/// Refer to the struct `SearchDocument`.
///
/// The field `sref` in the `SearchDocument` struct becomes
/// `(reference_base, Some(heading_to_sref("The Section Heading")))`
/// The field `anchor` in the `SearchDocument` struct becomes
/// `(anchor_base, Some(heading_to_anchor("The Section Heading")))`
pub fn render_markdown_into_searchindex<F>(
search_documents: &mut Vec<SearchDocument>,
text: &str,
reference_base: &str,
breadcrumbs : &Vec<Rc<String>>,
heading_to_sref : F)
anchor_base: &str,
hierarchy : Vec<String>,
heading_to_anchor : F)
where F : Fn(&str) -> String {
let mut opts = Options::empty();
@ -67,24 +77,31 @@ pub fn render_markdown_into_searchindex<F>(
opts.insert(OPTION_ENABLE_FOOTNOTES);
let p = Parser::new_ext(text, opts);
let reference_base = Rc::new(reference_base.to_owned());
let mut current = SearchDocument::new(&reference_base, breadcrumbs);
let mut current = SearchDocument::new(&anchor_base, &hierarchy);
let mut in_header = false;
let max_paragraph_level = 3;
let mut header_hierarchy = vec!["".to_owned(); max_paragraph_level as usize];
for event in p {
match event {
Event::Start(Tag::Header(i)) if i <= 3 => {
Event::Start(Tag::Header(i)) if i <= max_paragraph_level => {
// Paragraph finished, the next header is following now
if current.has_content() {
// Push header_hierarchy to the search documents chapter hierarchy
current.extend_hierarchy(&header_hierarchy);
search_documents.push(current);
}
current = SearchDocument::new(&reference_base, breadcrumbs);
current = SearchDocument::new(&anchor_base, &hierarchy);
in_header = true;
}
Event::End(Tag::Header(_)) => {
// Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs
current.breadcrumbs.push(Rc::new(current.title.clone()));
current.sref.1 = Some(heading_to_sref(&current.title));
Event::End(Tag::Header(i)) if i <= max_paragraph_level => {
in_header = false;
current.anchor.1 = Some(heading_to_anchor(&current.title));
header_hierarchy[i as usize -1] = current.title.clone();
for h in &mut header_hierarchy[i as usize ..] {
*h = "".to_owned();
}
}
Event::Start(_) | Event::End(_) => {}
Event::Text(text) => {
@ -97,6 +114,7 @@ pub fn render_markdown_into_searchindex<F>(
Event::SoftBreak | Event::HardBreak => {}
}
}
current.extend_hierarchy(&header_hierarchy);
search_documents.push(current);
}