Search: Configuration via book.toml

This commit is contained in:
Phaiax 2017-11-13 22:18:13 +01:00
parent 850df09670
commit 4dcba1943c
6 changed files with 163 additions and 32 deletions

View File

@ -4,3 +4,13 @@ author = "Mathieu David"
[output.html]
mathjax-support = true
[output.html.search]
enable = true
limit-results = 20
use-boolean-and = true
boost-title = 2
boost-hierarchy = 2
boost-paragraph = 1
expand = true
split-until-heading = 2

View File

@ -227,6 +227,7 @@ pub struct HtmlConfig {
pub additional_css: Vec<PathBuf>,
pub additional_js: Vec<PathBuf>,
pub playpen: Playpen,
pub search: Search,
}
/// Configuration for tweaking how the HTML renderer handles the playpen.
@ -236,6 +237,53 @@ pub struct Playpen {
pub editable: bool,
}
/// Configuration of the search functionality of the HTML renderer.
///
/// Keys are read and written in kebab-case (e.g. `limit-results`). Because of
/// `#[serde(default)]`, any key missing from the input is taken from `Search::default()`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default, rename_all = "kebab-case")]
pub struct Search {
/// Enable in-browser searching. Default: true.
pub enable: bool,
/// Maximum number of visible results. Default: 30.
pub limit_results: u32,
/// The number of words used for a search result teaser. Default: 30.
pub teaser_word_count: u32,
/// Define the logical link between multiple search words.
/// If true, all search words must appear in each result. Default: false.
pub use_boolean_and: bool,
/// Boost factor for the search result score if a search word appears in the header.
/// Default: 2.
pub boost_title: u8,
/// Boost factor for the search result score if a search word appears in the hierarchy.
/// The hierarchy contains all titles of the parent documents and all parent headings.
/// Default: 1.
pub boost_hierarchy: u8,
/// Boost factor for the search result score if a search word appears in the text.
/// Default: 1.
pub boost_paragraph: u8,
/// True if the search word `micro` should match `microwave`. Default: true.
pub expand : bool,
/// Documents are split into smaller parts, separated by headings. This defines up to which
/// heading level documents should be split. Default: 3. (`### This is a level 3 heading`)
pub split_until_heading: u8,
}
impl Default for Search {
fn default() -> Search {
// Please update the documentation of `Search` when changing values!
Search {
enable: true,
limit_results: 30,
teaser_word_count: 30,
use_boolean_and: false,
boost_title: 2,
boost_hierarchy: 1,
boost_paragraph: 1,
expand: true,
split_until_heading: 3,
}
}
}
#[cfg(test)]
mod tests {

View File

@ -3,7 +3,7 @@ use preprocess;
use renderer::Renderer;
use book::MDBook;
use book::bookitem::{BookItem, Chapter};
use config::{Config, Playpen, HtmlConfig};
use config::{Config, Playpen, HtmlConfig, Search};
use {utils, theme};
use theme::{Theme, playpen_editor};
use errors::*;
@ -60,7 +60,9 @@ impl HtmlHandlebars {
.eq_ignore_ascii_case(&ch.name) {
parents_names.push(ch.name.clone());
}
utils::render_markdown_into_searchindex(search_documents,
utils::render_markdown_into_searchindex(
&ctx.html_config.search,
search_documents,
&content,
filepath,
parents_names,
@ -311,7 +313,7 @@ impl Renderer for HtmlHandlebars {
}
// Search index
make_searchindex(book, search_documents)?;
make_searchindex(book, search_documents, &html_config.search)?;
// Print version
self.configure_print_version(&mut data, &print_content);
@ -650,28 +652,92 @@ pub fn normalize_id(content: &str) -> String {
}
/// Uses elasticlunr to create a search index and exports that into `searchindex.json`.
fn make_searchindex(book: &MDBook, search_documents : Vec<utils::SearchDocument>) -> Result<()> {
let mut index = elasticlunr::index::Index::new("id",
&["title".into(), "body".into(), "breadcrumbs".into()]);
fn make_searchindex(book: &MDBook,
search_documents : Vec<utils::SearchDocument>,
searchconfig : &Search) -> Result<()> {
for sd in search_documents {
let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new();
map.insert("id".into(), anchor.clone());
map.insert("title".into(), sd.title);
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
#[derive(Serialize)]
struct SearchOptionsField {
boost: u8,
}
#[derive(Serialize)]
struct SearchOptionsFields {
title: SearchOptionsField,
body: SearchOptionsField,
breadcrumbs: SearchOptionsField,
}
/// The searchoptions for elasticlunr.js
#[derive(Serialize)]
struct SearchOptions {
bool: String,
expand: bool,
limit_results: u32,
teaser_word_count: u32,
fields: SearchOptionsFields,
}
#[derive(Serialize)]
struct SearchindexJson {
enable: bool,
#[serde(skip_serializing_if = "Option::is_none")]
searchoptions: Option<SearchOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
index: Option<elasticlunr::index::Index>,
}
let searchoptions = SearchOptions {
bool : if searchconfig.use_boolean_and { "AND".into() } else { "OR".into() },
expand : searchconfig.expand,
limit_results : searchconfig.limit_results,
teaser_word_count : searchconfig.teaser_word_count,
fields : SearchOptionsFields {
title : SearchOptionsField { boost : searchconfig.boost_title },
body : SearchOptionsField { boost : searchconfig.boost_paragraph },
breadcrumbs : SearchOptionsField { boost : searchconfig.boost_hierarchy },
}
};
let json_contents = if searchconfig.enable {
let mut index = elasticlunr::index::Index::new("id",
&["title".into(), "body".into(), "breadcrumbs".into()]);
for sd in search_documents {
let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new();
map.insert("id".into(), anchor.clone());
map.insert("title".into(), sd.title);
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
}
SearchindexJson {
enable : searchconfig.enable,
searchoptions : Some(searchoptions),
index : Some(index),
}
} else {
SearchindexJson {
enable : false,
searchoptions : None,
index : None,
}
};
book.write_file(
Path::new("searchindex").with_extension("json"),
&serde_json::to_string(&index).unwrap().as_bytes(),
&serde_json::to_string(&json_contents).unwrap().as_bytes(),
)?;
info!("[*] Creating \"searchindex.json\"");

View File

@ -17,6 +17,8 @@ $( document ).ready(function() {
searchoptions : {
bool: "AND",
expand: true,
teaser_word_count : 30,
limit_results : 30,
fields: {
title: {boost: 1},
body: {boost: 1},
@ -25,8 +27,6 @@ $( document ).ready(function() {
},
mark_exclude : [], // ['.hljs']
current_searchterm : "",
teaser_words : 30,
resultcount_limit : 30,
SEARCH_PARAM : 'search',
MARK_PARAM : 'highlight',
@ -220,7 +220,7 @@ $( document ).ready(function() {
}
var window_weight = [];
var window_size = Math.min(weighted.length, this.teaser_words);
var window_size = Math.min(weighted.length, this.searchoptions.teaser_word_count);
var cur_sum = 0;
for (var wordindex = 0; wordindex < window_size; wordindex++) {
@ -280,8 +280,7 @@ $( document ).ready(function() {
// Do the actual search
var results = this.searchindex.search(searchterm, this.searchoptions);
var resultcount = (results.length > this.resultcount_limit)
? this.resultcount_limit : results.length;
var resultcount = Math.min(results.length, this.searchoptions.limit_results);
// Display search metrics
this.searchresults_header.text(this.formatSearchMetric(resultcount, searchterm));
@ -327,7 +326,14 @@ $( document ).ready(function() {
//this.create_test_searchindex();
$.getJSON("searchindex.json", function(json) {
//this_.searchindex = elasticlunr.Index.load(json);
if (json.enable == false) {
this_.searchicon.hide();
return;
}
this_.searchoptions = json.searchoptions;
//this_.searchindex = elasticlunr.Index.load(json.index);
// TODO: Workaround: reindex everything
var searchindex = elasticlunr(function () {
@ -337,7 +343,8 @@ $( document ).ready(function() {
this.setRef('id');
});
window.mjs = json;
var docs = json.documentStore.docs;
window.search = this_;
var docs = json.index.documentStore.docs;
for (var key in docs) {
searchindex.addDoc(docs[key]);
}

View File

@ -64,8 +64,6 @@
}
</script>
<!-- Fetch store.js from local - TODO add CDN when 2.x.x is available on cdnjs -->
<script src="store.js"></script>

View File

@ -6,6 +6,7 @@ use std::ascii::AsciiExt;
use std::borrow::Cow;
use std::fmt::Write;
use regex::Regex;
use config::Search;
/// A heading together with the successive content until the next heading will
/// make up one `SearchDocument`. It represents some independently searchable part of the book.
@ -65,6 +66,7 @@ impl SearchDocument {
/// The field `anchor` in the `SearchDocument` struct becomes
/// `(anchor_base, Some(heading_to_anchor("The Section Heading")))`
pub fn render_markdown_into_searchindex<F>(
searchconfig: &Search,
search_documents: &mut Vec<SearchDocument>,
text: &str,
anchor_base: &str,
@ -79,7 +81,7 @@ pub fn render_markdown_into_searchindex<F>(
let mut current = SearchDocument::new(&anchor_base, &hierarchy);
let mut in_header = false;
let max_paragraph_level = 3;
let max_paragraph_level = searchconfig.split_until_heading as i32;
let mut header_hierarchy = vec!["".to_owned(); max_paragraph_level as usize];
for event in p {