Search: Configuration via book.toml

This commit is contained in:
Phaiax 2017-11-13 22:18:13 +01:00
parent 850df09670
commit 4dcba1943c
6 changed files with 163 additions and 32 deletions

View File

@ -4,3 +4,13 @@ author = "Mathieu David"
[output.html] [output.html]
mathjax-support = true mathjax-support = true
[output.html.search]
enable = true
limit-results = 20
use-boolean-and = true
boost-title = 2
boost-hierarchy = 2
boost-paragraph = 1
expand = true
split-until-heading = 2

View File

@ -227,6 +227,7 @@ pub struct HtmlConfig {
pub additional_css: Vec<PathBuf>, pub additional_css: Vec<PathBuf>,
pub additional_js: Vec<PathBuf>, pub additional_js: Vec<PathBuf>,
pub playpen: Playpen, pub playpen: Playpen,
pub search: Search,
} }
/// Configuration for tweaking how the HTML renderer handles the playpen. /// Configuration for tweaking how the HTML renderer handles the playpen.
@ -236,6 +237,53 @@ pub struct Playpen {
pub editable: bool, pub editable: bool,
} }
/// Configuration of the search functionality of the HTML renderer.
///
/// Because of `#[serde(default)]`, any key missing from the configuration is
/// filled in from the `Default` implementation of `Search`, and because of
/// `rename_all = "kebab-case"` the keys are written with dashes
/// (for example `limit-results`, `split-until-heading`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default, rename_all = "kebab-case")]
pub struct Search {
    /// Enable in-browser searching. Default: true.
    pub enable: bool,
    /// Maximum number of visible results. Default: 30.
    pub limit_results: u32,
    /// The number of words used for a search result teaser. Default: 30.
    pub teaser_word_count: u32,
    /// Define the logical link between multiple search words.
    /// If true, all search words must appear in each result (boolean `AND`);
    /// otherwise a single matching word is sufficient (boolean `OR`).
    /// Default: false.
    pub use_boolean_and: bool,
    /// Boost factor for the search result score if a search word appears in the header.
    /// Default: 2.
    pub boost_title: u8,
    /// Boost factor for the search result score if a search word appears in the hierarchy.
    /// The hierarchy contains all titles of the parent documents and all parent headings.
    /// Default: 1.
    pub boost_hierarchy: u8,
    /// Boost factor for the search result score if a search word appears in the text.
    /// Default: 1.
    pub boost_paragraph: u8,
    /// True if the search word `micro` should match `microwave`. Default: true.
    pub expand: bool,
    /// Documents are split into smaller parts, separated by headings. This defines up to which
    /// heading level documents should be split. Default: 3. (`### This is a level 3 heading`)
    pub split_until_heading: u8,
}
impl Default for Search {
fn default() -> Search {
// Please update the documentation of `Search` when changing values!
Search {
enable: true,
limit_results: 30,
teaser_word_count: 30,
use_boolean_and: false,
boost_title: 2,
boost_hierarchy: 1,
boost_paragraph: 1,
expand: true,
split_until_heading: 3,
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {

View File

@ -3,7 +3,7 @@ use preprocess;
use renderer::Renderer; use renderer::Renderer;
use book::MDBook; use book::MDBook;
use book::bookitem::{BookItem, Chapter}; use book::bookitem::{BookItem, Chapter};
use config::{Config, Playpen, HtmlConfig}; use config::{Config, Playpen, HtmlConfig, Search};
use {utils, theme}; use {utils, theme};
use theme::{Theme, playpen_editor}; use theme::{Theme, playpen_editor};
use errors::*; use errors::*;
@ -60,7 +60,9 @@ impl HtmlHandlebars {
.eq_ignore_ascii_case(&ch.name) { .eq_ignore_ascii_case(&ch.name) {
parents_names.push(ch.name.clone()); parents_names.push(ch.name.clone());
} }
utils::render_markdown_into_searchindex(search_documents, utils::render_markdown_into_searchindex(
&ctx.html_config.search,
search_documents,
&content, &content,
filepath, filepath,
parents_names, parents_names,
@ -311,7 +313,7 @@ impl Renderer for HtmlHandlebars {
} }
// Search index // Search index
make_searchindex(book, search_documents)?; make_searchindex(book, search_documents, &html_config.search)?;
// Print version // Print version
self.configure_print_version(&mut data, &print_content); self.configure_print_version(&mut data, &print_content);
@ -650,28 +652,92 @@ pub fn normalize_id(content: &str) -> String {
} }
/// Uses elasticlunr to create a search index and exports that into `searchindex.json`. /// Uses elasticlunr to create a search index and exports that into `searchindex.json`.
fn make_searchindex(book: &MDBook, search_documents : Vec<utils::SearchDocument>) -> Result<()> { fn make_searchindex(book: &MDBook,
let mut index = elasticlunr::index::Index::new("id", search_documents : Vec<utils::SearchDocument>,
&["title".into(), "body".into(), "breadcrumbs".into()]); searchconfig : &Search) -> Result<()> {
for sd in search_documents {
let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new(); #[derive(Serialize)]
map.insert("id".into(), anchor.clone()); struct SearchOptionsField {
map.insert("title".into(), sd.title); boost: u8,
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
} }
#[derive(Serialize)]
struct SearchOptionsFields {
title: SearchOptionsField,
body: SearchOptionsField,
breadcrumbs: SearchOptionsField,
}
/// The searchoptions for elasticlunr.js
#[derive(Serialize)]
struct SearchOptions {
bool: String,
expand: bool,
limit_results: u32,
teaser_word_count: u32,
fields: SearchOptionsFields,
}
#[derive(Serialize)]
struct SearchindexJson {
enable: bool,
#[serde(skip_serializing_if = "Option::is_none")]
searchoptions: Option<SearchOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
index: Option<elasticlunr::index::Index>,
}
let searchoptions = SearchOptions {
bool : if searchconfig.use_boolean_and { "AND".into() } else { "OR".into() },
expand : searchconfig.expand,
limit_results : searchconfig.limit_results,
teaser_word_count : searchconfig.teaser_word_count,
fields : SearchOptionsFields {
title : SearchOptionsField { boost : searchconfig.boost_title },
body : SearchOptionsField { boost : searchconfig.boost_paragraph },
breadcrumbs : SearchOptionsField { boost : searchconfig.boost_hierarchy },
}
};
let json_contents = if searchconfig.enable {
let mut index = elasticlunr::index::Index::new("id",
&["title".into(), "body".into(), "breadcrumbs".into()]);
for sd in search_documents {
let anchor = if let Some(s) = sd.anchor.1 {
format!("{}#{}", sd.anchor.0, &s)
} else {
sd.anchor.0
};
let mut map = HashMap::new();
map.insert("id".into(), anchor.clone());
map.insert("title".into(), sd.title);
map.insert("body".into(), sd.body);
map.insert("breadcrumbs".into(), sd.hierarchy.join(" » "));
index.add_doc(&anchor, map);
}
SearchindexJson {
enable : searchconfig.enable,
searchoptions : Some(searchoptions),
index : Some(index),
}
} else {
SearchindexJson {
enable : false,
searchoptions : None,
index : None,
}
};
book.write_file( book.write_file(
Path::new("searchindex").with_extension("json"), Path::new("searchindex").with_extension("json"),
&serde_json::to_string(&index).unwrap().as_bytes(), &serde_json::to_string(&json_contents).unwrap().as_bytes(),
)?; )?;
info!("[*] Creating \"searchindex.json\""); info!("[*] Creating \"searchindex.json\"");

View File

@ -17,6 +17,8 @@ $( document ).ready(function() {
searchoptions : { searchoptions : {
bool: "AND", bool: "AND",
expand: true, expand: true,
teaser_word_count : 30,
limit_results : 30,
fields: { fields: {
title: {boost: 1}, title: {boost: 1},
body: {boost: 1}, body: {boost: 1},
@ -25,8 +27,6 @@ $( document ).ready(function() {
}, },
mark_exclude : [], // ['.hljs'] mark_exclude : [], // ['.hljs']
current_searchterm : "", current_searchterm : "",
teaser_words : 30,
resultcount_limit : 30,
SEARCH_PARAM : 'search', SEARCH_PARAM : 'search',
MARK_PARAM : 'highlight', MARK_PARAM : 'highlight',
@ -220,7 +220,7 @@ $( document ).ready(function() {
} }
var window_weight = []; var window_weight = [];
var window_size = Math.min(weighted.length, this.teaser_words); var window_size = Math.min(weighted.length, this.searchoptions.teaser_word_count);
var cur_sum = 0; var cur_sum = 0;
for (var wordindex = 0; wordindex < window_size; wordindex++) { for (var wordindex = 0; wordindex < window_size; wordindex++) {
@ -280,8 +280,7 @@ $( document ).ready(function() {
// Do the actual search // Do the actual search
var results = this.searchindex.search(searchterm, this.searchoptions); var results = this.searchindex.search(searchterm, this.searchoptions);
var resultcount = (results.length > this.resultcount_limit) var resultcount = Math.min(results.length, this.searchoptions.limit_results);
? this.resultcount_limit : results.length;
// Display search metrics // Display search metrics
this.searchresults_header.text(this.formatSearchMetric(resultcount, searchterm)); this.searchresults_header.text(this.formatSearchMetric(resultcount, searchterm));
@ -327,7 +326,14 @@ $( document ).ready(function() {
//this.create_test_searchindex(); //this.create_test_searchindex();
$.getJSON("searchindex.json", function(json) { $.getJSON("searchindex.json", function(json) {
//this_.searchindex = elasticlunr.Index.load(json);
if (json.enable == false) {
this_.searchicon.hide();
return;
}
this_.searchoptions = json.searchoptions;
//this_.searchindex = elasticlunr.Index.load(json.index);
// TODO: Workaround: reindex everything // TODO: Workaround: reindex everything
var searchindex = elasticlunr(function () { var searchindex = elasticlunr(function () {
@ -337,7 +343,8 @@ $( document ).ready(function() {
this.setRef('id'); this.setRef('id');
}); });
window.mjs = json; window.mjs = json;
var docs = json.documentStore.docs; window.search = this_;
var docs = json.index.documentStore.docs;
for (var key in docs) { for (var key in docs) {
searchindex.addDoc(docs[key]); searchindex.addDoc(docs[key]);
} }

View File

@ -64,8 +64,6 @@
} }
</script> </script>
<!-- Fetch store.js from local - TODO add CDN when 2.x.x is available on cdnjs --> <!-- Fetch store.js from local - TODO add CDN when 2.x.x is available on cdnjs -->
<script src="store.js"></script> <script src="store.js"></script>

View File

@ -6,6 +6,7 @@ use std::ascii::AsciiExt;
use std::borrow::Cow; use std::borrow::Cow;
use std::fmt::Write; use std::fmt::Write;
use regex::Regex; use regex::Regex;
use config::Search;
/// A heading together with the successive content until the next heading will /// A heading together with the successive content until the next heading will
/// make up one `SearchDocument`. It represents some independently searchable part of the book. /// make up one `SearchDocument`. It represents some independently searchable part of the book.
@ -65,6 +66,7 @@ impl SearchDocument {
/// The field `anchor` in the `SearchDocument` struct becomes /// The field `anchor` in the `SearchDocument` struct becomes
/// `(anchor_base, Some(heading_to_anchor("The Section Heading")))` /// `(anchor_base, Some(heading_to_anchor("The Section Heading")))`
pub fn render_markdown_into_searchindex<F>( pub fn render_markdown_into_searchindex<F>(
searchconfig: &Search,
search_documents: &mut Vec<SearchDocument>, search_documents: &mut Vec<SearchDocument>,
text: &str, text: &str,
anchor_base: &str, anchor_base: &str,
@ -79,7 +81,7 @@ pub fn render_markdown_into_searchindex<F>(
let mut current = SearchDocument::new(&anchor_base, &hierarchy); let mut current = SearchDocument::new(&anchor_base, &hierarchy);
let mut in_header = false; let mut in_header = false;
let max_paragraph_level = 3; let max_paragraph_level = searchconfig.split_until_heading as i32;
let mut header_hierarchy = vec!["".to_owned(); max_paragraph_level as usize]; let mut header_hierarchy = vec!["".to_owned(); max_paragraph_level as usize];
for event in p { for event in p {