Merge pull request #707 from mattico/search-index-opt
Optimize search index
This commit is contained in:
commit
da9be67516
|
@ -113,6 +113,7 @@ Available configuration options for the `[output.html.playpen]` table:
|
||||||
|
|
||||||
Available configuration options for the `[output.html.search]` table:
|
Available configuration options for the `[output.html.search]` table:
|
||||||
|
|
||||||
|
- **enable:** Enables the search feature. Defaults to `true`.
|
||||||
- **limit-results:** The maximum number of search results. Defaults to `30`.
|
- **limit-results:** The maximum number of search results. Defaults to `30`.
|
||||||
- **teaser-word-count:** The number of words used for a search result teaser.
|
- **teaser-word-count:** The number of words used for a search result teaser.
|
||||||
Defaults to `30`.
|
Defaults to `30`.
|
||||||
|
@ -168,6 +169,7 @@ boost-hierarchy = 1
|
||||||
boost-paragraph = 1
|
boost-paragraph = 1
|
||||||
expand = true
|
expand = true
|
||||||
heading-split-level = 3
|
heading-split-level = 3
|
||||||
|
copy-js = true
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -463,9 +463,11 @@ impl Default for Playpen {
|
||||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
#[serde(default, rename_all = "kebab-case")]
|
#[serde(default, rename_all = "kebab-case")]
|
||||||
pub struct Search {
|
pub struct Search {
|
||||||
|
/// Enable the search feature. Default: `true`.
|
||||||
|
pub enable: bool,
|
||||||
/// Maximum number of visible results. Default: `30`.
|
/// Maximum number of visible results. Default: `30`.
|
||||||
pub limit_results: u32,
|
pub limit_results: u32,
|
||||||
/// The number of words used for a search result teaser. Default: `30`,
|
/// The number of words used for a search result teaser. Default: `30`.
|
||||||
pub teaser_word_count: u32,
|
pub teaser_word_count: u32,
|
||||||
/// Define the logical link between multiple search words.
|
/// Define the logical link between multiple search words.
|
||||||
/// If true, all search words must appear in each result. Default: `true`.
|
/// If true, all search words must appear in each result. Default: `true`.
|
||||||
|
@ -494,6 +496,7 @@ impl Default for Search {
|
||||||
fn default() -> Search {
|
fn default() -> Search {
|
||||||
// Please update the documentation of `Search` when changing values!
|
// Please update the documentation of `Search` when changing values!
|
||||||
Search {
|
Search {
|
||||||
|
enable: true,
|
||||||
limit_results: 30,
|
limit_results: 30,
|
||||||
teaser_word_count: 30,
|
teaser_word_count: 30,
|
||||||
use_boolean_and: false,
|
use_boolean_and: false,
|
||||||
|
|
|
@ -367,8 +367,10 @@ impl Renderer for HtmlHandlebars {
|
||||||
.chain_err(|| "Unable to copy across additional CSS and JS")?;
|
.chain_err(|| "Unable to copy across additional CSS and JS")?;
|
||||||
|
|
||||||
// Render search index
|
// Render search index
|
||||||
#[cfg(feature = "search")]
|
let search = html_config.search.unwrap_or_default();
|
||||||
super::search::create_files(&html_config.search.unwrap_or_default(), &destination, &book)?;
|
if cfg!(feature = "search") && search.enable {
|
||||||
|
super::search::create_files(&search, &destination, &book)?;
|
||||||
|
}
|
||||||
|
|
||||||
// Copy all remaining files
|
// Copy all remaining files
|
||||||
utils::fs::copy_files_except_ext(&src_dir, &destination, true, &["md"])?;
|
utils::fs::copy_files_except_ext(&src_dir, &destination, true, &["md"])?;
|
||||||
|
@ -446,10 +448,9 @@ fn make_data(
|
||||||
|
|
||||||
let search = html_config.search.clone();
|
let search = html_config.search.clone();
|
||||||
if cfg!(feature = "search") {
|
if cfg!(feature = "search") {
|
||||||
data.insert("search_enabled".to_owned(), json!(true));
|
let search = search.unwrap_or_default();
|
||||||
if search.unwrap_or_default().copy_js {
|
data.insert("search_enabled".to_owned(), json!(search.enable));
|
||||||
data.insert("search_js".to_owned(), json!(true));
|
data.insert("search_js".to_owned(), json!(search.enable && search.copy_js));
|
||||||
}
|
|
||||||
} else if search.is_some() {
|
} else if search.is_some() {
|
||||||
warn!("mdBook compiled without search support, ignoring `output.html.search` table");
|
warn!("mdBook compiled without search support, ignoring `output.html.search` table");
|
||||||
warn!(
|
warn!(
|
||||||
|
|
|
@ -18,16 +18,21 @@ use theme::searcher;
|
||||||
/// Creates all files required for search.
|
/// Creates all files required for search.
|
||||||
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
|
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
|
||||||
let mut index = Index::new(&["title", "body", "breadcrumbs"]);
|
let mut index = Index::new(&["title", "body", "breadcrumbs"]);
|
||||||
|
let mut doc_urls = Vec::with_capacity(book.sections.len());
|
||||||
|
|
||||||
for item in book.iter() {
|
for item in book.iter() {
|
||||||
render_item(&mut index, &search_config, item)?;
|
render_item(&mut index, &search_config, &mut doc_urls, item)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let index = write_to_js(index, &search_config)?;
|
let index = write_to_json(index, &search_config, doc_urls)?;
|
||||||
debug!("Writing search index ✓");
|
debug!("Writing search index ✓");
|
||||||
|
if index.len() > 10_000_000 {
|
||||||
|
warn!("searchindex.json is very large ({} bytes)", index.len());
|
||||||
|
}
|
||||||
|
|
||||||
if search_config.copy_js {
|
if search_config.copy_js {
|
||||||
utils::fs::write_file(destination, "searchindex.js", index.as_bytes())?;
|
utils::fs::write_file(destination, "searchindex.json", index.as_bytes())?;
|
||||||
|
utils::fs::write_file(destination, "searchindex.js", format!("window.search = {};", index).as_bytes())?;
|
||||||
utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
|
utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
|
||||||
utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
|
utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
|
||||||
utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
|
utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
|
||||||
|
@ -38,18 +43,22 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Uses the given arguments to construct a search document, then inserts it to the given index.
|
/// Uses the given arguments to construct a search document, then inserts it to the given index.
|
||||||
fn add_doc<'a>(
|
fn add_doc(
|
||||||
index: &mut Index,
|
index: &mut Index,
|
||||||
anchor_base: &'a str,
|
doc_urls: &mut Vec<String>,
|
||||||
|
anchor_base: &str,
|
||||||
section_id: &Option<String>,
|
section_id: &Option<String>,
|
||||||
items: &[&str],
|
items: &[&str],
|
||||||
) {
|
) {
|
||||||
let doc_ref: Cow<'a, str> = if let &Some(ref id) = section_id {
|
let url = if let &Some(ref id) = section_id {
|
||||||
format!("{}#{}", anchor_base, id).into()
|
Cow::Owned(format!("{}#{}", anchor_base, id))
|
||||||
} else {
|
} else {
|
||||||
anchor_base.into()
|
Cow::Borrowed(anchor_base)
|
||||||
};
|
};
|
||||||
let doc_ref = utils::collapse_whitespace(doc_ref.trim());
|
let url = utils::collapse_whitespace(url.trim());
|
||||||
|
let doc_ref = doc_urls.len().to_string();
|
||||||
|
doc_urls.push(url.into());
|
||||||
|
|
||||||
let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
|
let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
|
||||||
index.add_doc(&doc_ref, items);
|
index.add_doc(&doc_ref, items);
|
||||||
}
|
}
|
||||||
|
@ -58,6 +67,7 @@ fn add_doc<'a>(
|
||||||
fn render_item(
|
fn render_item(
|
||||||
index: &mut Index,
|
index: &mut Index,
|
||||||
search_config: &Search,
|
search_config: &Search,
|
||||||
|
doc_urls: &mut Vec<String>,
|
||||||
item: &BookItem,
|
item: &BookItem,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let chapter = match item {
|
let chapter = match item {
|
||||||
|
@ -92,6 +102,7 @@ fn render_item(
|
||||||
// Write the data to the index, and clear it for the next section
|
// Write the data to the index, and clear it for the next section
|
||||||
add_doc(
|
add_doc(
|
||||||
index,
|
index,
|
||||||
|
doc_urls,
|
||||||
&anchor_base,
|
&anchor_base,
|
||||||
&section_id,
|
&section_id,
|
||||||
&[&heading, &body, &breadcrumbs.join(" » ")],
|
&[&heading, &body, &breadcrumbs.join(" » ")],
|
||||||
|
@ -144,6 +155,7 @@ fn render_item(
|
||||||
// Make sure the last section is added to the index
|
// Make sure the last section is added to the index
|
||||||
add_doc(
|
add_doc(
|
||||||
index,
|
index,
|
||||||
|
doc_urls,
|
||||||
&anchor_base,
|
&anchor_base,
|
||||||
&section_id,
|
&section_id,
|
||||||
&[&heading, &body, &breadcrumbs.join(" » ")],
|
&[&heading, &body, &breadcrumbs.join(" » ")],
|
||||||
|
@ -153,10 +165,7 @@ fn render_item(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Exports the index and search options to a JS script which stores the index in `window.search`.
|
fn write_to_json(index: Index, search_config: &Search, doc_urls: Vec<String>) -> Result<String> {
|
||||||
/// Using a JS script is a workaround for CORS in `file://` URIs. It also removes the need for
|
|
||||||
/// downloading/parsing JSON in JS.
|
|
||||||
fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
|
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use self::elasticlunr::config::{SearchBool, SearchOptions, SearchOptionsField};
|
use self::elasticlunr::config::{SearchBool, SearchOptions, SearchOptionsField};
|
||||||
|
|
||||||
|
@ -169,9 +178,11 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct SearchindexJson {
|
struct SearchindexJson {
|
||||||
/// The options used for displaying search results
|
/// The options used for displaying search results
|
||||||
resultsoptions: ResultsOptions,
|
results_options: ResultsOptions,
|
||||||
/// The searchoptions for elasticlunr.js
|
/// The searchoptions for elasticlunr.js
|
||||||
searchoptions: SearchOptions,
|
search_options: SearchOptions,
|
||||||
|
/// Used to lookup a document's URL from an integer document ref.
|
||||||
|
doc_urls: Vec<String>,
|
||||||
/// The index for elasticlunr.js
|
/// The index for elasticlunr.js
|
||||||
index: elasticlunr::Index,
|
index: elasticlunr::Index,
|
||||||
}
|
}
|
||||||
|
@ -185,7 +196,7 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
|
||||||
opt.boost = Some(search_config.boost_hierarchy);
|
opt.boost = Some(search_config.boost_hierarchy);
|
||||||
fields.insert("breadcrumbs".into(), opt);
|
fields.insert("breadcrumbs".into(), opt);
|
||||||
|
|
||||||
let searchoptions = SearchOptions {
|
let search_options = SearchOptions {
|
||||||
bool: if search_config.use_boolean_and {
|
bool: if search_config.use_boolean_and {
|
||||||
SearchBool::And
|
SearchBool::And
|
||||||
} else {
|
} else {
|
||||||
|
@ -195,14 +206,15 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
|
||||||
fields,
|
fields,
|
||||||
};
|
};
|
||||||
|
|
||||||
let resultsoptions = ResultsOptions {
|
let results_options = ResultsOptions {
|
||||||
limit_results: search_config.limit_results,
|
limit_results: search_config.limit_results,
|
||||||
teaser_word_count: search_config.teaser_word_count,
|
teaser_word_count: search_config.teaser_word_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
let json_contents = SearchindexJson {
|
let json_contents = SearchindexJson {
|
||||||
resultsoptions,
|
results_options,
|
||||||
searchoptions,
|
search_options,
|
||||||
|
doc_urls,
|
||||||
index,
|
index,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -211,7 +223,7 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
|
||||||
let json_contents = serde_json::to_value(&json_contents)?;
|
let json_contents = serde_json::to_value(&json_contents)?;
|
||||||
let json_contents = serde_json::to_string(&json_contents)?;
|
let json_contents = serde_json::to_string(&json_contents)?;
|
||||||
|
|
||||||
Ok(format!("window.search = {};", json_contents))
|
Ok(json_contents)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clean_html(html: &str) -> String {
|
fn clean_html(html: &str) -> String {
|
||||||
|
|
|
@ -27,11 +27,12 @@ window.search = window.search || {};
|
||||||
content = document.getElementById('content'),
|
content = document.getElementById('content'),
|
||||||
|
|
||||||
searchindex = null,
|
searchindex = null,
|
||||||
resultsoptions = {
|
doc_urls = [],
|
||||||
|
results_options = {
|
||||||
teaser_word_count: 30,
|
teaser_word_count: 30,
|
||||||
limit_results: 30,
|
limit_results: 30,
|
||||||
},
|
},
|
||||||
searchoptions = {
|
search_options = {
|
||||||
bool: "AND",
|
bool: "AND",
|
||||||
expand: true,
|
expand: true,
|
||||||
fields: {
|
fields: {
|
||||||
|
@ -139,7 +140,7 @@ window.search = window.search || {};
|
||||||
teaser_count++;
|
teaser_count++;
|
||||||
|
|
||||||
// The ?URL_MARK_PARAM= parameter belongs inbetween the page and the #heading-anchor
|
// The ?URL_MARK_PARAM= parameter belongs inbetween the page and the #heading-anchor
|
||||||
var url = result.ref.split("#");
|
var url = doc_urls[result.ref].split("#");
|
||||||
if (url.length == 1) { // no anchor found
|
if (url.length == 1) { // no anchor found
|
||||||
url.push("");
|
url.push("");
|
||||||
}
|
}
|
||||||
|
@ -196,7 +197,7 @@ window.search = window.search || {};
|
||||||
}
|
}
|
||||||
|
|
||||||
var window_weight = [];
|
var window_weight = [];
|
||||||
var window_size = Math.min(weighted.length, resultsoptions.teaser_word_count);
|
var window_size = Math.min(weighted.length, results_options.teaser_word_count);
|
||||||
|
|
||||||
var cur_sum = 0;
|
var cur_sum = 0;
|
||||||
for (var wordindex = 0; wordindex < window_size; wordindex++) {
|
for (var wordindex = 0; wordindex < window_size; wordindex++) {
|
||||||
|
@ -246,11 +247,12 @@ window.search = window.search || {};
|
||||||
return teaser_split.join('');
|
return teaser_split.join('');
|
||||||
}
|
}
|
||||||
|
|
||||||
function init() {
|
function init(config) {
|
||||||
resultsoptions = window.search.resultsoptions;
|
results_options = config.results_options;
|
||||||
searchoptions = window.search.searchoptions;
|
search_options = config.search_options;
|
||||||
searchbar_outer = window.search.searchbar_outer;
|
searchbar_outer = config.searchbar_outer;
|
||||||
searchindex = elasticlunr.Index.load(window.search.index);
|
doc_urls = config.doc_urls;
|
||||||
|
searchindex = elasticlunr.Index.load(config.index);
|
||||||
|
|
||||||
// Set up events
|
// Set up events
|
||||||
searchicon.addEventListener('click', function(e) { searchIconClickHandler(); }, false);
|
searchicon.addEventListener('click', function(e) { searchIconClickHandler(); }, false);
|
||||||
|
@ -441,8 +443,8 @@ window.search = window.search || {};
|
||||||
if (searchindex == null) { return; }
|
if (searchindex == null) { return; }
|
||||||
|
|
||||||
// Do the actual search
|
// Do the actual search
|
||||||
var results = searchindex.search(searchterm, searchoptions);
|
var results = searchindex.search(searchterm, search_options);
|
||||||
var resultcount = Math.min(results.length, resultsoptions.limit_results);
|
var resultcount = Math.min(results.length, results_options.limit_results);
|
||||||
|
|
||||||
// Display search metrics
|
// Display search metrics
|
||||||
searchresults_header.innerText = formatSearchMetric(resultcount, searchterm);
|
searchresults_header.innerText = formatSearchMetric(resultcount, searchterm);
|
||||||
|
@ -460,7 +462,16 @@ window.search = window.search || {};
|
||||||
showResults(true);
|
showResults(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
init();
|
fetch(path_to_root + 'searchindex.json')
|
||||||
|
.then(response => response.json())
|
||||||
|
.then(json => init(json))
|
||||||
|
.catch(error => { // Try to load searchindex.js if fetch failed
|
||||||
|
var script = document.createElement('script');
|
||||||
|
script.src = path_to_root + 'searchindex.js';
|
||||||
|
script.onload = () => init(window.search);
|
||||||
|
document.head.appendChild(script);
|
||||||
|
});
|
||||||
|
|
||||||
// Exported functions
|
// Exported functions
|
||||||
search.hasFocus = hasFocus;
|
search.hasFocus = hasFocus;
|
||||||
})(window.search);
|
})(window.search);
|
||||||
|
|
|
@ -426,25 +426,39 @@ mod search {
|
||||||
|
|
||||||
let index = read_book_index(temp.path());
|
let index = read_book_index(temp.path());
|
||||||
|
|
||||||
|
let doc_urls = index["doc_urls"].as_array().unwrap();
|
||||||
|
let get_doc_ref = |url: &str| -> String {
|
||||||
|
doc_urls.iter()
|
||||||
|
.position(|s| s == url)
|
||||||
|
.unwrap()
|
||||||
|
.to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
let first_chapter = get_doc_ref("first/index.html#first-chapter");
|
||||||
|
let introduction = get_doc_ref("intro.html#introduction");
|
||||||
|
let some_section = get_doc_ref("first/index.html#some-section");
|
||||||
|
let summary = get_doc_ref("first/includes.html#summary");
|
||||||
|
let conclusion = get_doc_ref("conclusion.html#conclusion");
|
||||||
|
|
||||||
let bodyidx = &index["index"]["index"]["body"]["root"];
|
let bodyidx = &index["index"]["index"]["body"]["root"];
|
||||||
let textidx = &bodyidx["t"]["e"]["x"]["t"];
|
let textidx = &bodyidx["t"]["e"]["x"]["t"];
|
||||||
assert_eq!(textidx["df"], 2);
|
assert_eq!(textidx["df"], 2);
|
||||||
assert_eq!(textidx["docs"]["first/index.html#first-chapter"]["tf"], 1.0);
|
assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
|
||||||
assert_eq!(textidx["docs"]["intro.html#introduction"]["tf"], 1.0);
|
assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
|
||||||
|
|
||||||
let docs = &index["index"]["documentStore"]["docs"];
|
let docs = &index["index"]["documentStore"]["docs"];
|
||||||
assert_eq!(docs["first/index.html#first-chapter"]["body"], "more text.");
|
assert_eq!(docs[&first_chapter]["body"], "more text.");
|
||||||
assert_eq!(docs["first/index.html#some-section"]["body"], "");
|
assert_eq!(docs[&some_section]["body"], "");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
docs["first/includes.html#summary"]["body"],
|
docs[&summary]["body"],
|
||||||
"Introduction First Chapter Nested Chapter Includes Recursive Second Chapter Conclusion"
|
"Introduction First Chapter Nested Chapter Includes Recursive Second Chapter Conclusion"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
docs["first/includes.html#summary"]["breadcrumbs"],
|
docs[&summary]["breadcrumbs"],
|
||||||
"First Chapter » Summary"
|
"First Chapter » Summary"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
docs["conclusion.html#conclusion"]["body"],
|
docs[&conclusion]["body"],
|
||||||
"I put <HTML> in here!"
|
"I put <HTML> in here!"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -452,7 +466,7 @@ mod search {
|
||||||
// Setting this to `true` may cause issues with `cargo watch`,
|
// Setting this to `true` may cause issues with `cargo watch`,
|
||||||
// since it may not finish writing the fixture before the tests
|
// since it may not finish writing the fixture before the tests
|
||||||
// are run again.
|
// are run again.
|
||||||
const GENERATE_FIXTURE: bool = true;
|
const GENERATE_FIXTURE: bool = false;
|
||||||
|
|
||||||
fn get_fixture() -> serde_json::Value {
|
fn get_fixture() -> serde_json::Value {
|
||||||
if GENERATE_FIXTURE {
|
if GENERATE_FIXTURE {
|
||||||
|
@ -481,7 +495,7 @@ mod search {
|
||||||
//
|
//
|
||||||
// If you're pretty sure you haven't broken anything, change `GENERATE_FIXTURE`
|
// If you're pretty sure you haven't broken anything, change `GENERATE_FIXTURE`
|
||||||
// above to `true`, and run `cargo test` to generate a new fixture. Then
|
// above to `true`, and run `cargo test` to generate a new fixture. Then
|
||||||
// change it back to `false`. Include the changed `searchindex_fixture.json` in your commit.
|
// **change it back to `false`**. Include the changed `searchindex_fixture.json` in your commit.
|
||||||
#[test]
|
#[test]
|
||||||
fn search_index_hasnt_changed_accidentally() {
|
fn search_index_hasnt_changed_accidentally() {
|
||||||
let temp = DummyBook::new().build().unwrap();
|
let temp = DummyBook::new().build().unwrap();
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue