Use integer doc_refs to shrink the search index

This change reduces the size of book_example's searchindex.js from 508KB to 317KB.
This commit is contained in:
Matt Ickstadt 2018-06-13 14:09:31 -05:00
parent 8cd7061ff2
commit 019e74041d
2 changed files with 36 additions and 23 deletions

View File

@ -18,12 +18,13 @@ use theme::searcher;
/// Creates all files required for search. /// Creates all files required for search.
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> { pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
let mut index = Index::new(&["title", "body", "breadcrumbs"]); let mut index = Index::new(&["title", "body", "breadcrumbs"]);
let mut doc_urls = Vec::with_capacity(book.sections.len());
for item in book.iter() { for item in book.iter() {
render_item(&mut index, &search_config, item)?; render_item(&mut index, &search_config, &mut doc_urls, item)?;
} }
let index = write_to_js(index, &search_config)?; let index = write_to_js(index, &search_config, doc_urls)?;
debug!("Writing search index ✓"); debug!("Writing search index ✓");
if search_config.copy_js { if search_config.copy_js {
@ -38,26 +39,31 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
} }
/// Uses the given arguments to construct a search document, then inserts it to the given index. /// Uses the given arguments to construct a search document, then inserts it to the given index.
fn add_doc<'a>( fn add_doc(
index: &mut Index, index: &mut Index,
anchor_base: &'a str, doc_urls: &mut Vec<String>,
anchor_base: &str,
section_id: &Option<String>, section_id: &Option<String>,
items: &[&str], items: &[&str],
) { ) {
let doc_ref: Cow<'a, str> = if let &Some(ref id) = section_id { let doc_ref = if let &Some(ref id) = section_id {
format!("{}#{}", anchor_base, id).into() Cow::Owned(format!("{}#{}", anchor_base, id))
} else { } else {
anchor_base.into() Cow::Borrowed(anchor_base.into())
}; };
let doc_ref = utils::collapse_whitespace(doc_ref.trim()); let doc_ref = utils::collapse_whitespace(doc_ref.trim());
let key = doc_urls.len();
doc_urls.push(doc_ref.into());
let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim())); let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
index.add_doc(&doc_ref, items); index.add_doc(&key.to_string(), items);
} }
/// Renders markdown into flat unformatted text and adds it to the search index. /// Renders markdown into flat unformatted text and adds it to the search index.
fn render_item( fn render_item(
index: &mut Index, index: &mut Index,
search_config: &Search, search_config: &Search,
doc_urls: &mut Vec<String>,
item: &BookItem, item: &BookItem,
) -> Result<()> { ) -> Result<()> {
let chapter = match item { let chapter = match item {
@ -92,6 +98,7 @@ fn render_item(
// Write the data to the index, and clear it for the next section // Write the data to the index, and clear it for the next section
add_doc( add_doc(
index, index,
doc_urls,
&anchor_base, &anchor_base,
&section_id, &section_id,
&[&heading, &body, &breadcrumbs.join(" » ")], &[&heading, &body, &breadcrumbs.join(" » ")],
@ -144,6 +151,7 @@ fn render_item(
// Make sure the last section is added to the index // Make sure the last section is added to the index
add_doc( add_doc(
index, index,
doc_urls,
&anchor_base, &anchor_base,
&section_id, &section_id,
&[&heading, &body, &breadcrumbs.join(" » ")], &[&heading, &body, &breadcrumbs.join(" » ")],
@ -156,7 +164,7 @@ fn render_item(
/// Exports the index and search options to a JS script which stores the index in `window.search`. /// Exports the index and search options to a JS script which stores the index in `window.search`.
/// Using a JS script is a workaround for CORS in `file://` URIs. It also removes the need for /// Using a JS script is a workaround for CORS in `file://` URIs. It also removes the need for
/// downloading/parsing JSON in JS. /// downloading/parsing JSON in JS.
fn write_to_js(index: Index, search_config: &Search) -> Result<String> { fn write_to_js(index: Index, search_config: &Search, doc_urls: Vec<String>) -> Result<String> {
use std::collections::BTreeMap; use std::collections::BTreeMap;
use self::elasticlunr::config::{SearchBool, SearchOptions, SearchOptionsField}; use self::elasticlunr::config::{SearchBool, SearchOptions, SearchOptionsField};
@ -169,9 +177,11 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
#[derive(Serialize)] #[derive(Serialize)]
struct SearchindexJson { struct SearchindexJson {
/// The options used for displaying search results /// The options used for displaying search results
resultsoptions: ResultsOptions, results_options: ResultsOptions,
/// The searchoptions for elasticlunr.js /// The searchoptions for elasticlunr.js
searchoptions: SearchOptions, search_options: SearchOptions,
/// Used to look up a document's URL from an integer document ref.
doc_urls: Vec<String>,
/// The index for elasticlunr.js /// The index for elasticlunr.js
index: elasticlunr::Index, index: elasticlunr::Index,
} }
@ -185,7 +195,7 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
opt.boost = Some(search_config.boost_hierarchy); opt.boost = Some(search_config.boost_hierarchy);
fields.insert("breadcrumbs".into(), opt); fields.insert("breadcrumbs".into(), opt);
let searchoptions = SearchOptions { let search_options = SearchOptions {
bool: if search_config.use_boolean_and { bool: if search_config.use_boolean_and {
SearchBool::And SearchBool::And
} else { } else {
@ -195,14 +205,15 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
fields, fields,
}; };
let resultsoptions = ResultsOptions { let results_options = ResultsOptions {
limit_results: search_config.limit_results, limit_results: search_config.limit_results,
teaser_word_count: search_config.teaser_word_count, teaser_word_count: search_config.teaser_word_count,
}; };
let json_contents = SearchindexJson { let json_contents = SearchindexJson {
resultsoptions, results_options,
searchoptions, search_options,
doc_urls,
index, index,
}; };

View File

@ -27,11 +27,12 @@ window.search = window.search || {};
content = document.getElementById('content'), content = document.getElementById('content'),
searchindex = null, searchindex = null,
resultsoptions = { doc_urls = [],
results_options = {
teaser_word_count: 30, teaser_word_count: 30,
limit_results: 30, limit_results: 30,
}, },
searchoptions = { search_options = {
bool: "AND", bool: "AND",
expand: true, expand: true,
fields: { fields: {
@ -139,7 +140,7 @@ window.search = window.search || {};
teaser_count++; teaser_count++;
// The ?URL_MARK_PARAM= parameter belongs in between the page and the #heading-anchor // The ?URL_MARK_PARAM= parameter belongs in between the page and the #heading-anchor
var url = result.ref.split("#"); var url = doc_urls[result.ref].split("#");
if (url.length == 1) { // no anchor found if (url.length == 1) { // no anchor found
url.push(""); url.push("");
} }
@ -196,7 +197,7 @@ window.search = window.search || {};
} }
var window_weight = []; var window_weight = [];
var window_size = Math.min(weighted.length, resultsoptions.teaser_word_count); var window_size = Math.min(weighted.length, results_options.teaser_word_count);
var cur_sum = 0; var cur_sum = 0;
for (var wordindex = 0; wordindex < window_size; wordindex++) { for (var wordindex = 0; wordindex < window_size; wordindex++) {
@ -247,9 +248,10 @@ window.search = window.search || {};
} }
function init() { function init() {
resultsoptions = window.search.resultsoptions; results_options = window.search.results_options;
searchoptions = window.search.searchoptions; search_options = window.search.search_options;
searchbar_outer = window.search.searchbar_outer; searchbar_outer = window.search.searchbar_outer;
doc_urls = window.search.doc_urls;
searchindex = elasticlunr.Index.load(window.search.index); searchindex = elasticlunr.Index.load(window.search.index);
// Set up events // Set up events
@ -441,8 +443,8 @@ window.search = window.search || {};
if (searchindex == null) { return; } if (searchindex == null) { return; }
// Do the actual search // Do the actual search
var results = searchindex.search(searchterm, searchoptions); var results = searchindex.search(searchterm, search_options);
var resultcount = Math.min(results.length, resultsoptions.limit_results); var resultcount = Math.min(results.length, results_options.limit_results);
// Display search metrics // Display search metrics
searchresults_header.innerText = formatSearchMetric(resultcount, searchterm); searchresults_header.innerText = formatSearchMetric(resultcount, searchterm);