From a5f861bf2b2dafa8b50c521ea0fb1432c749ab9d Mon Sep 17 00:00:00 2001 From: Dylan DPC <99973273+Dylan-DPC@users.noreply.github.com> Date: Wed, 22 Jun 2022 13:31:16 +0200 Subject: [PATCH] Revert "Omit words longer than 80 characters from the search index" --- src/renderer/html_handlebars/search.rs | 13 +------------ tests/dummy_book/src/first/no-headers.md | 4 +--- tests/rendered_output.rs | 2 +- tests/searchindex_fixture.json | 2 +- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs index b39569d4..0a59ffe9 100644 --- a/src/renderer/html_handlebars/search.rs +++ b/src/renderer/html_handlebars/search.rs @@ -13,8 +13,6 @@ use crate::utils; use serde::Serialize; -const MAX_WORD_LENGTH_TO_INDEX: usize = 80; - /// Creates all files required for search. pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> { let mut index = Index::new(&["title", "body", "breadcrumbs"]); @@ -46,15 +44,6 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Ok(()) } -/// Tokenizes in the same way as elasticlunr-rs (for English), but also drops long tokens. -fn tokenize(text: &str) -> Vec<String> { text.split(|c: char| c.is_whitespace() || c == '-') .filter(|s| !s.is_empty()) .map(|s| s.trim().to_lowercase()) .filter(|s| s.len() <= MAX_WORD_LENGTH_TO_INDEX) .collect() } - /// Uses the given arguments to construct a search document, then inserts it to the given index. fn add_doc( index: &mut Index, @@ -73,7 +62,7 @@ fn add_doc( doc_urls.push(url.into()); let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim())); - index.add_doc_with_tokenizer(&doc_ref, items, tokenize); + index.add_doc(&doc_ref, items); } /// Renders markdown into flat unformatted text and adds it to the search index. 
diff --git a/tests/dummy_book/src/first/no-headers.md b/tests/dummy_book/src/first/no-headers.md index 5d799aa6..8f9a6d17 100644 --- a/tests/dummy_book/src/first/no-headers.md +++ b/tests/dummy_book/src/first/no-headers.md @@ -1,5 +1,3 @@ Capybara capybara capybara. -Capybara capybara capybara. - -ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex. +Capybara capybara capybara. \ No newline at end of file diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs index c6267830..873a622d 100644 --- a/tests/rendered_output.rs +++ b/tests/rendered_output.rs @@ -772,7 +772,7 @@ mod search { ); assert_eq!( docs[&no_headers]["body"], - "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex." + "Capybara capybara capybara. Capybara capybara capybara." ); } diff --git a/tests/searchindex_fixture.json b/tests/searchindex_fixture.json index 3d7062d2..9c349b6b 100644 --- a/tests/searchindex_fixture.json +++ b/tests/searchindex_fixture.json @@ -229,7 +229,7 @@ "title": "Unicode stress tests" }, "18": { - "body": "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex.", + "body": "Capybara capybara capybara. Capybara capybara capybara.", "breadcrumbs": "First Chapter » No Headers", "id": "18", "title": "First Chapter"