Merge pull request #1828 from rust-lang/revert-1809-2022-05_searchindex

Revert "Omit words longer than 80 characters from the search index"
2022-06-22 13:50:01 +02:00 · 2022-06-22 13:50:01 +02:00 · 1056b8361c
parent 93aee6419e a5f861bf2b
commit 1056b8361c
4 changed files with 4 additions and 17 deletions
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@@ -13,8 +13,6 @@ use crate::utils;
 use serde::Serialize;
-const MAX_WORD_LENGTH_TO_INDEX: usize = 80;
 /// Creates all files required for search.
 pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
    let mut index = Index::new(&["title", "body", "breadcrumbs"]);
@@ -46,15 +44,6 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
    Ok(())
 }
-/// Tokenizes in the same way as elasticlunr-rs (for English), but also drops long tokens.
-fn tokenize(text: &str) -> Vec<String> {
-    text.split(|c: char| c.is_whitespace() || c == '-')
-        .filter(|s| !s.is_empty())
-        .map(|s| s.trim().to_lowercase())
-        .filter(|s| s.len() <= MAX_WORD_LENGTH_TO_INDEX)
-        .collect()
-}
 /// Uses the given arguments to construct a search document, then inserts it to the given index.
 fn add_doc(
    index: &mut Index,
@@ -73,7 +62,7 @@ fn add_doc(
    doc_urls.push(url.into());
    let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
-    index.add_doc_with_tokenizer(&doc_ref, items, tokenize);
+    index.add_doc(&doc_ref, items);
 }
 /// Renders markdown into flat unformatted text and adds it to the search index.
--- a/tests/dummy_book/src/first/no-headers.md
+++ b/tests/dummy_book/src/first/no-headers.md
@@ -1,5 +1,3 @@
 Capybara capybara capybara.
-Capybara capybara capybara.
+Capybara capybara capybara.
-ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex.
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@@ -772,7 +772,7 @@ mod search {
        );
        assert_eq!(
            docs[&no_headers]["body"],
-            "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex."
+            "Capybara capybara capybara. Capybara capybara capybara."
        );
    }
--- a/tests/searchindex_fixture.json
+++ b/tests/searchindex_fixture.json
@@ -229,7 +229,7 @@
          "title": "Unicode stress tests"
        },
        "18": {
-          "body": "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex.",
+          "body": "Capybara capybara capybara. Capybara capybara capybara.",
          "breadcrumbs": "First Chapter » No Headers",
          "id": "18",
          "title": "First Chapter"