From fe1927f344da5b698dd3c4a45cb98c9095a7f58d Mon Sep 17 00:00:00 2001 From: Zhou Yue Date: Sun, 4 Apr 2021 03:50:41 +0800 Subject: [PATCH] chinese search support --- Cargo.toml | 2 +- src/renderer/html_handlebars/hbs_renderer.rs | 2 +- src/renderer/html_handlebars/search.rs | 20 ++++- src/theme/index.hbs | 1 + src/theme/searcher/lunr.zh.js | 89 ++++++++++++++++++++ src/theme/searcher/mod.rs | 1 + src/theme/searcher/searcher.js | 33 ++++---- 7 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 src/theme/searcher/lunr.zh.js diff --git a/Cargo.toml b/Cargo.toml index 6f5b364c..8cf1f9aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ tokio = { version = "0.2.18", features = ["macros"], optional = true } warp = { version = "0.2.2", default-features = false, features = ["websocket"], optional = true } # Search feature -elasticlunr-rs = { version = "2.3", optional = true, default-features = false } +elasticlunr-rs = { version = "2.3", optional = true } ammonia = { version = "3", optional = true } [dev-dependencies] diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index 254f189f..b340a637 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -550,7 +550,7 @@ impl Renderer for HtmlHandlebars { { let search = html_config.search.unwrap_or_default(); if search.enable { - super::search::create_files(&search, &destination, &book)?; + super::search::create_files(&search, &ctx.config.book.language, &destination, &book)?; } } diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs index 8ee9b0af..97266c64 100644 --- a/src/renderer/html_handlebars/search.rs +++ b/src/renderer/html_handlebars/search.rs @@ -12,8 +12,23 @@ use crate::theme::searcher; use crate::utils; /// Creates all files required for search. 
-pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> { - let mut index = Index::new(&["title", "body", "breadcrumbs"]); +pub fn create_files( + search_config: &Search, + lang: &Option<String>, + destination: &Path, + book: &Book, +) -> Result<()> { + let mut index = match lang { + Some(lang_str) => match lang_str.to_lowercase().as_str() { + "zh" => Index::with_language( + elasticlunr::Language::Chinese, + &["title", "body", "breadcrumbs"], + ), + _ => Index::new(&["title", "body", "breadcrumbs"]), + }, + None => Index::new(&["title", "body", "breadcrumbs"]), + }; + let mut doc_urls = Vec::with_capacity(book.sections.len()); for item in book.iter() { @@ -36,6 +51,7 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> utils::fs::write_file(destination, "searcher.js", searcher::JS)?; utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?; utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?; + utils::fs::write_file(destination, "lunr.zh.js", searcher::LUNR_ZH_JS)?; debug!("Copying search files ✓"); } diff --git a/src/theme/index.hbs b/src/theme/index.hbs index e9e6cff8..7b95a181 100644 --- a/src/theme/index.hbs +++ b/src/theme/index.hbs @@ -271,6 +271,7 @@ {{#if search_js}} + {{/if}} diff --git a/src/theme/searcher/lunr.zh.js b/src/theme/searcher/lunr.zh.js new file mode 100644 index 00000000..32c12727 --- /dev/null +++ b/src/theme/searcher/lunr.zh.js @@ -0,0 +1,89 @@ +(function (root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. 
+ */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } +}(this, function () { + return function (lunr) { + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* register specific locale function */ + lunr.zh = function () { + this.pipeline.reset(); + this.pipeline.add( + lunr.zh.trimmer, + lunr.zh.stopWordFilter, + lunr.zh.stemmer + ); + + // for lunr version 2 + // this is necessary so that every searched word is also stemmed before + // in lunr <= 1 this is not needed, as it is done using the normal pipeline + if (this.searchPipeline) { + this.searchPipeline.reset(); + this.searchPipeline.add(lunr.zh.stemmer) + } + }; + + lunr.zh.tokenizer = function (str) { + if (!arguments.length || str === null || str === undefined) return []; + if (Array.isArray(str)) { + var arr = str.filter(function (token) { + if (token === null || token === undefined) { + return false; + } + + return true; + }); + + arr = arr.map(function (t) { + return lunr.utils.toString(t); + }); + + var out = []; + arr.forEach(function (item) { + var tokens = item.split(lunr.tokenizer.seperator); + out = out.concat(tokens); + }, this); + + return out; + } + + return str.toString().trim().split(lunr.tokenizer.seperator); + }; + + + /* lunr trimmer function */ + lunr.zh.trimmer = function (_token) { + return _token; + } + + lunr.Pipeline.registerFunction(lunr.zh.trimmer, 'trimmer-zh'); + + /* lunr stemmer function */ + lunr.zh.stemmer = (function () { + /* and return a function that stems a word for the current locale */ + return function (token) { + return token; + } + })(); + lunr.Pipeline.registerFunction(lunr.zh.stemmer, 'stemmer-zh'); + + lunr.zh.stopWordFilter = function (token) { + return token; + }; + lunr.Pipeline.registerFunction(lunr.zh.stopWordFilter, 'stopWordFilter-zh'); + }; +})) \ No newline at end of file diff --git a/src/theme/searcher/mod.rs 
b/src/theme/searcher/mod.rs index d5029db1..f2819f20 100644 --- a/src/theme/searcher/mod.rs +++ b/src/theme/searcher/mod.rs @@ -4,3 +4,4 @@ pub static JS: &[u8] = include_bytes!("searcher.js"); pub static MARK_JS: &[u8] = include_bytes!("mark.min.js"); pub static ELASTICLUNR_JS: &[u8] = include_bytes!("elasticlunr.min.js"); +pub static LUNR_ZH_JS: &[u8] = include_bytes!("lunr.zh.js"); diff --git a/src/theme/searcher/searcher.js b/src/theme/searcher/searcher.js index d2b0aeed..ea712f00 100644 --- a/src/theme/searcher/searcher.js +++ b/src/theme/searcher/searcher.js @@ -89,7 +89,7 @@ window.search = window.search || {}; path: a.pathname.replace(/^([^/])/,'/$1') }; } - + // Helper to recreate a url string from its building blocks. function renderURL(urlobject) { var url = urlobject.protocol + "://" + urlobject.host; @@ -124,7 +124,7 @@ window.search = window.search || {}; return s.replace(/[&<>'"]/g, repl); }; })(); - + function formatSearchMetric(count, searchterm) { if (count == 1) { return count + " search result for '" + searchterm + "':"; @@ -134,7 +134,7 @@ window.search = window.search || {}; return count + " search results for '" + searchterm + "':"; } } - + function formatSearchResult(result, searchterms) { var teaser = makeTeaser(escapeHTML(result.doc.body), searchterms); teaser_count++; @@ -152,10 +152,10 @@ window.search = window.search || {}; return '' + result.doc.breadcrumbs + '' - + '' + + '' + teaser + ''; } - + function makeTeaser(body, searchterms) { // The strategy is as follows: // First, assign a value to each word in the document: @@ -257,6 +257,9 @@ window.search = window.search || {}; search_options = config.search_options; searchbar_outer = config.searchbar_outer; doc_urls = config.doc_urls; + if (config.index.lang == "Chinese") { + elasticlunr.tokenizer = elasticlunr.zh.tokenizer + } searchindex = elasticlunr.Index.load(config.index); // Set up events @@ -271,7 +274,7 @@ window.search = window.search || {}; // If reloaded, do the search or 
mark again, depending on the current url parameters doSearchOrMarkFromUrl(); } - + function unfocusSearchbar() { // hacky, but just focusing a div only works once var tmp = document.createElement('input'); @@ -280,7 +283,7 @@ window.search = window.search || {}; tmp.focus(); tmp.remove(); } - + // On reload or browser history backwards/forwards events, parse the url and do search or mark function doSearchOrMarkFromUrl() { // Check current URL for search request @@ -313,7 +316,7 @@ window.search = window.search || {}; } } } - + // Eventhandler for keyevents on `document` function globalKeyHandler(e) { if (e.altKey || e.ctrlKey || e.metaKey || e.shiftKey || e.target.type === 'textarea' || e.target.type === 'text') { return; } @@ -338,8 +341,8 @@ window.search = window.search || {}; unfocusSearchbar(); searchresults.firstElementChild.classList.add("focus"); } else if (!hasFocus() && (e.keyCode === DOWN_KEYCODE - || e.keyCode === UP_KEYCODE - || e.keyCode === SELECT_KEYCODE)) { + || e.keyCode === UP_KEYCODE + || e.keyCode === SELECT_KEYCODE)) { // not `:focus` because browser does annoying scrolling var focused = searchresults.querySelector("li.focus"); if (!focused) return; @@ -363,7 +366,7 @@ window.search = window.search || {}; } } } - + function showSearch(yes) { if (yes) { search_wrap.classList.remove('hidden'); @@ -396,7 +399,7 @@ window.search = window.search || {}; showSearch(false); } } - + // Eventhandler for keyevents while the searchbar is focused function searchbarKeyUpHandler() { var searchterm = searchbar.value.trim(); @@ -414,7 +417,7 @@ window.search = window.search || {}; // Remove marks marker.unmark(); } - + // Update current url with ?URL_SEARCH_PARAM= parameter, remove ?URL_MARK_PARAM and #heading-anchor . // `action` can be one of "push", "replace", "push_if_new_search_else_replace" // and replaces or pushes a new browser history item. 
@@ -439,7 +442,7 @@ window.search = window.search || {}; history.replaceState({}, document.title, renderURL(url)); } } - + function doSearch(searchterm) { // Don't search the same twice @@ -470,7 +473,7 @@ window.search = window.search || {}; fetch(path_to_root + 'searchindex.json') .then(response => response.json()) - .then(json => init(json)) + .then(json => init(json)) .catch(error => { // Try to load searchindex.js if fetch failed var script = document.createElement('script'); script.src = path_to_root + 'searchindex.js';