From 18a1dc08c86c3e0aeae75a8e7753eb372b106458 Mon Sep 17 00:00:00 2001 From: Phaiax Date: Tue, 3 Oct 2017 00:09:10 +0200 Subject: [PATCH] Render markdown into searchindex for elasticlunr. (feature gated because nightly requirement) --- Cargo.toml | 2 + src/lib.rs | 8 +- src/renderer/html_handlebars/hbs_renderer.rs | 50 ++++++++-- src/utils/mod.rs | 100 +++++++++++++++++++ 4 files changed, 149 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index beea4abc..fbdeaff8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ toml = "0.4" open = "1.1" regex = "0.2.1" tempdir = "0.3.4" +elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true} # Watch feature notify = { version = "4.0", optional = true } @@ -55,6 +56,7 @@ output = [] regenerate-css = [] watch = ["notify", "time", "crossbeam"] serve = ["iron", "staticfile", "ws"] +searchindex = ["elasticlunr"] [[bin]] doc = false diff --git a/src/lib.rs b/src/lib.rs index 2cf5e3e7..cc2c7771 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,13 +25,13 @@ //! //! fn main() { //! let mut md = MDBook::new("my-book"); -//! +//! //! // tweak the book configuration a bit //! md.config.book.src = PathBuf::from("source"); //! md.config.book.build_dir = PathBuf::from("book"); -//! +//! //! // Render the book -//! md.build().unwrap(); +//! md.build().unwrap(); //! } //! ``` //! 
@@ -88,6 +88,8 @@ extern crate serde_derive; extern crate serde_json; extern crate tempdir; extern crate toml; +#[cfg(feature = "searchindex")] +extern crate elasticlunr; mod parse; mod preprocess; diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index f8186948..d0e2fc32 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -9,6 +9,9 @@ use theme::{Theme, playpen_editor}; use errors::*; use regex::{Captures, Regex}; +#[cfg(feature = "searchindex")] +use elasticlunr; + use std::ascii::AsciiExt; use std::path::{Path, PathBuf}; use std::fs::{self, File}; @@ -31,7 +34,8 @@ impl HtmlHandlebars { fn render_item(&self, item: &BookItem, mut ctx: RenderItemContext, - print_content: &mut String) + print_content: &mut String, + search_documents : &mut Vec<utils::SearchDocument>) -> Result<()> { // FIXME: This should be made DRY-er and rely less on mutable state match *item { @@ -42,6 +46,15 @@ impl HtmlHandlebars { let content = utils::fs::file_to_string(&path)?; let base = path.parent() .ok_or_else(|| String::from("Invalid bookitem path!"))?; + let path = ch.path.to_str().ok_or_else(|| { + io::Error::new(io::ErrorKind::Other, "Could not convert path to str") + })?; + + utils::render_markdown_into_searchindex(search_documents, + &content, + path, + &vec![], + id_from_content); // Parse and expand links let content = preprocess::links::replace_all(&content, base)?; @@ -49,11 +62,6 @@ impl HtmlHandlebars { print_content.push_str(&content); // Update the context with data for this file - let path = ch.path.to_str().ok_or_else(|| { - io::Error::new(io::ErrorKind::Other, - "Could not convert path \ - to str") - })?; // Non-lexical lifetimes needed :'( let title: String; @@ -264,6 +272,9 @@ impl Renderer for HtmlHandlebars { // Print version let mut print_content = String::new(); + // Search index + let mut search_documents = vec![]; + // TODO: The Renderer trait should really pass in where
it wants us to build to... let destination = book.get_destination(); @@ -280,9 +291,12 @@ impl Renderer for HtmlHandlebars { is_index: i == 0, html_config: html_config.clone(), }; - self.render_item(item, ctx, &mut print_content)?; + self.render_item(item, ctx, &mut print_content, &mut search_documents)?; } + // Search index + make_searchindex(book, &search_documents)?; + // Print version self.configure_print_version(&mut data, &print_content); if let Some(ref title) = book.config.book.title { @@ -300,7 +314,7 @@ impl Renderer for HtmlHandlebars { book.write_file(Path::new("print").with_extension("html"), &rendered.into_bytes())?; - info!("[*] Creating print.html ✓"); + info!("[*] Creating \"print.html\" ✓"); // Copy static files (js, css, images, ...) debug!("[*] Copy static files"); @@ -619,6 +633,26 @@ pub fn normalize_id(content: &str) -> String { .collect::<String>() } +#[cfg(not(feature = "searchindex"))] +fn make_searchindex(_book: &MDBook, _search_documents : &Vec<utils::SearchDocument>) -> Result<()> { + Ok(()) +} + +#[cfg(feature = "searchindex")] +fn make_searchindex(book: &MDBook, search_documents : &Vec<utils::SearchDocument>) -> Result<()> { + let mut index = elasticlunr::IndexBuilder::new(); + for sd in search_documents { + index.add_document(&sd.title, &sd.body); + } + + book.write_file( + Path::new("searchindex").with_extension("json"), + &index.to_json().as_bytes(), + )?; + info!("[*] Creating \"searchindex.json\" ✓"); + + Ok(()) +} #[cfg(test)] mod tests { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 4c265dcc..fa77e0f5 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -3,7 +3,107 @@ pub mod fs; use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES, OPTION_ENABLE_TABLES}; use std::borrow::Cow; +use std::fmt::Write; +use regex::Regex; +use std::rc::Rc; /// A heading together with the successive content until the next heading will +/// make up one `SearchDocument`. It represents some independently searchable part of the book.
+#[derive(Default, Debug)] +pub struct SearchDocument { + // Corresponding heading + pub title : String, + // Content: Flatted paragraphs, lists, code + pub body : String, + /// Needed information to generate a link to the corresponding title anchor + /// First part is the `reference_base` that should be the same for all documents that + /// came from the same `.md` file. The second part is derived from the heading of the search + /// document. + pub sref : (Rc<String>, Option<String>), + // Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"] + // as a human understandable path to the search document. + pub breadcrumbs : Vec<Rc<String>>, +} + +impl SearchDocument { + fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument { + SearchDocument { + title : "".to_owned(), + body : "".to_owned(), + sref : (sref0.clone(), None), + breadcrumbs : bcs.clone() + } + } + + fn has_content(&self) -> bool { + self.title.len() > 0 + } + + fn add(&mut self, text : &str, to_title : bool) { + if to_title { + self.title.write_str(&text).unwrap(); + } else { + self.body.write_str(&text).unwrap(); + self.body.write_str(&" ").unwrap(); + } + } +} + +/// Renders markdown into flat unformatted text for usage in the search index. +/// Refer to the struct `SearchDocument`.
+/// +/// The field `sref` in the `SearchDocument` struct becomes +/// `(reference_base, Some(heading_to_sref("The Section Heading")))` +pub fn render_markdown_into_searchindex<F>( + search_documents: &mut Vec<SearchDocument>, + text: &str, + reference_base: &str, + breadcrumbs : &Vec<Rc<String>>, + heading_to_sref : F) + where F : Fn(&str) -> String { + + let mut opts = Options::empty(); + opts.insert(OPTION_ENABLE_TABLES); + opts.insert(OPTION_ENABLE_FOOTNOTES); + let p = Parser::new_ext(text, opts); + + let reference_base = Rc::new(reference_base.to_owned()); + let mut current = SearchDocument::new(&reference_base, breadcrumbs); + let mut in_header = false; + + for event in p { + match event { + Event::Start(Tag::Header(i)) if i <= 3 => { + if current.has_content() { + search_documents.push(current); + } + current = SearchDocument::new(&reference_base, breadcrumbs); + in_header = true; + } + Event::End(Tag::Header(_)) => { + // Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs + current.breadcrumbs.push(Rc::new(current.title.clone())); + current.sref.1 = Some(heading_to_sref(&current.title)); + in_header = false; + } + Event::Start(_) | Event::End(_) => {} + Event::Text(text) => { + current.add(&text, in_header); + } + Event::Html(html) | Event::InlineHtml(html) => { + current.body.write_str(&trim_html_tags(&html)).unwrap(); + } + Event::FootnoteReference(_) => {} + Event::SoftBreak | Event::HardBreak => {} + } + } + search_documents.push(current); +} + +fn trim_html_tags<'a>(text : &'a str) -> Cow<'a, str> { + let regex = Regex::new(r"<[^>]*?>").unwrap(); + regex.replace_all(text, "") +} /// ///