Render markdown into a search index for elasticlunr (feature-gated because it requires nightly).
This commit is contained in:
parent
893dc39b60
commit
18a1dc08c8
|
@ -29,6 +29,7 @@ toml = "0.4"
|
||||||
open = "1.1"
|
open = "1.1"
|
||||||
regex = "0.2.1"
|
regex = "0.2.1"
|
||||||
tempdir = "0.3.4"
|
tempdir = "0.3.4"
|
||||||
|
elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true}
|
||||||
|
|
||||||
# Watch feature
|
# Watch feature
|
||||||
notify = { version = "4.0", optional = true }
|
notify = { version = "4.0", optional = true }
|
||||||
|
@ -55,6 +56,7 @@ output = []
|
||||||
regenerate-css = []
|
regenerate-css = []
|
||||||
watch = ["notify", "time", "crossbeam"]
|
watch = ["notify", "time", "crossbeam"]
|
||||||
serve = ["iron", "staticfile", "ws"]
|
serve = ["iron", "staticfile", "ws"]
|
||||||
|
searchindex = ["elasticlunr"]
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
doc = false
|
doc = false
|
||||||
|
|
|
@ -88,6 +88,8 @@ extern crate serde_derive;
|
||||||
extern crate serde_json;
|
extern crate serde_json;
|
||||||
extern crate tempdir;
|
extern crate tempdir;
|
||||||
extern crate toml;
|
extern crate toml;
|
||||||
|
#[cfg(feature = "searchindex")]
|
||||||
|
extern crate elasticlunr;
|
||||||
|
|
||||||
mod parse;
|
mod parse;
|
||||||
mod preprocess;
|
mod preprocess;
|
||||||
|
|
|
@ -9,6 +9,9 @@ use theme::{Theme, playpen_editor};
|
||||||
use errors::*;
|
use errors::*;
|
||||||
use regex::{Captures, Regex};
|
use regex::{Captures, Regex};
|
||||||
|
|
||||||
|
#[cfg(feature = "searchindex")]
|
||||||
|
use elasticlunr;
|
||||||
|
|
||||||
use std::ascii::AsciiExt;
|
use std::ascii::AsciiExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
|
@ -31,7 +34,8 @@ impl HtmlHandlebars {
|
||||||
fn render_item(&self,
|
fn render_item(&self,
|
||||||
item: &BookItem,
|
item: &BookItem,
|
||||||
mut ctx: RenderItemContext,
|
mut ctx: RenderItemContext,
|
||||||
print_content: &mut String)
|
print_content: &mut String,
|
||||||
|
search_documents : &mut Vec<utils::SearchDocument>)
|
||||||
-> Result<()> {
|
-> Result<()> {
|
||||||
// FIXME: This should be made DRY-er and rely less on mutable state
|
// FIXME: This should be made DRY-er and rely less on mutable state
|
||||||
match *item {
|
match *item {
|
||||||
|
@ -42,6 +46,15 @@ impl HtmlHandlebars {
|
||||||
let content = utils::fs::file_to_string(&path)?;
|
let content = utils::fs::file_to_string(&path)?;
|
||||||
let base = path.parent()
|
let base = path.parent()
|
||||||
.ok_or_else(|| String::from("Invalid bookitem path!"))?;
|
.ok_or_else(|| String::from("Invalid bookitem path!"))?;
|
||||||
|
let path = ch.path.to_str().ok_or_else(|| {
|
||||||
|
io::Error::new(io::ErrorKind::Other, "Could not convert path to str")
|
||||||
|
})?;
|
||||||
|
|
||||||
|
utils::render_markdown_into_searchindex(search_documents,
|
||||||
|
&content,
|
||||||
|
path,
|
||||||
|
&vec![],
|
||||||
|
id_from_content);
|
||||||
|
|
||||||
// Parse and expand links
|
// Parse and expand links
|
||||||
let content = preprocess::links::replace_all(&content, base)?;
|
let content = preprocess::links::replace_all(&content, base)?;
|
||||||
|
@ -49,11 +62,6 @@ impl HtmlHandlebars {
|
||||||
print_content.push_str(&content);
|
print_content.push_str(&content);
|
||||||
|
|
||||||
// Update the context with data for this file
|
// Update the context with data for this file
|
||||||
let path = ch.path.to_str().ok_or_else(|| {
|
|
||||||
io::Error::new(io::ErrorKind::Other,
|
|
||||||
"Could not convert path \
|
|
||||||
to str")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// Non-lexical lifetimes needed :'(
|
// Non-lexical lifetimes needed :'(
|
||||||
let title: String;
|
let title: String;
|
||||||
|
@ -264,6 +272,9 @@ impl Renderer for HtmlHandlebars {
|
||||||
// Print version
|
// Print version
|
||||||
let mut print_content = String::new();
|
let mut print_content = String::new();
|
||||||
|
|
||||||
|
// Search index
|
||||||
|
let mut search_documents = vec![];
|
||||||
|
|
||||||
// TODO: The Renderer trait should really pass in where it wants us to build to...
|
// TODO: The Renderer trait should really pass in where it wants us to build to...
|
||||||
let destination = book.get_destination();
|
let destination = book.get_destination();
|
||||||
|
|
||||||
|
@ -280,9 +291,12 @@ impl Renderer for HtmlHandlebars {
|
||||||
is_index: i == 0,
|
is_index: i == 0,
|
||||||
html_config: html_config.clone(),
|
html_config: html_config.clone(),
|
||||||
};
|
};
|
||||||
self.render_item(item, ctx, &mut print_content)?;
|
self.render_item(item, ctx, &mut print_content, &mut search_documents)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Search index
|
||||||
|
make_searchindex(book, &search_documents)?;
|
||||||
|
|
||||||
// Print version
|
// Print version
|
||||||
self.configure_print_version(&mut data, &print_content);
|
self.configure_print_version(&mut data, &print_content);
|
||||||
if let Some(ref title) = book.config.book.title {
|
if let Some(ref title) = book.config.book.title {
|
||||||
|
@ -300,7 +314,7 @@ impl Renderer for HtmlHandlebars {
|
||||||
|
|
||||||
book.write_file(Path::new("print").with_extension("html"),
|
book.write_file(Path::new("print").with_extension("html"),
|
||||||
&rendered.into_bytes())?;
|
&rendered.into_bytes())?;
|
||||||
info!("[*] Creating print.html ✓");
|
info!("[*] Creating \"print.html\" ✓");
|
||||||
|
|
||||||
// Copy static files (js, css, images, ...)
|
// Copy static files (js, css, images, ...)
|
||||||
debug!("[*] Copy static files");
|
debug!("[*] Copy static files");
|
||||||
|
@ -619,6 +633,26 @@ pub fn normalize_id(content: &str) -> String {
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "searchindex"))]
|
||||||
|
fn make_searchindex(_book: &MDBook, _search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the elasticlunr index from the collected search documents and writes it
/// as `searchindex.json` through `book.write_file`.
///
/// Every document contributes its `title` and `body` to the index.
///
/// # Errors
///
/// Propagates any error returned by `book.write_file`.
#[cfg(feature = "searchindex")]
fn make_searchindex(book: &MDBook, search_documents: &[utils::SearchDocument]) -> Result<()> {
    let mut index = elasticlunr::IndexBuilder::new();
    for sd in search_documents {
        index.add_document(&sd.title, &sd.body);
    }

    // `as_bytes()` already yields a byte slice, so no extra borrow is needed.
    book.write_file(
        Path::new("searchindex").with_extension("json"),
        index.to_json().as_bytes(),
    )?;
    info!("[*] Creating \"searchindex.json\" ✓");

    Ok(())
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
100
src/utils/mod.rs
100
src/utils/mod.rs
|
@ -3,7 +3,107 @@ pub mod fs;
|
||||||
use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
|
use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
|
||||||
OPTION_ENABLE_TABLES};
|
OPTION_ENABLE_TABLES};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::fmt::Write;
|
||||||
|
use regex::Regex;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
/// A heading together with the successive content until the next heading will
/// make up one `SearchDocument`. It represents some independently searchable part of the book.
#[derive(Default, Debug)]
pub struct SearchDocument {
    /// Text of the corresponding heading.
    pub title: String,
    /// Content: flattened paragraphs, lists, code.
    pub body: String,
    /// Needed information to generate a link to the corresponding title anchor.
    ///
    /// First part is the `reference_base` that should be the same for all documents that
    /// came from the same `.md` file. The second part is derived from the heading of the search
    /// document.
    pub sref: (Rc<String>, Option<String>),
    /// Breadcrumbs like `["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]`
    /// as a human understandable path to the search document.
    pub breadcrumbs: Vec<Rc<String>>,
}

impl SearchDocument {
    /// Creates an empty document that shares the reference base `sref0` and starts
    /// out with a copy of the breadcrumbs `bcs`. The anchor part of `sref` is `None`
    /// until a heading has been seen.
    fn new(sref0: &Rc<String>, bcs: &[Rc<String>]) -> SearchDocument {
        SearchDocument {
            title: String::new(),
            body: String::new(),
            // Only the reference count is bumped; the underlying string is shared.
            sref: (Rc::clone(sref0), None),
            breadcrumbs: bcs.to_vec(),
        }
    }

    /// Returns `true` once the document has a non-empty title.
    /// Body text alone does not count as content.
    fn has_content(&self) -> bool {
        !self.title.is_empty()
    }

    /// Appends `text` to the title when `to_title` is set, otherwise to the body.
    ///
    /// Body fragments are followed by a single space so that consecutive fragments
    /// do not run together; title fragments are concatenated verbatim.
    fn add(&mut self, text: &str, to_title: bool) {
        if to_title {
            self.title.push_str(text);
        } else {
            self.body.push_str(text);
            self.body.push(' ');
        }
    }
}
|
||||||
|
|
||||||
|
/// Renders markdown into flat unformatted text for usage in the search index.
|
||||||
|
/// Refer to the struct `SearchDocument`.
|
||||||
|
///
|
||||||
|
/// The field `sref` in the `SearchDocument` struct becomes
|
||||||
|
/// `(reference_base, Some(heading_to_sref("The Section Heading")))`
|
||||||
|
pub fn render_markdown_into_searchindex<F>(
|
||||||
|
search_documents: &mut Vec<SearchDocument>,
|
||||||
|
text: &str,
|
||||||
|
reference_base: &str,
|
||||||
|
breadcrumbs : &Vec<Rc<String>>,
|
||||||
|
heading_to_sref : F)
|
||||||
|
where F : Fn(&str) -> String {
|
||||||
|
|
||||||
|
let mut opts = Options::empty();
|
||||||
|
opts.insert(OPTION_ENABLE_TABLES);
|
||||||
|
opts.insert(OPTION_ENABLE_FOOTNOTES);
|
||||||
|
let p = Parser::new_ext(text, opts);
|
||||||
|
|
||||||
|
let reference_base = Rc::new(reference_base.to_owned());
|
||||||
|
let mut current = SearchDocument::new(&reference_base, breadcrumbs);
|
||||||
|
let mut in_header = false;
|
||||||
|
|
||||||
|
for event in p {
|
||||||
|
match event {
|
||||||
|
Event::Start(Tag::Header(i)) if i <= 3 => {
|
||||||
|
if current.has_content() {
|
||||||
|
search_documents.push(current);
|
||||||
|
}
|
||||||
|
current = SearchDocument::new(&reference_base, breadcrumbs);
|
||||||
|
in_header = true;
|
||||||
|
}
|
||||||
|
Event::End(Tag::Header(_)) => {
|
||||||
|
// Possible extension: Use h1,h2,h3 as hierarchy for the breadcrumbs
|
||||||
|
current.breadcrumbs.push(Rc::new(current.title.clone()));
|
||||||
|
current.sref.1 = Some(heading_to_sref(¤t.title));
|
||||||
|
in_header = false;
|
||||||
|
}
|
||||||
|
Event::Start(_) | Event::End(_) => {}
|
||||||
|
Event::Text(text) => {
|
||||||
|
current.add(&text, in_header);
|
||||||
|
}
|
||||||
|
Event::Html(html) | Event::InlineHtml(html) => {
|
||||||
|
current.body.write_str(&trim_html_tags(&html)).unwrap();
|
||||||
|
}
|
||||||
|
Event::FootnoteReference(_) => {}
|
||||||
|
Event::SoftBreak | Event::HardBreak => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
search_documents.push(current);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn trim_html_tags<'a>(text : &'a str) -> Cow<'a, str> {
|
||||||
|
let regex = Regex::new(r"<[^>]*?>").unwrap();
|
||||||
|
regex.replace_all(text, "")
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
///
|
///
|
||||||
|
|
Loading…
Reference in New Issue