Render markdown into a search index for elasticlunr. (Feature-gated because of a nightly requirement.)
parent 893dc39b60
commit 18a1dc08c8
@@ -29,6 +29,7 @@ toml = "0.4"
 open = "1.1"
 regex = "0.2.1"
 tempdir = "0.3.4"
+elasticlunr = { git = "https://github.com/mattico/elasticlunr-rs", optional = true}
 
 # Watch feature
 notify = { version = "4.0", optional = true }
@@ -55,6 +56,7 @@ output = []
 regenerate-css = []
 watch = ["notify", "time", "crossbeam"]
 serve = ["iron", "staticfile", "ws"]
+searchindex = ["elasticlunr"]
 
 [[bin]]
 doc = false
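The two hunks above appear to be Cargo.toml: a new optional git dependency on elasticlunr-rs and a `searchindex` feature that pulls it in. Assuming standard Cargo feature semantics, the index generation would be compiled in with something like `cargo build --features searchindex`; without the flag the optional dependency is not built and the `#[cfg(feature = "searchindex")]` items in the later hunks are compiled out, which matches the "feature-gated" note in the commit message.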
@@ -88,6 +88,8 @@ extern crate serde_derive;
 extern crate serde_json;
 extern crate tempdir;
 extern crate toml;
+#[cfg(feature = "searchindex")]
+extern crate elasticlunr;
 
 mod parse;
 mod preprocess;
@@ -9,6 +9,9 @@ use theme::{Theme, playpen_editor};
 use errors::*;
 use regex::{Captures, Regex};
 
+#[cfg(feature = "searchindex")]
+use elasticlunr;
+
 use std::ascii::AsciiExt;
 use std::path::{Path, PathBuf};
 use std::fs::{self, File};
@@ -31,7 +34,8 @@ impl HtmlHandlebars {
     fn render_item(&self,
                    item: &BookItem,
                    mut ctx: RenderItemContext,
-                   print_content: &mut String)
+                   print_content: &mut String,
+                   search_documents : &mut Vec<utils::SearchDocument>)
                    -> Result<()> {
         // FIXME: This should be made DRY-er and rely less on mutable state
         match *item {
@@ -42,6 +46,15 @@ impl HtmlHandlebars {
                 let content = utils::fs::file_to_string(&path)?;
                 let base = path.parent()
                     .ok_or_else(|| String::from("Invalid bookitem path!"))?;
+                let path = ch.path.to_str().ok_or_else(|| {
+                    io::Error::new(io::ErrorKind::Other, "Could not convert path to str")
+                })?;
+
+                utils::render_markdown_into_searchindex(search_documents,
+                                                        &content,
+                                                        path,
+                                                        &vec![],
+                                                        id_from_content);
+
                 // Parse and expand links
                 let content = preprocess::links::replace_all(&content, base)?;
@@ -49,11 +62,6 @@ impl HtmlHandlebars {
                 print_content.push_str(&content);
 
                 // Update the context with data for this file
-                let path = ch.path.to_str().ok_or_else(|| {
-                    io::Error::new(io::ErrorKind::Other,
-                                   "Could not convert path \
-                                    to str")
-                })?;
 
                 // Non-lexical lifetimes needed :'(
                 let title: String;
@@ -264,6 +272,9 @@ impl Renderer for HtmlHandlebars {
         // Print version
         let mut print_content = String::new();
 
+        // Search index
+        let mut search_documents = vec![];
+
         // TODO: The Renderer trait should really pass in where it wants us to build to...
         let destination = book.get_destination();
 
@@ -280,9 +291,12 @@ impl Renderer for HtmlHandlebars {
                 is_index: i == 0,
                 html_config: html_config.clone(),
             };
-            self.render_item(item, ctx, &mut print_content)?;
+            self.render_item(item, ctx, &mut print_content, &mut search_documents)?;
         }
 
+        // Search index
+        make_searchindex(book, &search_documents)?;
+
         // Print version
         self.configure_print_version(&mut data, &print_content);
         if let Some(ref title) = book.config.book.title {
@@ -300,7 +314,7 @@ impl Renderer for HtmlHandlebars {
 
         book.write_file(Path::new("print").with_extension("html"),
                         &rendered.into_bytes())?;
-        info!("[*] Creating print.html ✓");
+        info!("[*] Creating \"print.html\" ✓");
 
         // Copy static files (js, css, images, ...)
         debug!("[*] Copy static files");
@@ -619,6 +633,26 @@ pub fn normalize_id(content: &str) -> String {
         .collect::<String>()
 }
 
+#[cfg(not(feature = "searchindex"))]
+fn make_searchindex(_book: &MDBook, _search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
+    Ok(())
+}
+
+#[cfg(feature = "searchindex")]
+fn make_searchindex(book: &MDBook, search_documents : &Vec<utils::SearchDocument>) -> Result<()> {
+    let mut index = elasticlunr::IndexBuilder::new();
+    for sd in search_documents {
+        index.add_document(&sd.title, &sd.body);
+    }
+
+    book.write_file(
+        Path::new("searchindex").with_extension("json"),
+        &index.to_json().as_bytes(),
+    )?;
+    info!("[*] Creating \"searchindex.json\" ✓");
+
+    Ok(())
+}
 
 #[cfg(test)]
 mod tests {
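For orientation, here is a minimal standalone sketch (not part of the commit) of the elasticlunr-rs calls used in `make_searchindex` above, assuming only the API visible in this hunk (`IndexBuilder::new`, `add_document`, `to_json`) and writing the JSON through plain `std::fs` instead of `book.write_file`:

```rust
extern crate elasticlunr;

use std::fs::File;
use std::io::Write;

/// Build a search index from (title, body) pairs and write it to disk.
/// Error handling is simplified relative to the renderer's `Result<()>`.
fn write_searchindex(docs: &[(String, String)]) -> std::io::Result<()> {
    let mut index = elasticlunr::IndexBuilder::new();
    for &(ref title, ref body) in docs {
        // Mirrors `index.add_document(&sd.title, &sd.body)` from the hunk above.
        index.add_document(title, body);
    }
    let mut file = File::create("searchindex.json")?;
    file.write_all(index.to_json().as_bytes())
}
```

The `#[cfg(not(feature = "searchindex"))]` no-op variant keeps the call site in `render` unconditional, so builds without the feature still compile.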
src/utils/mod.rs
@@ -3,7 +3,107 @@ pub mod fs;
 use pulldown_cmark::{html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES,
                      OPTION_ENABLE_TABLES};
 use std::borrow::Cow;
+use std::fmt::Write;
+use regex::Regex;
+use std::rc::Rc;
 
+/// A heading together with the successive content until the next heading will
+/// make up one `SearchDocument`. It represents some independently searchable part of the book.
+#[derive(Default, Debug)]
+pub struct SearchDocument {
+    // Corresponding heading
+    pub title : String,
+    // Content: flattened paragraphs, lists, code
+    pub body : String,
+    /// Information needed to generate a link to the corresponding title anchor.
+    /// The first part is the `reference_base`, which should be the same for all documents that
+    /// come from the same `.md` file. The second part is derived from the heading of the search
+    /// document.
+    pub sref : (Rc<String>, Option<String>),
+    // Breadcrumbs like ["Main Chapter Title", "Sub Chapter Title", "H1 Heading"]
+    // as a human-understandable path to the search document.
+    pub breadcrumbs : Vec<Rc<String>>,
+}
+
+impl SearchDocument {
+    fn new(sref0 : &Rc<String>, bcs : &Vec<Rc<String>>) -> SearchDocument {
+        SearchDocument {
+            title : "".to_owned(),
+            body : "".to_owned(),
+            sref : (sref0.clone(), None),
+            breadcrumbs : bcs.clone()
+        }
+    }
+
+    fn has_content(&self) -> bool {
+        self.title.len() > 0
+    }
+
+    fn add(&mut self, text : &str, to_title : bool) {
+        if to_title {
+            self.title.write_str(&text).unwrap();
+        } else {
+            self.body.write_str(&text).unwrap();
+            self.body.write_str(&" ").unwrap();
+        }
+    }
+}
+
+/// Renders markdown into flat, unformatted text for use in the search index.
+/// Refer to the struct `SearchDocument`.
+///
+/// The field `sref` in the `SearchDocument` struct becomes
+/// `(reference_base, Some(heading_to_sref("The Section Heading")))`
+pub fn render_markdown_into_searchindex<F>(
+    search_documents: &mut Vec<SearchDocument>,
+    text: &str,
+    reference_base: &str,
+    breadcrumbs : &Vec<Rc<String>>,
+    heading_to_sref : F)
+    where F : Fn(&str) -> String {
+
+    let mut opts = Options::empty();
+    opts.insert(OPTION_ENABLE_TABLES);
+    opts.insert(OPTION_ENABLE_FOOTNOTES);
+    let p = Parser::new_ext(text, opts);
+
+    let reference_base = Rc::new(reference_base.to_owned());
+    let mut current = SearchDocument::new(&reference_base, breadcrumbs);
+    let mut in_header = false;
+
+    for event in p {
+        match event {
+            Event::Start(Tag::Header(i)) if i <= 3 => {
+                if current.has_content() {
+                    search_documents.push(current);
+                }
+                current = SearchDocument::new(&reference_base, breadcrumbs);
+                in_header = true;
+            }
+            Event::End(Tag::Header(_)) => {
+                // Possible extension: use h1, h2, h3 as hierarchy for the breadcrumbs
+                current.breadcrumbs.push(Rc::new(current.title.clone()));
+                current.sref.1 = Some(heading_to_sref(&current.title));
+                in_header = false;
+            }
+            Event::Start(_) | Event::End(_) => {}
+            Event::Text(text) => {
+                current.add(&text, in_header);
+            }
+            Event::Html(html) | Event::InlineHtml(html) => {
+                current.body.write_str(&trim_html_tags(&html)).unwrap();
+            }
+            Event::FootnoteReference(_) => {}
+            Event::SoftBreak | Event::HardBreak => {}
+        }
+    }
+    search_documents.push(current);
+}
+
+fn trim_html_tags<'a>(text : &'a str) -> Cow<'a, str> {
+    let regex = Regex::new(r"<[^>]*?>").unwrap();
+    regex.replace_all(text, "")
+}
+
 ///
 ///
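To make the splitting behaviour concrete, here is a hypothetical, self-contained sketch (not part of the commit), assuming `SearchDocument` and `render_markdown_into_searchindex` are in scope; the slug closure stands in for the renderer's `id_from_content` and is an illustration, not the real implementation:

```rust
use std::rc::Rc;

fn searchindex_demo() {
    let md = "# Intro\nSome text about the book.\n\n## Details\nMore text for the second section.";
    let mut docs: Vec<SearchDocument> = Vec::new();
    let breadcrumbs = vec![Rc::new("Chapter 1".to_owned())];

    // Every h1-h3 heading starts a new SearchDocument; the text that follows it
    // is flattened into `body` until the next heading.
    render_markdown_into_searchindex(&mut docs,
                                     md,
                                     "chapter_1.html",
                                     &breadcrumbs,
                                     |heading| heading.to_lowercase().replace(' ', "-"));

    for doc in &docs {
        // Expect two documents ("Intro", "Details"), each carrying the shared
        // reference base "chapter_1.html" and a heading-derived anchor in `sref`.
        println!("{}: {}", doc.title, doc.body);
    }
}
```

Those (title, body) pairs are exactly what `make_searchindex` later feeds into elasticlunr.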