mdBook/src/utils/mod.rs

405 lines
11 KiB
Rust
Raw Normal View History

#![allow(missing_docs)] // FIXME: Document this
pub mod fs;
mod string;
use errors::Error;
use regex::Regex;
2018-07-24 01:45:01 +08:00
use pulldown_cmark::{
html, Event, Options, Parser, Tag, OPTION_ENABLE_FOOTNOTES, OPTION_ENABLE_TABLES,
};
use std::borrow::Cow;
2018-07-24 01:45:01 +08:00
pub use self::string::{take_lines, RangeArgument};
/// Replaces multiple consecutive whitespace characters with a single space character.
pub fn collapse_whitespace<'a>(text: &'a str) -> Cow<'a, str> {
lazy_static! {
static ref RE: Regex = Regex::new(r"\s\s+").unwrap();
}
RE.replace_all(text, " ")
}
/// Convert the given string to a valid HTML element ID
pub fn normalize_id(content: &str) -> String {
let mut ret = content
.chars()
.filter_map(|ch| {
if ch.is_alphanumeric() || ch == '_' || ch == '-' {
Some(ch.to_ascii_lowercase())
} else if ch.is_whitespace() {
Some('-')
} else {
None
}
2018-08-21 23:58:44 +08:00
}).collect::<String>();
// Ensure that the first character is [A-Za-z]
if ret
.chars()
2018-07-24 01:45:01 +08:00
.next()
.map_or(false, |c| !c.is_ascii_alphabetic())
{
ret.insert(0, 'a');
}
ret
}
/// Generate an ID for use with anchors which is derived from a "normalised"
/// string.
pub fn id_from_content(content: &str) -> String {
let mut content = content.to_string();
// Skip any tags or html-encoded stuff
2018-07-24 01:45:01 +08:00
const REPL_SUB: &[&str] = &[
"<em>",
"</em>",
"<code>",
"</code>",
"<strong>",
"</strong>",
"&lt;",
"&gt;",
"&amp;",
"&#39;",
"&quot;",
];
for sub in REPL_SUB {
content = content.replace(sub, "");
}
// Remove spaces and hashes indicating a header
let trimmed = content.trim().trim_left_matches('#').trim();
normalize_id(trimmed)
}
fn adjust_links(event: Event) -> Event {
lazy_static! {
static ref HTTP_LINK: Regex = Regex::new("^https?://").unwrap();
static ref MD_LINK: Regex = Regex::new("(?P<link>.*).md(?P<anchor>#.*)?").unwrap();
}
match event {
Event::Start(Tag::Link(dest, title)) => {
if !HTTP_LINK.is_match(&dest) {
if let Some(caps) = MD_LINK.captures(&dest) {
let mut html_link = [&caps["link"], ".html"].concat();
if let Some(anchor) = caps.name("anchor") {
html_link.push_str(anchor.as_str());
}
2018-07-24 01:45:01 +08:00
return Event::Start(Tag::Link(Cow::from(html_link), title));
}
}
Event::Start(Tag::Link(dest, title))
2018-07-24 01:45:01 +08:00
}
_ => event,
}
}
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
let mut s = String::with_capacity(text.len() * 3 / 2);
let mut opts = Options::empty();
opts.insert(OPTION_ENABLE_TABLES);
opts.insert(OPTION_ENABLE_FOOTNOTES);
let p = Parser::new_ext(text, opts);
let mut converter = EventQuoteConverter::new(curly_quotes);
let events = p
.map(clean_codeblock_headers)
2018-07-24 01:45:01 +08:00
.map(adjust_links)
.map(|event| converter.convert(event));
html::push_html(&mut s, events);
s
}
struct EventQuoteConverter {
enabled: bool,
convert_text: bool,
}
impl EventQuoteConverter {
fn new(enabled: bool) -> Self {
EventQuoteConverter {
enabled: enabled,
convert_text: true,
}
}
fn convert<'a>(&mut self, event: Event<'a>) -> Event<'a> {
if !self.enabled {
return event;
}
match event {
Event::Start(Tag::CodeBlock(_)) | Event::Start(Tag::Code) => {
self.convert_text = false;
event
}
Event::End(Tag::CodeBlock(_)) | Event::End(Tag::Code) => {
self.convert_text = true;
event
}
Event::Text(ref text) if self.convert_text => {
Event::Text(Cow::from(convert_quotes_to_curly(text)))
}
_ => event,
}
}
}
fn clean_codeblock_headers(event: Event) -> Event {
match event {
Event::Start(Tag::CodeBlock(ref info)) => {
let info: String = info.chars().filter(|ch| !ch.is_whitespace()).collect();
Event::Start(Tag::CodeBlock(Cow::from(info)))
}
_ => event,
}
}
fn convert_quotes_to_curly(original_text: &str) -> String {
// We'll consider the start to be "whitespace".
let mut preceded_by_whitespace = true;
2018-07-24 01:45:01 +08:00
original_text
.chars()
.map(|original_char| {
let converted_char = match original_char {
'\'' => {
if preceded_by_whitespace {
''
} else {
''
}
}
2018-07-24 01:45:01 +08:00
'"' => {
if preceded_by_whitespace {
'“'
} else {
'”'
}
}
2018-07-24 01:45:01 +08:00
_ => original_char,
};
2018-07-24 01:45:01 +08:00
preceded_by_whitespace = original_char.is_whitespace();
2018-07-24 01:45:01 +08:00
converted_char
2018-08-21 23:58:44 +08:00
}).collect()
}
/// Prints a "backtrace" of some `Error`.
pub fn log_backtrace(e: &Error) {
error!("Error: {}", e);
for cause in e.iter().skip(1) {
error!("\tCaused By: {}", cause);
}
}
#[cfg(test)]
mod tests {
mod render_markdown {
use super::super::render_markdown;
#[test]
fn preserves_external_links() {
2018-07-24 01:45:01 +08:00
assert_eq!(
render_markdown("[example](https://www.rust-lang.org/)", false),
"<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
);
}
#[test]
fn it_can_adjust_markdown_links() {
2018-07-24 01:45:01 +08:00
assert_eq!(
render_markdown("[example](example.md)", false),
"<p><a href=\"example.html\">example</a></p>\n"
);
assert_eq!(
render_markdown("[example_anchor](example.md#anchor)", false),
"<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
);
}
#[test]
fn it_can_keep_quotes_straight() {
assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
}
#[test]
fn it_can_make_quotes_curly_except_when_they_are_in_code() {
let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
let expected = r#"<p>one</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> four</p>
"#;
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn whitespace_outside_of_codeblock_header_is_preserved() {
let input = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;
let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_properties_are_passed_as_space_delimited_class() {
let input = r#"
```rust,no_run,should_panic,property_3
```
"#;
let expected =
r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
let input = r#"
```rust, no_run,,,should_panic , ,property_3
```
"#;
let expected =
r#"<pre><code class="language-rust,no_run,,,should_panic,,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_without_properties_has_proper_html_class() {
let input = r#"
```rust
```
"#;
let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let input = r#"
```rust
```
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
}
mod html_munging {
use super::super::{id_from_content, normalize_id};
#[test]
fn it_generates_anchors() {
assert_eq!(
id_from_content("## Method-call expressions"),
"method-call-expressions"
);
assert_eq!(
id_from_content("## **Bold** title"),
"bold-title"
);
assert_eq!(
id_from_content("## `Code` title"),
"code-title"
);
}
#[test]
fn it_generates_anchors_from_non_ascii_initial() {
2018-07-24 01:45:01 +08:00
assert_eq!(
id_from_content("## `--passes`: add more rustdoc passes"),
"--passes-add-more-rustdoc-passes"
2018-07-24 01:45:01 +08:00
);
assert_eq!(
id_from_content("## 中文標題 CJK title"),
"中文標題-cjk-title"
);
assert_eq!(
id_from_content("## Über"),
"Über"
2018-07-24 01:45:01 +08:00
);
}
#[test]
fn it_normalizes_ids() {
2018-07-24 01:45:01 +08:00
assert_eq!(
normalize_id("`--passes`: add more rustdoc passes"),
"--passes-add-more-rustdoc-passes"
2018-07-24 01:45:01 +08:00
);
assert_eq!(
normalize_id("Method-call 🐙 expressions \u{1f47c}"),
"method-call--expressions-"
);
assert_eq!(normalize_id("_-_12345"), "_-_12345");
assert_eq!(normalize_id("12345"), "12345");
assert_eq!(normalize_id("中文"), "中文");
assert_eq!(normalize_id("にほんご"), "にほんご");
assert_eq!(normalize_id("한국어"), "한국어");
assert_eq!(normalize_id(""), "");
}
}
mod convert_quotes_to_curly {
use super::super::convert_quotes_to_curly;
#[test]
fn it_converts_single_quotes() {
2018-07-24 01:45:01 +08:00
assert_eq!(
convert_quotes_to_curly("'one', 'two'"),
"one, two"
);
}
#[test]
fn it_converts_double_quotes() {
2018-07-24 01:45:01 +08:00
assert_eq!(
convert_quotes_to_curly(r#""one", "two""#),
"“one”, “two”"
);
}
#[test]
fn it_treats_tab_as_whitespace() {
assert_eq!(convert_quotes_to_curly("\t'one'"), "\tone");
}
}
}