490 lines
16 KiB
Rust
490 lines
16 KiB
Rust
#![allow(missing_docs)] // FIXME: Document this
|
||
|
||
pub mod fs;
|
||
mod string;
|
||
pub(crate) mod toml_ext;
|
||
use crate::errors::Error;
|
||
use log::error;
|
||
use once_cell::sync::Lazy;
|
||
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
|
||
use regex::Regex;
|
||
|
||
use std::borrow::Cow;
|
||
use std::collections::HashMap;
|
||
use std::fmt::Write;
|
||
use std::path::Path;
|
||
|
||
pub use self::string::{
|
||
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
|
||
take_rustdoc_include_lines,
|
||
};
|
||
|
||
/// Replaces multiple consecutive whitespace characters with a single space character.
|
||
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
|
||
static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s\s+").unwrap());
|
||
RE.replace_all(text, " ")
|
||
}
|
||
|
||
/// Turn `content` into a string that can be used as an HTML element ID.
///
/// The only hard requirement on an ID is that it contains no ASCII
/// whitespace. The characters are handled as follows:
///
/// * `_`, `-` and alphanumeric characters are kept, with ASCII letters
///   lowercased (non-ASCII letters keep their case);
/// * every whitespace character becomes a `-`;
/// * everything else is dropped.
pub fn normalize_id(content: &str) -> String {
    let mut id = String::with_capacity(content.len());
    for ch in content.chars() {
        match ch {
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c.to_ascii_lowercase()),
            c if c.is_whitespace() => id.push('-'),
            _ => {}
        }
    }
    id
}
|
||
|
||
/// Generate an ID for use with anchors which is derived from a "normalised"
|
||
/// string.
|
||
// This function should be made private when the deprecation expires.
|
||
#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
|
||
pub fn id_from_content(content: &str) -> String {
|
||
let mut content = content.to_string();
|
||
|
||
// Skip any tags or html-encoded stuff
|
||
static HTML: Lazy<Regex> = Lazy::new(|| Regex::new(r"(<.*?>)").unwrap());
|
||
content = HTML.replace_all(&content, "").into();
|
||
const REPL_SUB: &[&str] = &["<", ">", "&", "'", """];
|
||
for sub in REPL_SUB {
|
||
content = content.replace(sub, "");
|
||
}
|
||
|
||
// Remove spaces and hashes indicating a header
|
||
let trimmed = content.trim().trim_start_matches('#').trim();
|
||
normalize_id(trimmed)
|
||
}
|
||
|
||
/// Generate an ID for use with anchors which is derived from a "normalised"
|
||
/// string.
|
||
///
|
||
/// Each ID returned will be unique, if the same `id_counter` is provided on
|
||
/// each call.
|
||
pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
|
||
let id = {
|
||
#[allow(deprecated)]
|
||
id_from_content(content)
|
||
};
|
||
|
||
// If we have headers with the same normalized id, append an incrementing counter
|
||
let id_count = id_counter.entry(id.clone()).or_insert(0);
|
||
let unique_id = match *id_count {
|
||
0 => id,
|
||
id_count => format!("{}-{}", id, id_count),
|
||
};
|
||
*id_count += 1;
|
||
unique_id
|
||
}
|
||
|
||
/// Fix links to the correct location.
|
||
///
|
||
/// This adjusts links, such as turning `.md` extensions to `.html`.
|
||
///
|
||
/// `path` is the path to the page being rendered relative to the root of the
|
||
/// book. This is used for the `print.html` page so that links on the print
|
||
/// page go to the original location. Normal page rendering sets `path` to
|
||
/// None. Ideally, print page links would link to anchors on the print page,
|
||
/// but that is very difficult.
|
||
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
|
||
static SCHEME_LINK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
|
||
static MD_LINK: Lazy<Regex> =
|
||
Lazy::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
|
||
|
||
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
|
||
if dest.starts_with('#') {
|
||
// Fragment-only link.
|
||
if let Some(path) = path {
|
||
let mut base = path.display().to_string();
|
||
if base.ends_with(".md") {
|
||
base.replace_range(base.len() - 3.., ".html");
|
||
}
|
||
return format!("{}{}", base, dest).into();
|
||
} else {
|
||
return dest;
|
||
}
|
||
}
|
||
// Don't modify links with schemes like `https`.
|
||
if !SCHEME_LINK.is_match(&dest) {
|
||
// This is a relative link, adjust it as necessary.
|
||
let mut fixed_link = String::new();
|
||
if let Some(path) = path {
|
||
let base = path
|
||
.parent()
|
||
.expect("path can't be empty")
|
||
.to_str()
|
||
.expect("utf-8 paths only");
|
||
if !base.is_empty() {
|
||
write!(fixed_link, "{}/", base).unwrap();
|
||
}
|
||
}
|
||
|
||
if let Some(caps) = MD_LINK.captures(&dest) {
|
||
fixed_link.push_str(&caps["link"]);
|
||
fixed_link.push_str(".html");
|
||
if let Some(anchor) = caps.name("anchor") {
|
||
fixed_link.push_str(anchor.as_str());
|
||
}
|
||
} else {
|
||
fixed_link.push_str(&dest);
|
||
};
|
||
return CowStr::from(fixed_link);
|
||
}
|
||
dest
|
||
}
|
||
|
||
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
|
||
// This is a terrible hack, but should be reasonably reliable. Nobody
|
||
// should ever parse a tag with a regex. However, there isn't anything
|
||
// in Rust that I know of that is suitable for handling partial html
|
||
// fragments like those generated by pulldown_cmark.
|
||
//
|
||
// There are dozens of HTML tags/attributes that contain paths, so
|
||
// feel free to add more tags if desired; these are the only ones I
|
||
// care about right now.
|
||
static HTML_LINK: Lazy<Regex> =
|
||
Lazy::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());
|
||
|
||
HTML_LINK
|
||
.replace_all(&html, |caps: ®ex::Captures<'_>| {
|
||
let fixed = fix(caps[2].into(), path);
|
||
format!("{}{}\"", &caps[1], fixed)
|
||
})
|
||
.into_owned()
|
||
.into()
|
||
}
|
||
|
||
match event {
|
||
Event::Start(Tag::Link(link_type, dest, title)) => {
|
||
Event::Start(Tag::Link(link_type, fix(dest, path), title))
|
||
}
|
||
Event::Start(Tag::Image(link_type, dest, title)) => {
|
||
Event::Start(Tag::Image(link_type, fix(dest, path), title))
|
||
}
|
||
Event::Html(html) => Event::Html(fix_html(html, path)),
|
||
_ => event,
|
||
}
|
||
}
|
||
|
||
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
|
||
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
|
||
render_markdown_with_path(text, curly_quotes, None)
|
||
}
|
||
|
||
pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
|
||
let mut opts = Options::empty();
|
||
opts.insert(Options::ENABLE_TABLES);
|
||
opts.insert(Options::ENABLE_FOOTNOTES);
|
||
opts.insert(Options::ENABLE_STRIKETHROUGH);
|
||
opts.insert(Options::ENABLE_TASKLISTS);
|
||
opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
|
||
if curly_quotes {
|
||
opts.insert(Options::ENABLE_SMART_PUNCTUATION);
|
||
}
|
||
Parser::new_ext(text, opts)
|
||
}
|
||
|
||
pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
|
||
let mut s = String::with_capacity(text.len() * 3 / 2);
|
||
let p = new_cmark_parser(text, curly_quotes);
|
||
let events = p
|
||
.map(clean_codeblock_headers)
|
||
.map(|event| adjust_links(event, path))
|
||
.flat_map(|event| {
|
||
let (a, b) = wrap_tables(event);
|
||
a.into_iter().chain(b)
|
||
});
|
||
|
||
html::push_html(&mut s, events);
|
||
s
|
||
}
|
||
|
||
/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to.
|
||
fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
|
||
match event {
|
||
Event::Start(Tag::Table(_)) => (
|
||
Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
|
||
Some(event),
|
||
),
|
||
Event::End(Tag::Table(_)) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
|
||
_ => (Some(event), None),
|
||
}
|
||
}
|
||
|
||
fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
|
||
match event {
|
||
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
|
||
let info: String = info
|
||
.chars()
|
||
.map(|x| match x {
|
||
' ' | '\t' => ',',
|
||
_ => x,
|
||
})
|
||
.filter(|ch| !ch.is_whitespace())
|
||
.collect();
|
||
|
||
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
|
||
}
|
||
_ => event,
|
||
}
|
||
}
|
||
|
||
/// Prints a "backtrace" of some `Error`.
|
||
pub fn log_backtrace(e: &Error) {
|
||
error!("Error: {}", e);
|
||
|
||
for cause in e.chain().skip(1) {
|
||
error!("\tCaused By: {}", cause);
|
||
}
|
||
}
|
||
|
||
/// Escapes angle brackets for inclusion in HTML.
///
/// Every `<` is replaced with `&lt;` and every `>` with `&gt;`; all other
/// characters are copied through unchanged.
pub(crate) fn bracket_escape(mut s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let needs_escape: &[char] = &['<', '>'];
    while let Some(next) = s.find(needs_escape) {
        // Copy the text before the bracket verbatim.
        escaped.push_str(&s[..next]);
        match s.as_bytes()[next] {
            b'<' => escaped.push_str("&lt;"),
            b'>' => escaped.push_str("&gt;"),
            // `find` only reports positions of the two characters above.
            _ => unreachable!(),
        }
        // Both brackets are one byte long, so `next + 1` is a char boundary.
        s = &s[next + 1..];
    }
    escaped.push_str(s);
    escaped
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::bracket_escape;
|
||
|
||
mod render_markdown {
|
||
use super::super::render_markdown;
|
||
|
||
#[test]
|
||
fn preserves_external_links() {
|
||
assert_eq!(
|
||
render_markdown("[example](https://www.rust-lang.org/)", false),
|
||
"<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn it_can_adjust_markdown_links() {
|
||
assert_eq!(
|
||
render_markdown("[example](example.md)", false),
|
||
"<p><a href=\"example.html\">example</a></p>\n"
|
||
);
|
||
assert_eq!(
|
||
render_markdown("[example_anchor](example.md#anchor)", false),
|
||
"<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
|
||
);
|
||
|
||
// this anchor contains 'md' inside of it
|
||
assert_eq!(
|
||
render_markdown("[phantom data](foo.html#phantomdata)", false),
|
||
"<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn it_can_wrap_tables() {
|
||
let src = r#"
|
||
| Original | Punycode | Punycode + Encoding |
|
||
|-----------------|-----------------|---------------------|
|
||
| føø | f-5gaa | f_5gaa |
|
||
"#;
|
||
let out = r#"
|
||
<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
|
||
<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
|
||
</tbody></table>
|
||
</div>
|
||
"#.trim();
|
||
assert_eq!(render_markdown(src, false), out);
|
||
}
|
||
|
||
#[test]
|
||
fn it_can_keep_quotes_straight() {
|
||
assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
|
||
}
|
||
|
||
#[test]
|
||
fn it_can_make_quotes_curly_except_when_they_are_in_code() {
|
||
let input = r#"
|
||
'one'
|
||
```
|
||
'two'
|
||
```
|
||
`'three'` 'four'"#;
|
||
let expected = r#"<p>‘one’</p>
|
||
<pre><code>'two'
|
||
</code></pre>
|
||
<p><code>'three'</code> ‘four’</p>
|
||
"#;
|
||
assert_eq!(render_markdown(input, true), expected);
|
||
}
|
||
|
||
#[test]
|
||
fn whitespace_outside_of_codeblock_header_is_preserved() {
|
||
let input = r#"
|
||
some text with spaces
|
||
```rust
|
||
fn main() {
|
||
// code inside is unchanged
|
||
}
|
||
```
|
||
more text with spaces
|
||
"#;
|
||
|
||
let expected = r#"<p>some text with spaces</p>
|
||
<pre><code class="language-rust">fn main() {
|
||
// code inside is unchanged
|
||
}
|
||
</code></pre>
|
||
<p>more text with spaces</p>
|
||
"#;
|
||
assert_eq!(render_markdown(input, false), expected);
|
||
assert_eq!(render_markdown(input, true), expected);
|
||
}
|
||
|
||
#[test]
|
||
fn rust_code_block_properties_are_passed_as_space_delimited_class() {
|
||
let input = r#"
|
||
```rust,no_run,should_panic,property_3
|
||
```
|
||
"#;
|
||
|
||
let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
|
||
"#;
|
||
assert_eq!(render_markdown(input, false), expected);
|
||
assert_eq!(render_markdown(input, true), expected);
|
||
}
|
||
|
||
#[test]
|
||
fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
|
||
let input = r#"
|
||
```rust, no_run,,,should_panic , ,property_3
|
||
```
|
||
"#;
|
||
|
||
let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
|
||
"#;
|
||
assert_eq!(render_markdown(input, false), expected);
|
||
assert_eq!(render_markdown(input, true), expected);
|
||
}
|
||
|
||
#[test]
|
||
fn rust_code_block_without_properties_has_proper_html_class() {
|
||
let input = r#"
|
||
```rust
|
||
```
|
||
"#;
|
||
|
||
let expected = r#"<pre><code class="language-rust"></code></pre>
|
||
"#;
|
||
assert_eq!(render_markdown(input, false), expected);
|
||
assert_eq!(render_markdown(input, true), expected);
|
||
|
||
let input = r#"
|
||
```rust
|
||
```
|
||
"#;
|
||
assert_eq!(render_markdown(input, false), expected);
|
||
assert_eq!(render_markdown(input, true), expected);
|
||
}
|
||
}
|
||
|
||
#[allow(deprecated)]
|
||
mod id_from_content {
|
||
use super::super::id_from_content;
|
||
|
||
#[test]
|
||
fn it_generates_anchors() {
|
||
assert_eq!(
|
||
id_from_content("## Method-call expressions"),
|
||
"method-call-expressions"
|
||
);
|
||
assert_eq!(id_from_content("## **Bold** title"), "bold-title");
|
||
assert_eq!(id_from_content("## `Code` title"), "code-title");
|
||
assert_eq!(
|
||
id_from_content("## title <span dir=rtl>foo</span>"),
|
||
"title-foo"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn it_generates_anchors_from_non_ascii_initial() {
|
||
assert_eq!(
|
||
id_from_content("## `--passes`: add more rustdoc passes"),
|
||
"--passes-add-more-rustdoc-passes"
|
||
);
|
||
assert_eq!(
|
||
id_from_content("## 中文標題 CJK title"),
|
||
"中文標題-cjk-title"
|
||
);
|
||
assert_eq!(id_from_content("## Über"), "Über");
|
||
}
|
||
}
|
||
|
||
mod html_munging {
|
||
use super::super::{normalize_id, unique_id_from_content};
|
||
|
||
#[test]
|
||
fn it_normalizes_ids() {
|
||
assert_eq!(
|
||
normalize_id("`--passes`: add more rustdoc passes"),
|
||
"--passes-add-more-rustdoc-passes"
|
||
);
|
||
assert_eq!(
|
||
normalize_id("Method-call 🐙 expressions \u{1f47c}"),
|
||
"method-call--expressions-"
|
||
);
|
||
assert_eq!(normalize_id("_-_12345"), "_-_12345");
|
||
assert_eq!(normalize_id("12345"), "12345");
|
||
assert_eq!(normalize_id("中文"), "中文");
|
||
assert_eq!(normalize_id("にほんご"), "にほんご");
|
||
assert_eq!(normalize_id("한국어"), "한국어");
|
||
assert_eq!(normalize_id(""), "");
|
||
}
|
||
|
||
#[test]
|
||
fn it_generates_unique_ids_from_content() {
|
||
// Same id if not given shared state
|
||
assert_eq!(
|
||
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
|
||
"中文標題-cjk-title"
|
||
);
|
||
assert_eq!(
|
||
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
|
||
"中文標題-cjk-title"
|
||
);
|
||
|
||
// Different id if given shared state
|
||
let mut id_counter = Default::default();
|
||
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
|
||
assert_eq!(
|
||
unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
|
||
"中文標題-cjk-title"
|
||
);
|
||
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
|
||
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn escaped_brackets() {
|
||
assert_eq!(bracket_escape(""), "");
|
||
assert_eq!(bracket_escape("<"), "<");
|
||
assert_eq!(bracket_escape(">"), ">");
|
||
assert_eq!(bracket_escape("<>"), "<>");
|
||
assert_eq!(bracket_escape("<test>"), "<test>");
|
||
assert_eq!(bracket_escape("a<test>b"), "a<test>b");
|
||
}
|
||
}
|