Rewrite links in Markdown to point to fallback if missing in translation

It will follow relative links to other pages and embedded images.
This commit is contained in:
Ruin0x11 2020-08-28 14:50:04 -07:00
parent ee740aceff
commit c72ce182d5
14 changed files with 299 additions and 105 deletions

View File

@ -129,10 +129,11 @@ pub fn execute(args: &ArgMatches) -> Result<()> {
info!("Building book...");
// FIXME: This area is really ugly because we need to re-set livereload :(
let result = MDBook::load(&book_dir).and_then(|mut b| {
update_config(&mut b);
b.build()
});
let result =
MDBook::load_with_build_opts(&book_dir, build_opts.clone()).and_then(|mut b| {
update_config(&mut b);
b.build()
});
if let Err(e) = result {
error!("Unable to load the book");

View File

@ -34,7 +34,7 @@ pub fn make_subcommand<'a, 'b>() -> App<'a, 'b> {
pub fn execute(args: &ArgMatches) -> Result<()> {
let book_dir = get_book_dir(args);
let build_opts = get_build_opts(args);
let mut book = MDBook::load_with_build_opts(&book_dir, build_opts)?;
let mut book = MDBook::load_with_build_opts(&book_dir, build_opts.clone())?;
let update_config = |book: &mut MDBook| {
if let Some(dest_dir) = args.value_of("dest-dir") {
@ -50,7 +50,7 @@ pub fn execute(args: &ArgMatches) -> Result<()> {
trigger_on_change(&book, |paths, book_dir| {
info!("Files changed: {:?}\nBuilding book...\n", paths);
let result = MDBook::load(&book_dir).and_then(|mut b| {
let result = MDBook::load_with_build_opts(&book_dir, build_opts.clone()).and_then(|mut b| {
update_config(&mut b);
b.build()
});

View File

@ -119,7 +119,7 @@ impl HtmlHandlebars {
let mut is_index = true;
for item in book.iter() {
let ctx = RenderItemContext {
let item_ctx = RenderItemContext {
handlebars: &handlebars,
destination: destination.to_path_buf(),
data: data.clone(),
@ -127,7 +127,7 @@ impl HtmlHandlebars {
html_config: html_config.clone(),
edition: ctx.config.rust.edition,
};
self.render_item(item, ctx, &mut print_content)?;
self.render_item(item, item_ctx, src_dir, &ctx.config, &mut print_content)?;
is_index = false;
}
@ -138,6 +138,7 @@ impl HtmlHandlebars {
&html_config,
src_dir,
destination,
language_ident,
handlebars,
&mut data,
)?;
@ -193,6 +194,8 @@ impl HtmlHandlebars {
&self,
item: &BookItem,
mut ctx: RenderItemContext<'_>,
src_dir: &PathBuf,
cfg: &Config,
print_content: &mut String,
) -> Result<()> {
// FIXME: This should be made DRY-er and rely less on mutable state
@ -216,11 +219,29 @@ impl HtmlHandlebars {
.insert("git_repository_edit_url".to_owned(), json!(edit_url));
}
let content = ch.content.clone();
let content = utils::render_markdown(&content, ctx.html_config.curly_quotes);
let fallback_path = cfg.default_language().map(|lang_ident| {
let mut fallback = PathBuf::from(utils::fs::path_to_root(&path));
fallback.push("../");
fallback.push(lang_ident.clone());
fallback
});
let fixed_content =
utils::render_markdown_with_path(&ch.content, ctx.html_config.curly_quotes, Some(path));
let content = ch.content.clone();
let content = utils::render_markdown_with_path(
&content,
ctx.html_config.curly_quotes,
Some(&path),
Some(&src_dir),
&fallback_path,
);
let fixed_content = utils::render_markdown_with_path(
&ch.content,
ctx.html_config.curly_quotes,
Some(&path),
Some(&src_dir),
&fallback_path,
);
if !ctx.is_index {
// Add page break between chapters
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
@ -298,6 +319,7 @@ impl HtmlHandlebars {
html_config: &HtmlConfig,
src_dir: &PathBuf,
destination: &PathBuf,
language_ident: &Option<String>,
handlebars: &mut Handlebars<'_>,
data: &mut serde_json::Map<String, serde_json::Value>,
) -> Result<()> {
@ -321,16 +343,26 @@ impl HtmlHandlebars {
let html_content_404 = utils::render_markdown(&content_404, html_config.curly_quotes);
let mut data_404 = data.clone();
let base_url = if let Some(site_url) = &html_config.site_url {
site_url
let mut base_url = if let Some(site_url) = &html_config.site_url {
site_url.clone()
} else {
debug!(
"HTML 'site-url' parameter not set, defaulting to '/'. Please configure \
this to ensure the 404 page work correctly, especially if your site is hosted in a \
subdirectory on the HTTP server."
);
"/"
String::from("/")
};
// Set the subdirectory to the currently localized version if using a
// multilingual output format.
if let LoadedBook::Localized(_) = ctx.book {
if let Some(lang_ident) = language_ident {
base_url.push_str(lang_ident);
base_url.push_str("/");
}
}
data_404.insert("base_url".to_owned(), json!(base_url));
// Set a dummy path to ensure other paths (e.g. in the TOC) are generated correctly
data_404.insert("path".to_owned(), json!("404.md"));

View File

@ -10,13 +10,18 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
use std::borrow::Cow;
use std::fmt::Write;
use std::path::Path;
use std::path::{Path, PathBuf};
pub use self::string::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
take_rustdoc_include_lines,
};
lazy_static! {
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
}
/// Replaces multiple consecutive whitespace characters with a single space character.
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
lazy_static! {
@ -71,6 +76,119 @@ pub fn id_from_content(content: &str) -> String {
normalize_id(trimmed)
}
/// Appends `dest` to `fixed_link`, swapping a `.md` extension for `.html`.
///
/// When `MD_LINK` matches, the text captured before `.md` is appended,
/// followed by `.html` and the optional `#fragment`. When it does not match,
/// `dest` is appended verbatim.
///
/// NOTE(review): `MD_LINK` is not anchored at the end of the string, so any
/// text following `.md` that is not a `#fragment` (e.g. a `?query`) is not
/// part of a capture group and is therefore not copied to the output.
fn md_to_html_link<'a>(dest: &CowStr<'a>, fixed_link: &mut String) {
    let raw: &str = dest;
    match MD_LINK.captures(raw) {
        Some(caps) => {
            let fragment = caps.name("anchor").map_or("", |m| m.as_str());
            fixed_link.push_str(&caps["link"]);
            fixed_link.push_str(".html");
            fixed_link.push_str(fragment);
        }
        None => fixed_link.push_str(raw),
    }
}
fn fix<'a, P: AsRef<Path>>(
dest: CowStr<'a>,
path: Option<&Path>,
src_dir: Option<&Path>,
fallback_path: &Option<P>,
) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
}
return format!("{}{}", base, dest).into();
} else {
return dest;
}
}
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
// If this link is missing on the filesystem in the current directory,
// but not in the fallback directory, use the fallback's page.
let mut redirected_path = false;
if let Some(src_dir) = src_dir {
let mut dest_path = src_dir.to_str().unwrap().to_string();
write!(dest_path, "/{}", dest).unwrap();
trace!("Check existing: {:?}", dest_path);
if !PathBuf::from(dest_path).exists() {
if let Some(fallback_path) = fallback_path {
let mut fallback_file = src_dir.to_str().unwrap().to_string();
// Check if there is a Markdown or other file in the fallback.
write!(
fallback_file,
"/{}/{}",
fallback_path.as_ref().display(),
dest
)
.unwrap();
trace!("Check fallback: {:?}", fallback_file);
if PathBuf::from(fallback_file).exists() {
write!(fixed_link, "{}/", fallback_path.as_ref().display()).unwrap();
debug!(
"Redirect link to default translation: {:?} -> {:?}",
dest, fixed_link
);
redirected_path = true;
}
}
}
}
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
trace!("Base: {:?}", base);
if !redirected_path && !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
}
md_to_html_link(&dest, &mut fixed_link);
return CowStr::from(fixed_link);
}
dest
}
fn fix_html<'a, P: AsRef<Path>>(
html: CowStr<'a>,
path: Option<&Path>,
src_dir: Option<&Path>,
fallback_path: &Option<P>,
) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
// fragments like those generated by pulldown_cmark.
//
// There are dozens of HTML tags/attributes that contain paths, so
// feel free to add more tags if desired; these are the only ones I
// care about right now.
lazy_static! {
static ref HTML_LINK: Regex =
Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
}
HTML_LINK
.replace_all(&html, move |caps: &regex::Captures<'_>| {
let fixed = fix(caps[2].into(), path, src_dir, fallback_path);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
.into()
}
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
@ -80,92 +198,31 @@ pub fn id_from_content(content: &str) -> String {
/// page go to the original location. Normal page rendering sets `path` to
/// None. Ideally, print page links would link to anchors on the print page,
/// but that is very difficult.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
lazy_static! {
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
}
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
}
return format!("{}{}", base, dest).into();
} else {
return dest;
}
}
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
}
if let Some(caps) = MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
fixed_link.push_str(anchor.as_str());
}
} else {
fixed_link.push_str(&dest);
};
return CowStr::from(fixed_link);
}
dest
}
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
// fragments like those generated by pulldown_cmark.
//
// There are dozens of HTML tags/attributes that contain paths, so
// feel free to add more tags if desired; these are the only ones I
// care about right now.
lazy_static! {
static ref HTML_LINK: Regex =
Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
}
HTML_LINK
.replace_all(&html, |caps: &regex::Captures<'_>| {
let fixed = fix(caps[2].into(), path);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
.into()
}
fn adjust_links<'a, P: AsRef<Path>>(
event: Event<'a>,
path: Option<&Path>,
src_dir: Option<&Path>,
fallback_path: &Option<P>,
) -> Event<'a> {
match event {
Event::Start(Tag::Link(link_type, dest, title)) => {
Event::Start(Tag::Link(link_type, fix(dest, path), title))
}
Event::Start(Tag::Image(link_type, dest, title)) => {
Event::Start(Tag::Image(link_type, fix(dest, path), title))
}
Event::Html(html) => Event::Html(fix_html(html, path)),
Event::Start(Tag::Link(link_type, dest, title)) => Event::Start(Tag::Link(
link_type,
fix(dest, path, src_dir, fallback_path),
title,
)),
Event::Start(Tag::Image(link_type, dest, title)) => Event::Start(Tag::Image(
link_type,
fix(dest, path, src_dir, fallback_path),
title,
)),
Event::Html(html) => Event::Html(fix_html(html, path, src_dir, fallback_path)),
_ => event,
}
}
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
render_markdown_with_path(text, curly_quotes, None)
render_markdown_with_path(text, curly_quotes, None, None, &None::<PathBuf>)
}
pub fn new_cmark_parser(text: &str) -> Parser<'_> {
@ -177,13 +234,19 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> {
Parser::new_ext(text, opts)
}
pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
pub fn render_markdown_with_path<P: AsRef<Path>>(
text: &str,
curly_quotes: bool,
path: Option<&Path>,
src_dir: Option<&Path>,
fallback_path: &Option<P>,
) -> String {
let mut s = String::with_capacity(text.len() * 3 / 2);
let p = new_cmark_parser(text);
let mut converter = EventQuoteConverter::new(curly_quotes);
let events = p
.map(clean_codeblock_headers)
.map(|event| adjust_links(event, path))
.map(|event| adjust_links(event, path, src_dir, fallback_path))
.map(|event| converter.convert(event));
html::push_html(&mut s, events);
@ -287,7 +350,7 @@ pub fn log_backtrace(e: &Error) {
#[cfg(test)]
mod tests {
mod render_markdown {
use super::super::render_markdown;
use super::super::{render_markdown, render_markdown_with_path};
#[test]
fn preserves_external_links() {
@ -404,6 +467,75 @@ more text with spaces
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
use std::fs::{self, File};
use std::io::Write;
use std::path::PathBuf;
use tempfile::{Builder as TempFileBuilder, TempDir};
const DUMMY_SRC: &str = "
# Dummy Chapter
this is some dummy text.
And here is some \
more text.
";
/// Creates a fresh temporary directory containing a `chapter_1.md` file
/// filled with `DUMMY_SRC`.
///
/// Returns the path of the created file together with the `TempDir` handle;
/// the handle is returned so the caller decides how long the directory is
/// kept around.
fn dummy_link() -> (PathBuf, TempDir) {
    let temp = TempFileBuilder::new().prefix("book").tempdir().unwrap();

    let chapter_path = temp.path().join("chapter_1.md");
    File::create(&chapter_path)
        .unwrap()
        .write_all(DUMMY_SRC.as_bytes())
        .unwrap();

    // `join` already yields an owned `PathBuf`, so it is returned directly
    // instead of being cloned first.
    (chapter_path, temp)
}
#[test]
fn links_are_rewritten_to_fallback_for_nonexistent_files() {
    let input = r#"
[Link](chapter_1.md)
"#;

    // The "localized" directory has its chapter removed, so the link target
    // is missing there.
    let (localized_file, localized_dir) = dummy_link();
    fs::remove_file(&localized_file).unwrap();

    // The "fallback" directory still contains the chapter.
    let (_, fallback_dir) = dummy_link();
    let fallback_name = fallback_dir.path().file_name().unwrap();
    let relative_fallback_dir =
        PathBuf::from(super::super::fs::path_to_root(localized_dir.path())).join(fallback_name);

    let expected_fallback = format!(
        "<p><a href=\"{}/chapter_1.html\">Link</a></p>\n",
        relative_fallback_dir.display()
    );

    // The rewrite must not depend on the curly-quotes setting.
    for &curly_quotes in &[false, true] {
        assert_eq!(
            render_markdown_with_path(
                input,
                curly_quotes,
                None,
                Some(localized_dir.path()),
                &Some(&relative_fallback_dir)
            ),
            expected_fallback
        );
    }
}
}
mod html_munging {

View File

@ -4,4 +4,6 @@
- [Chapter 1](chapter/README.md)
- [Section 1](chapter/1.md)
- [Section 2](chapter/2.md)
- [Untranslated Chapter](untranslated.md)
- [Untranslated Page](untranslated-page.md)
- [Inline Link Fallbacks](inline-link-fallbacks.md)
- [Missing Summary Chapter](missing-summary-chapter.md)

View File

@ -0,0 +1 @@
# 第三節

View File

@ -0,0 +1,7 @@
# Inline Link Fallbacks
This page tests localization fallbacks of inline links.
Select another language from the dropdown to see a demonstration.
![Rust logo](rust_logo.png)

View File

@ -0,0 +1,3 @@
# Missing Summary Chapter
This page is to test that inline links to a page missing in a translation's SUMMARY.md redirect to the page in the fallback translation.

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.3 KiB

View File

@ -0,0 +1,3 @@
# Untranslated page.
This page is not available in any translation. If things work correctly, you should see this page written in the fallback language (English) if the other translations list it on their summary page.

View File

@ -1,3 +0,0 @@
# Untranslated chapter.
This chapter is not available in any translation. If things work correctly, you should see this page written in the fallback language (English) if the other translations list it on their summary page.

View File

@ -4,5 +4,6 @@
- [第一章](chapter/README.md)
- [第一節](chapter/1.md)
- [第二節](chapter/2.md)
- [第三節](chapter/3.md)
- [Untranslated Chapter](untranslated.md)
- [Untranslated Page](untranslated-page.md)
- [日本語専用のページ](translation-local-page.md)
- [内部リンクの入れ替え](inline-link-fallbacks.md)

View File

@ -0,0 +1,15 @@
# 内部リンクの入れ替え
以下のイメージは英語バージョンから移植されたでしょうか。
If inline link substitution works, then an image should appear below, sourced from the English translation.
![Rust logo](rust_logo.png)
Here is an [inline link](translation-local-page.md) to an existing page in this translation.
Here is an [inline link](missing-summary-chapter.md) to a page missing from this translation's SUMMARY.md. It should have been modified to point to the page in the English version of the book.
Also, here is an [inline link](blah.md) to a page missing from both translations. It should point to this language's 404 page.
The substitution won't work if you specify the `-l`/`--language` option, since it only builds a single translation in that case.

View File

@ -1,4 +1,4 @@
# 第三節。
# 日本語専用のページ
実は、このページは英語バージョンに存在しません。