Merge pull request #1749 from tommilligan/unique-search-anchors

search: fix anchor ids for duplicate headers
Eric Huss 2022-03-28 12:44:20 -07:00, committed by GitHub
commit a5fddfa468
7 changed files with 554 additions and 137 deletions
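
Before this change, the HTML renderer already deduplicated repeated heading ids with a per-file counter, but the search indexer still derived section ids with the plain `id_from_content`, so search entries for the second and later occurrences of a heading all pointed at the first occurrence's anchor. The fix hoists the counter logic into a shared `utils::unique_id_from_content` helper and calls it from both places. A minimal sketch of the intended behaviour, assuming the new helper is reachable as `mdbook::utils::unique_id_from_content` (the `pub fn` in the utils diff below suggests it is):

use mdbook::utils::unique_id_from_content;
use std::collections::HashMap;

fn main() {
    // One counter per rendered file: the first occurrence keeps the bare id,
    // repeats get an incrementing "-N" suffix.
    let mut counter = HashMap::new();
    assert_eq!(unique_id_from_content("# Header Text", &mut counter), "header-text");
    assert_eq!(unique_id_from_content("# Header Text", &mut counter), "header-text-1");
    // "# header-text" normalizes to the same base id, so it is suffixed too.
    assert_eq!(unique_id_from_content("# header-text", &mut counter), "header-text-2");
}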


@@ -778,16 +778,7 @@ fn insert_link_into_header(
     content: &str,
     id_counter: &mut HashMap<String, usize>,
 ) -> String {
-    let raw_id = utils::id_from_content(content);
-    let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
-    let id = match *id_count {
-        0 => raw_id,
-        other => format!("{}-{}", raw_id, other),
-    };
-    *id_count += 1;
+    let id = utils::unique_id_from_content(content, id_counter);
     format!(
         r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,


@@ -97,6 +97,7 @@ fn render_item(
     breadcrumbs.push(chapter.name.clone());
+    let mut id_counter = HashMap::new();
     while let Some(event) = p.next() {
         match event {
             Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
@@ -120,7 +121,7 @@ fn render_item(
             }
             Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
                 in_heading = false;
-                section_id = Some(utils::id_from_content(&heading));
+                section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
                 breadcrumbs.push(heading.clone());
             }
             Event::Start(Tag::FootnoteDefinition(name)) => {
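
`render_item` runs once per chapter, so the fresh `id_counter` scopes deduplication to a single rendered file, mirroring the HTML renderer above. A self-contained sketch of the derivation both sides now share, assuming pulldown-cmark 0.9 (the version implied by the `Tag::Heading(i, ..)` patterns) and the public helper; depth limits, breadcrumbs, and body-text handling from the real indexer are omitted:

use mdbook::utils::unique_id_from_content;
use pulldown_cmark::{Event, Parser, Tag};
use std::collections::HashMap;

fn heading_anchors(markdown: &str) -> Vec<String> {
    let mut id_counter = HashMap::new(); // one counter per chapter, as in render_item
    let mut anchors = Vec::new();
    let mut heading = String::new();
    let mut in_heading = false;
    for event in Parser::new(markdown) {
        match event {
            Event::Start(Tag::Heading(..)) => in_heading = true,
            Event::Text(text) if in_heading => heading.push_str(&text),
            Event::End(Tag::Heading(..)) => {
                in_heading = false;
                anchors.push(unique_id_from_content(&heading, &mut id_counter));
                heading.clear();
            }
            _ => {}
        }
    }
    anchors
}

fn main() {
    let anchors = heading_anchors("# Header Text\n\n# Header Text\n\n# header-text\n");
    assert_eq!(anchors, ["header-text", "header-text-1", "header-text-2"]);
}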


@@ -9,6 +9,7 @@ use regex::Regex;
 use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::fmt::Write;
 use std::path::Path;
@@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {
 /// Generate an ID for use with anchors which is derived from a "normalised"
 /// string.
+// This function should be made private when the deprecation expires.
+#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
 pub fn id_from_content(content: &str) -> String {
     let mut content = content.to_string();
@@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {
     // Remove spaces and hashes indicating a header
     let trimmed = content.trim().trim_start_matches('#').trim();
     normalize_id(trimmed)
 }
+
+/// Generate an ID for use with anchors which is derived from a "normalised"
+/// string.
+///
+/// Each ID returned will be unique, if the same `id_counter` is provided on
+/// each call.
+pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
+    let id = {
+        #[allow(deprecated)]
+        id_from_content(content)
+    };
+
+    // If we have headers with the same normalized id, append an incrementing counter
+    let id_count = id_counter.entry(id.clone()).or_insert(0);
+    let unique_id = match *id_count {
+        0 => id,
+        id_count => format!("{}-{}", id, id_count),
+    };
+    *id_count += 1;
+    unique_id
+}
 
 /// Fix links to the correct location.
 ///
 /// This adjusts links, such as turning `.md` extensions to `.html`.
@@ -332,8 +355,9 @@ more text with spaces
         }
     }
 
-    mod html_munging {
-        use super::super::{id_from_content, normalize_id};
+    #[allow(deprecated)]
+    mod id_from_content {
+        use super::super::id_from_content;
 
         #[test]
         fn it_generates_anchors() {
@@ -361,6 +385,10 @@ more text with spaces
             );
             assert_eq!(id_from_content("## Über"), "Über");
         }
+    }
+
+    mod html_munging {
+        use super::super::{normalize_id, unique_id_from_content};
 
         #[test]
         fn it_normalizes_ids() {
@@ -379,5 +407,28 @@ more text with spaces
             assert_eq!(normalize_id("한국어"), "한국어");
             assert_eq!(normalize_id(""), "");
         }
+
+        #[test]
+        fn it_generates_unique_ids_from_content() {
+            // Same id if not given shared state
+            assert_eq!(
+                unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
+                "中文標題-cjk-title"
+            );
+            assert_eq!(
+                unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
+                "中文標題-cjk-title"
+            );
+
+            // Different id if given shared state
+            let mut id_counter = Default::default();
+            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
+            assert_eq!(
+                unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
+                "中文標題-cjk-title"
+            );
+            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
+            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
+        }
     }
 }
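
Two details worth noting in the utils change: `id_from_content` stays public but `#[deprecated]`, which keeps semver compatibility for downstream callers (the comment marks it for privatisation once the deprecation expires), and the new helper wraps its call in a block under `#[allow(deprecated)]` so the crate itself builds without warnings.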


@@ -13,6 +13,7 @@
 - [Markdown](first/markdown.md)
 - [Unicode](first/unicode.md)
 - [No Headers](first/no-headers.md)
+- [Duplicate Headers](first/duplicate-headers.md)
 - [Second Chapter](second.md)
 - [Nested Chapter](second/nested.md)


@@ -0,0 +1,9 @@
+# Duplicate headers
+
+This page validates behaviour of duplicate headers.
+
+# Header Text
+
+# Header Text
+
+# header-text
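
Given the shared counter, these three headings come out as `#header-text`, `#header-text-1`, and `#header-text-2`; the literal `# header-text` collides because normalization maps `Header Text` to the same base id. The integration test below checks the `-1` variant end to end.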


@@ -36,6 +36,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
     "1.4. Markdown",
     "1.5. Unicode",
     "1.6. No Headers",
+    "1.7. Duplicate Headers",
     "2.1. Nested Chapter",
 ];
@@ -653,11 +654,12 @@ mod search {
         let some_section = get_doc_ref("first/index.html#some-section");
         let summary = get_doc_ref("first/includes.html#summary");
         let no_headers = get_doc_ref("first/no-headers.html");
+        let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
         let conclusion = get_doc_ref("conclusion.html#conclusion");
 
         let bodyidx = &index["index"]["index"]["body"]["root"];
         let textidx = &bodyidx["t"]["e"]["x"]["t"];
-        assert_eq!(textidx["df"], 2);
+        assert_eq!(textidx["df"], 5);
         assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
         assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
@@ -666,7 +668,7 @@ mod search {
         assert_eq!(docs[&some_section]["body"], "");
         assert_eq!(
             docs[&summary]["body"],
-            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
+            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
         );
         assert_eq!(
             docs[&summary]["breadcrumbs"],
@@ -677,6 +679,10 @@
             docs[&no_headers]["breadcrumbs"],
             "First Chapter » No Headers"
         );
+        assert_eq!(
+            docs[&duplicate_headers_1]["breadcrumbs"],
+            "First Chapter » Duplicate Headers » Header Text"
+        );
         assert_eq!(
             docs[&no_headers]["body"],
             "Capybara capybara capybara. Capybara capybara capybara."

File diff suppressed because it is too large.