search: fix anchor ids for duplicate headers
This commit is contained in:
parent
2213312938
commit
972c61fa76
|
@ -768,16 +768,7 @@ fn insert_link_into_header(
|
||||||
content: &str,
|
content: &str,
|
||||||
id_counter: &mut HashMap<String, usize>,
|
id_counter: &mut HashMap<String, usize>,
|
||||||
) -> String {
|
) -> String {
|
||||||
let raw_id = utils::id_from_content(content);
|
let id = utils::unique_id_from_content(content, id_counter);
|
||||||
|
|
||||||
let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
|
|
||||||
|
|
||||||
let id = match *id_count {
|
|
||||||
0 => raw_id,
|
|
||||||
other => format!("{}-{}", raw_id, other),
|
|
||||||
};
|
|
||||||
|
|
||||||
*id_count += 1;
|
|
||||||
|
|
||||||
format!(
|
format!(
|
||||||
r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,
|
r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,
|
||||||
|
|
|
@ -97,6 +97,7 @@ fn render_item(
|
||||||
|
|
||||||
breadcrumbs.push(chapter.name.clone());
|
breadcrumbs.push(chapter.name.clone());
|
||||||
|
|
||||||
|
let mut id_counter = HashMap::new();
|
||||||
while let Some(event) = p.next() {
|
while let Some(event) = p.next() {
|
||||||
match event {
|
match event {
|
||||||
Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
|
Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
|
||||||
|
@ -120,7 +121,7 @@ fn render_item(
|
||||||
}
|
}
|
||||||
Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
|
Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
|
||||||
in_heading = false;
|
in_heading = false;
|
||||||
section_id = Some(utils::id_from_content(&heading));
|
section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
|
||||||
breadcrumbs.push(heading.clone());
|
breadcrumbs.push(heading.clone());
|
||||||
}
|
}
|
||||||
Event::Start(Tag::FootnoteDefinition(name)) => {
|
Event::Start(Tag::FootnoteDefinition(name)) => {
|
||||||
|
|
|
@ -9,6 +9,7 @@ use regex::Regex;
|
||||||
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
|
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
|
@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {
|
||||||
|
|
||||||
/// Generate an ID for use with anchors which is derived from a "normalised"
|
/// Generate an ID for use with anchors which is derived from a "normalised"
|
||||||
/// string.
|
/// string.
|
||||||
|
// This function should be made private when the deprecation expires.
|
||||||
|
#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
|
||||||
pub fn id_from_content(content: &str) -> String {
|
pub fn id_from_content(content: &str) -> String {
|
||||||
let mut content = content.to_string();
|
let mut content = content.to_string();
|
||||||
|
|
||||||
|
@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {
|
||||||
|
|
||||||
// Remove spaces and hashes indicating a header
|
// Remove spaces and hashes indicating a header
|
||||||
let trimmed = content.trim().trim_start_matches('#').trim();
|
let trimmed = content.trim().trim_start_matches('#').trim();
|
||||||
|
|
||||||
normalize_id(trimmed)
|
normalize_id(trimmed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate an ID for use with anchors which is derived from a "normalised"
|
||||||
|
/// string.
|
||||||
|
///
|
||||||
|
/// Each ID returned will be unique, if the same `id_counter` is provided on
|
||||||
|
/// each call.
|
||||||
|
pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
|
||||||
|
let id = {
|
||||||
|
#[allow(deprecated)]
|
||||||
|
id_from_content(content)
|
||||||
|
};
|
||||||
|
|
||||||
|
// If we have headers with the same normalized id, append an incrementing counter
|
||||||
|
let id_count = id_counter.entry(id.clone()).or_insert(0);
|
||||||
|
let unique_id = match *id_count {
|
||||||
|
0 => id,
|
||||||
|
id_count => format!("{}-{}", id, id_count),
|
||||||
|
};
|
||||||
|
*id_count += 1;
|
||||||
|
unique_id
|
||||||
|
}
|
||||||
|
|
||||||
/// Fix links to the correct location.
|
/// Fix links to the correct location.
|
||||||
///
|
///
|
||||||
/// This adjusts links, such as turning `.md` extensions to `.html`.
|
/// This adjusts links, such as turning `.md` extensions to `.html`.
|
||||||
|
@ -332,8 +355,9 @@ more text with spaces
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mod html_munging {
|
#[allow(deprecated)]
|
||||||
use super::super::{id_from_content, normalize_id};
|
mod id_from_content {
|
||||||
|
use super::super::id_from_content;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn it_generates_anchors() {
|
fn it_generates_anchors() {
|
||||||
|
@ -361,6 +385,10 @@ more text with spaces
|
||||||
);
|
);
|
||||||
assert_eq!(id_from_content("## Über"), "Über");
|
assert_eq!(id_from_content("## Über"), "Über");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mod html_munging {
|
||||||
|
use super::super::{normalize_id, unique_id_from_content};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn it_normalizes_ids() {
|
fn it_normalizes_ids() {
|
||||||
|
@ -379,5 +407,28 @@ more text with spaces
|
||||||
assert_eq!(normalize_id("한국어"), "한국어");
|
assert_eq!(normalize_id("한국어"), "한국어");
|
||||||
assert_eq!(normalize_id(""), "");
|
assert_eq!(normalize_id(""), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn it_generates_unique_ids_from_content() {
|
||||||
|
// Same id if not given shared state
|
||||||
|
assert_eq!(
|
||||||
|
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
|
||||||
|
"中文標題-cjk-title"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
|
||||||
|
"中文標題-cjk-title"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Different id if given shared state
|
||||||
|
let mut id_counter = Default::default();
|
||||||
|
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
|
||||||
|
assert_eq!(
|
||||||
|
unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
|
||||||
|
"中文標題-cjk-title"
|
||||||
|
);
|
||||||
|
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
|
||||||
|
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
- [Markdown](first/markdown.md)
|
- [Markdown](first/markdown.md)
|
||||||
- [Unicode](first/unicode.md)
|
- [Unicode](first/unicode.md)
|
||||||
- [No Headers](first/no-headers.md)
|
- [No Headers](first/no-headers.md)
|
||||||
|
- [Duplicate Headers](first/duplicate-headers.md)
|
||||||
- [Second Chapter](second.md)
|
- [Second Chapter](second.md)
|
||||||
- [Nested Chapter](second/nested.md)
|
- [Nested Chapter](second/nested.md)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Duplicate headers
|
||||||
|
|
||||||
|
This page validates behaviour of duplicate headers.
|
||||||
|
|
||||||
|
# Header Text
|
||||||
|
|
||||||
|
# Header Text
|
||||||
|
|
||||||
|
# header-text
|
|
@ -35,6 +35,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
|
||||||
"1.4. Markdown",
|
"1.4. Markdown",
|
||||||
"1.5. Unicode",
|
"1.5. Unicode",
|
||||||
"1.6. No Headers",
|
"1.6. No Headers",
|
||||||
|
"1.7. Duplicate Headers",
|
||||||
"2.1. Nested Chapter",
|
"2.1. Nested Chapter",
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -633,11 +634,12 @@ mod search {
|
||||||
let some_section = get_doc_ref("first/index.html#some-section");
|
let some_section = get_doc_ref("first/index.html#some-section");
|
||||||
let summary = get_doc_ref("first/includes.html#summary");
|
let summary = get_doc_ref("first/includes.html#summary");
|
||||||
let no_headers = get_doc_ref("first/no-headers.html");
|
let no_headers = get_doc_ref("first/no-headers.html");
|
||||||
|
let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
|
||||||
let conclusion = get_doc_ref("conclusion.html#conclusion");
|
let conclusion = get_doc_ref("conclusion.html#conclusion");
|
||||||
|
|
||||||
let bodyidx = &index["index"]["index"]["body"]["root"];
|
let bodyidx = &index["index"]["index"]["body"]["root"];
|
||||||
let textidx = &bodyidx["t"]["e"]["x"]["t"];
|
let textidx = &bodyidx["t"]["e"]["x"]["t"];
|
||||||
assert_eq!(textidx["df"], 2);
|
assert_eq!(textidx["df"], 5);
|
||||||
assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
|
assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
|
||||||
assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
|
assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
|
||||||
|
|
||||||
|
@ -646,7 +648,7 @@ mod search {
|
||||||
assert_eq!(docs[&some_section]["body"], "");
|
assert_eq!(docs[&some_section]["body"], "");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
docs[&summary]["body"],
|
docs[&summary]["body"],
|
||||||
"Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
|
"Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
docs[&summary]["breadcrumbs"],
|
docs[&summary]["breadcrumbs"],
|
||||||
|
@ -657,6 +659,10 @@ mod search {
|
||||||
docs[&no_headers]["breadcrumbs"],
|
docs[&no_headers]["breadcrumbs"],
|
||||||
"First Chapter » No Headers"
|
"First Chapter » No Headers"
|
||||||
);
|
);
|
||||||
|
assert_eq!(
|
||||||
|
docs[&duplicate_headers_1]["breadcrumbs"],
|
||||||
|
"First Chapter » Duplicate Headers » Header Text"
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
docs[&no_headers]["body"],
|
docs[&no_headers]["body"],
|
||||||
"Capybara capybara capybara. Capybara capybara capybara."
|
"Capybara capybara capybara. Capybara capybara capybara."
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue