search: fix anchor ids for duplicate headers

2022-02-18 15:27:24 +00:00 · 2022-02-18 15:27:24 +00:00 · 972c61fa76
parent 2213312938
commit 972c61fa76
7 changed files with 554 additions and 137 deletions
--- a/src/renderer/html_handlebars/hbs_renderer.rs
+++ b/src/renderer/html_handlebars/hbs_renderer.rs
@ -768,16 +768,7 @@ fn insert_link_into_header(
    content: &str,
    id_counter: &mut HashMap<String, usize>,
 ) -> String {
-    let raw_id = utils::id_from_content(content);
+    let id = utils::unique_id_from_content(content, id_counter);
    let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
    let id = match *id_count {
        0 => raw_id,
        other => format!("{}-{}", raw_id, other),
    };
    *id_count += 1;
    format!(
        r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@ -97,6 +97,7 @@ fn render_item(
    breadcrumbs.push(chapter.name.clone());
    let mut id_counter = HashMap::new();
    while let Some(event) = p.next() {
        match event {
            Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
@ -120,7 +121,7 @@ fn render_item(
            }
            Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
                in_heading = false;
-                section_id = Some(utils::id_from_content(&heading));
+                section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
                breadcrumbs.push(heading.clone());
            }
            Event::Start(Tag::FootnoteDefinition(name)) => {
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@ -9,6 +9,7 @@ use regex::Regex;
 use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt::Write;
 use std::path::Path;
@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {
 /// Generate an ID for use with anchors which is derived from a "normalised"
 /// string.
 // This function should be made private when the deprecation expires.
 #[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
 pub fn id_from_content(content: &str) -> String {
    let mut content = content.to_string();
@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {
    // Remove spaces and hashes indicating a header
    let trimmed = content.trim().trim_start_matches('#').trim();
    normalize_id(trimmed)
 }
 /// Generate an ID for use with anchors which is derived from a "normalised"
 /// string.
 ///
 /// Each ID returned will be unique, if the same `id_counter` is provided on
 /// each call. The first piece of content that normalises to a given ID gets
 /// that ID unchanged; later ones get `-1`, `-2`, ... appended. A suffixed
 /// candidate that has already been handed out (for example because an
 /// earlier heading was literally titled `foo-1`) is skipped, so two calls
 /// never return the same string.
 ///
 /// `id_counter` maps every ID seen or returned so far to the next numeric
 /// suffix to try for it. Callers should treat its contents as opaque and
 /// start from an empty map for each independent set of anchors.
 pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
    // The deprecation on `id_from_content` targets external callers; this is
    // the intended internal use of it.
    #[allow(deprecated)]
    let base = id_from_content(content);

    // First time this normalised id shows up: hand it out as-is and remember
    // that the next duplicate should start at suffix 1.
    let mut suffix = {
        let next_suffix = id_counter.entry(base.clone()).or_insert(0);
        if *next_suffix == 0 {
            *next_suffix = 1;
            return base;
        }
        *next_suffix
    };

    // Duplicate: append an incrementing counter, skipping any candidate that
    // is already taken (by an earlier heading or an earlier generated id).
    let unique_id = loop {
        let candidate = format!("{}-{}", base, suffix);
        suffix += 1;
        if !id_counter.contains_key(&candidate) {
            break candidate;
        }
    };

    // Resume from the next untried suffix on the following duplicate, and
    // reserve the id we are returning so it can never be produced again.
    id_counter.insert(base, suffix);
    id_counter.insert(unique_id.clone(), 1);
    unique_id
 }
 /// Fix links to the correct location.
 ///
 /// This adjusts links, such as turning `.md` extensions to `.html`.
@ -332,8 +355,9 @@ more text with spaces
        }
    }
-    mod html_munging {
+    #[allow(deprecated)]
-        use super::super::{id_from_content, normalize_id};
+    mod id_from_content {
        use super::super::id_from_content;
        #[test]
        fn it_generates_anchors() {
@ -361,6 +385,10 @@ more text with spaces
            );
            assert_eq!(id_from_content("## Über"), "Über");
        }
    }
    mod html_munging {
        use super::super::{normalize_id, unique_id_from_content};
        #[test]
        fn it_normalizes_ids() {
@ -379,5 +407,28 @@ more text with spaces
            assert_eq!(normalize_id("한국어"), "한국어");
            assert_eq!(normalize_id(""), "");
        }
        #[test]
        fn it_generates_unique_ids_from_content() {
            // With a brand-new counter on every call there is no memory of
            // earlier headings, so the same heading maps to the same id.
            for _ in 0..2 {
                let standalone_id =
                    unique_id_from_content("## 中文標題 CJK title", &mut Default::default());
                assert_eq!(standalone_id, "中文標題-cjk-title");
            }

            // With one counter shared across calls, a repeated heading picks
            // up an incrementing numeric suffix while other headings are
            // unaffected.
            let mut shared_counter = Default::default();
            let expectations = [
                ("## Über", "Über"),
                ("## 中文標題 CJK title", "中文標題-cjk-title"),
                ("## Über", "Über-1"),
                ("## Über", "Über-2"),
            ];
            for &(heading, expected_id) in expectations.iter() {
                assert_eq!(
                    unique_id_from_content(heading, &mut shared_counter),
                    expected_id
                );
            }
        }
    }
 }
--- a/tests/dummy_book/src/SUMMARY.md
+++ b/tests/dummy_book/src/SUMMARY.md
@ -13,6 +13,7 @@
    - [Markdown](first/markdown.md)
    - [Unicode](first/unicode.md)
    - [No Headers](first/no-headers.md)
    - [Duplicate Headers](first/duplicate-headers.md)
 - [Second Chapter](second.md)
    - [Nested Chapter](second/nested.md)
--- a/tests/dummy_book/src/first/duplicate-headers.md
+++ b/tests/dummy_book/src/first/duplicate-headers.md
@ -0,0 +1,9 @@
 # Duplicate headers
 This page validates behaviour of duplicate headers.
 # Header Text
 # Header Text
 # header-text
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@ -35,6 +35,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
    "1.4. Markdown",
    "1.5. Unicode",
    "1.6. No Headers",
    "1.7. Duplicate Headers",
    "2.1. Nested Chapter",
 ];
@ -633,11 +634,12 @@ mod search {
        let some_section = get_doc_ref("first/index.html#some-section");
        let summary = get_doc_ref("first/includes.html#summary");
        let no_headers = get_doc_ref("first/no-headers.html");
        let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
        let conclusion = get_doc_ref("conclusion.html#conclusion");
        let bodyidx = &index["index"]["index"]["body"]["root"];
        let textidx = &bodyidx["t"]["e"]["x"]["t"];
-        assert_eq!(textidx["df"], 2);
+        assert_eq!(textidx["df"], 5);
        assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
        assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
@ -646,7 +648,7 @@ mod search {
        assert_eq!(docs[&some_section]["body"], "");
        assert_eq!(
            docs[&summary]["body"],
-            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
+            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
        );
        assert_eq!(
            docs[&summary]["breadcrumbs"],
@ -657,6 +659,10 @@ mod search {
            docs[&no_headers]["breadcrumbs"],
            "First Chapter » No Headers"
        );
        assert_eq!(
            docs[&duplicate_headers_1]["breadcrumbs"],
            "First Chapter » Duplicate Headers » Header Text"
        );
        assert_eq!(
            docs[&no_headers]["body"],
            "Capybara capybara capybara. Capybara capybara capybara."
--- a/tests/searchindex_fixture.json
+++ b/tests/searchindex_fixture.json