Scope splitting syntect "boring" implementation

This PR attempts to get a syntect implementation that actually works, by manipulating the scope stack directly instead of trying to post-process the HTML. It takes strings like this:

    let _t = "interesting string
    # boring string
    ";

and produces a DOM that looks like this:

    <span class="syn-source syn-rust">
      <span class="syn-storage syn-type syn-rust">let</span> _t
      <span class="syn-keyword syn-operator syn-assignment syn-rust">=</span>
      <span class="syn-string syn-quoted syn-double syn-rust">
        <span class="syn-punctuation syn-definition syn-string syn-begin syn-rust">"</span>
        interesting string
      </span>
    </span>
    <span class="boring">
      <span class="syn-source syn-rust">
        <span class="syn-string syn-quoted syn-double syn-rust">boring string</span>
      </span>
    </span>
    <span class="syn-source syn-rust">
      <span class="syn-string syn-quoted syn-double syn-rust">
        <span class="syn-punctuation syn-definition syn-string syn-end syn-rust">"</span>
      </span>
      <span class="syn-punctuation syn-terminator syn-rust">;</span>
    </span>

In other words, it splits the line up the same way a WYSIWYG editor might if you tried to apply a block style to a deeply nested selection: it maintains the styles, but always ensures that "boring" is top-level. It doesn't produce optimal HTML, but it should always work.
2021-09-17 11:55:00 -07:00 · 2021-09-17 11:55:00 -07:00 · 14250259ef
parent 83e4915ab2
commit 14250259ef
3 changed files with 66 additions and 8 deletions
--- a/src/utils/highlight.rs
+++ b/src/utils/highlight.rs
@ -7,7 +7,7 @@ use std::borrow::Cow;
 use regex::Regex;
 use syntect::{
    html::{self, ClassStyle},
-    parsing::{ParseState, ScopeStack, SyntaxReference, SyntaxSet},
+    parsing::{ParseState, Scope, ScopeStack, ScopeStackOp, SyntaxReference, SyntaxSet},
 };

 pub struct HtmlGenerator<'a> {
@ -42,19 +42,57 @@ impl<'a> HtmlGenerator<'a> {
        } else {
            (Cow::from(line), false)
        };
-        let parsed_line = self.parse_state.parse_line(&line, self.syntaxes);
-        let (formatted_line, delta) = html::line_tokens_to_classed_spans(
+        let parsed_line = if did_boringify {
+            // The empty scope is a valid prefix of every other scope.
+            // If we tried to just use a scope called "boring", we'd need to modify
+            // the Rust syntax definition.
+            let boring = Scope::new("").expect("boring is a valid scope");
+            // Close all open spans, insert `boring`, then re-open all of them.
+            // `boring` must be at the very top, so that the parser doesn't touch it.
+            let mut final_parsed_line = Vec::new();
+            if self.scope_stack.len() != 0 {
+                final_parsed_line.push((0, ScopeStackOp::Pop(self.scope_stack.len())));
+            }
+            final_parsed_line.push((0, ScopeStackOp::Push(boring.clone())));
+            for item in &self.scope_stack.scopes {
+                final_parsed_line.push((0, ScopeStackOp::Push(item.clone())));
+            }
+            // Now run the parser.
+            // It should see basically the stack it expects, except the `boring` at the very top,
+            // which it shouldn't touch because it doesn't know it's there.
+            let inner_parsed_line = self.parse_state.parse_line(&line, self.syntaxes);
+            final_parsed_line.extend_from_slice(&inner_parsed_line);
+            // Figure out what the final stack is.
+            let mut stack_at_end = self.scope_stack.clone();
+            for (_, item) in inner_parsed_line {
+                stack_at_end.apply(&item);
+            }
+            // Pop everything, including `boring`.
+            final_parsed_line.push((line.len(), ScopeStackOp::Pop(stack_at_end.len() + 1)));
+            // Push all the state back on at the end.
+            for item in stack_at_end.scopes.into_iter() {
+                final_parsed_line.push((line.len(), ScopeStackOp::Push(item)));
+            }
+            final_parsed_line
+        } else {
+            self.parse_state.parse_line(&line, self.syntaxes)
+        };
+        let (mut formatted_line, delta) = html::line_tokens_to_classed_spans(
            &line,
            parsed_line.as_slice(),
            self.style,
            &mut self.scope_stack,
        );
+        if did_boringify {
+            // Since the boring scope is preceded only by a Pop operation,
+            // it must be the first match on the line for <span class="">
+            formatted_line = formatted_line.replace(
+                r#"<span class="">"#,
+                r#"<span class="boring">"#,
+                );
+        }
        self.open_spans += delta;
-        self.html.push_str(&if did_boringify {
-            format!("<span class=\"boring\">{}</span>", formatted_line)
-        } else {
-            formatted_line
-        });
+        self.html.push_str(&formatted_line);
    }

    pub fn finalize(mut self) -> String {
--- a/tests/dummy_book/src/example.rs
+++ b/tests/dummy_book/src/example.rs
@ -3,4 +3,9 @@ fn main() {
 #
 #    // You can even hide lines! :D
 #   println!("I am hidden! Expand the code snippet to see me");
+
+    // You can hide lines within string literals.
+    let _t = "interesting string
+# boring string
+    ";
 }
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@ -200,6 +200,21 @@ fn rustdoc_include_hides_the_unspecified_part_of_the_file() {
    assert_contains_strings(nested, &text);
 }

+#[test]
+fn boringify_properly_splits_string() {
+    let temp = DummyBook::new().build().unwrap();
+    let md = MDBook::load(temp.path()).unwrap();
+    md.build().unwrap();
+
+    let nested = temp.path().join("book/second.html");
+    let text = vec![
+        r#"<span class="syn-string syn-quoted syn-double syn-rust"><span class="syn-punctuation syn-definition syn-string syn-begin syn-rust">&quot;</span>interesting string"#,
+        r#"</span></span></span></span><span class="boring"><span class="syn-source syn-rust"><span class="syn-meta syn-function syn-rust"><span class="syn-meta syn-block syn-rust"><span class="syn-string syn-quoted syn-double syn-rust">boring string"#,
+    ];
+
+    assert_contains_strings(nested, &text);
+}
+
 #[test]
 fn chapter_content_appears_in_rendered_document() {
    let content = vec![