Scope splitting syntect "boring" implementation

This PR attempts to make the syntect-based highlighter handle hidden ("boring")
lines correctly by manipulating the scope stack directly, instead of trying to
post-process the generated HTML.

It takes strings like this:

    let _t = "interesting string
    # boring string
    ";

And produces DOMs that look like this:

    <span class="syn-source syn-rust">
        <span class="syn-storage syn-type syn-rust">let</span>
        _t
        <span class="syn-keyword syn-operator syn-assignment syn-rust">=</span>
        <span class="syn-string syn-quoted syn-double syn-rust">
            <span class="syn-punctuation syn-definition syn-string syn-begin syn-rust">&quot;</span>
            interesting string
        </span>
    </span>
    <span class="boring">
        <span class="syn-source syn-rust">
            <span class="syn-string syn-quoted syn-double syn-rust">boring string</span>
        </span>
    </span>
    <span class="syn-source syn-rust">
        <span class="syn-string syn-quoted syn-double syn-rust">
            <span class="syn-punctuation syn-definition syn-string syn-end syn-rust">&quot;</span>
        </span>
        <span class="syn-punctuation syn-terminator syn-rust">;</span>
    </span>

In other words, it splits the markup the same way a WYSIWYG editor might if you tried to apply a block
style to a deeply nested selection: the existing styles are preserved, but the "boring" span is always kept at the top level.
The resulting HTML is not optimal, but it should always work.
This commit is contained in:
Michael Howell 2021-09-17 11:55:00 -07:00
parent 83e4915ab2
commit 14250259ef
3 changed files with 66 additions and 8 deletions

View File

@ -7,7 +7,7 @@ use std::borrow::Cow;
use regex::Regex;
use syntect::{
html::{self, ClassStyle},
parsing::{ParseState, ScopeStack, SyntaxReference, SyntaxSet},
parsing::{ParseState, Scope, ScopeStack, ScopeStackOp, SyntaxReference, SyntaxSet},
};
pub struct HtmlGenerator<'a> {
@ -42,19 +42,57 @@ impl<'a> HtmlGenerator<'a> {
} else {
(Cow::from(line), false)
};
let parsed_line = self.parse_state.parse_line(&line, self.syntaxes);
let (formatted_line, delta) = html::line_tokens_to_classed_spans(
let parsed_line = if did_boringify {
// The empty scope is a valid prefix of every other scope.
// If we tried to just use a scope called "boring", we'd need to modify
// the Rust syntax definition.
let boring = Scope::new("").expect("boring is a valid scope");
// Close all open spans, insert `boring`, then re-open all of them.
// `boring` must be at the very top, so that the parser doesn't touch it.
let mut final_parsed_line = Vec::new();
if self.scope_stack.len() != 0 {
final_parsed_line.push((0, ScopeStackOp::Pop(self.scope_stack.len())));
}
final_parsed_line.push((0, ScopeStackOp::Push(boring.clone())));
for item in &self.scope_stack.scopes {
final_parsed_line.push((0, ScopeStackOp::Push(item.clone())));
}
// Now run the parser.
// It should see basically the stack it expects, except the `boring` at the very top,
// which it shouldn't touch because it doesn't know it's there.
let inner_parsed_line = self.parse_state.parse_line(&line, self.syntaxes);
final_parsed_line.extend_from_slice(&inner_parsed_line);
// Figure out what the final stack is.
let mut stack_at_end = self.scope_stack.clone();
for (_, item) in inner_parsed_line {
stack_at_end.apply(&item);
}
// Pop everything, including `boring`.
final_parsed_line.push((line.len(), ScopeStackOp::Pop(stack_at_end.len() + 1)));
// Push all the state back on at the end.
for item in stack_at_end.scopes.into_iter() {
final_parsed_line.push((line.len(), ScopeStackOp::Push(item)));
}
final_parsed_line
} else {
self.parse_state.parse_line(&line, self.syntaxes)
};
let (mut formatted_line, delta) = html::line_tokens_to_classed_spans(
&line,
parsed_line.as_slice(),
self.style,
&mut self.scope_stack,
);
if did_boringify {
// Since the boring scope is preceded only by a Pop operation,
// it must be the first match on the line for <span class="">
formatted_line = formatted_line.replace(
r#"<span class="">"#,
r#"<span class="boring">"#,
);
}
self.open_spans += delta;
self.html.push_str(&if did_boringify {
format!("<span class=\"boring\">{}</span>", formatted_line)
} else {
formatted_line
});
self.html.push_str(&formatted_line);
}
pub fn finalize(mut self) -> String {

View File

@ -3,4 +3,9 @@ fn main() {
#
# // You can even hide lines! :D
# println!("I am hidden! Expand the code snippet to see me");
// You can hide lines within string literals.
let _t = "interesting string
# boring string
";
}

View File

@ -200,6 +200,21 @@ fn rustdoc_include_hides_the_unspecified_part_of_the_file() {
assert_contains_strings(nested, &text);
}
/// Builds the dummy book and checks the rendered `book/second.html` for two
/// fragments: the opening of a double-quoted string followed by its visible
/// first line, and the point where every open span is closed, a top-level
/// `boring` span is opened, and the syntax spans are re-opened around the
/// hidden line's text.
#[test]
fn boringify_properly_splits_string() {
    // Keep the temporary book directory bound for the whole test so the
    // rendered output is still on disk when it is inspected.
    let book_dir = DummyBook::new().build().unwrap();
    let book = MDBook::load(book_dir.path()).unwrap();
    book.build().unwrap();

    let rendered_page = book_dir.path().join("book/second.html");
    let expected_fragments = vec![
        // Visible part: the string scope opens and contains the first line.
        r#"<span class="syn-string syn-quoted syn-double syn-rust"><span class="syn-punctuation syn-definition syn-string syn-begin syn-rust">&quot;</span>interesting string"#,
        // Hidden part: all spans close, then `boring` wraps the re-opened scopes.
        r#"</span></span></span></span><span class="boring"><span class="syn-source syn-rust"><span class="syn-meta syn-function syn-rust"><span class="syn-meta syn-block syn-rust"><span class="syn-string syn-quoted syn-double syn-rust">boring string"#,
    ];
    assert_contains_strings(rendered_page, &expected_fragments);
}
#[test]
fn chapter_content_appears_in_rendered_document() {
let content = vec![