Add include by anchor in preprocessor.

2018-12-12 18:44:15 -05:00 · 2018-12-12 18:44:15 -05:00 · d29a79349c
parent d6088c8a57
commit d29a79349c
3 changed files with 119 additions and 8 deletions
--- a/src/preprocess/links.rs
+++ b/src/preprocess/links.rs
@ -1,5 +1,5 @@
 use crate::errors::*;
-use crate::utils::take_lines;
+use crate::utils::{take_anchored_lines, take_lines};
 use regex::{CaptureMatches, Captures, Regex};
 use std::fs;
 use std::ops::{Range, RangeFrom, RangeFull, RangeTo};
@ -106,6 +106,7 @@ enum LinkType<'a> {
    IncludeRangeFrom(PathBuf, RangeFrom<usize>),
    IncludeRangeTo(PathBuf, RangeTo<usize>),
    IncludeRangeFull(PathBuf, RangeFull),
    IncludeAnchor(PathBuf, String),
    Playpen(PathBuf, Vec<&'a str>),
 }
@ -118,6 +119,7 @@ impl<'a> LinkType<'a> {
            LinkType::IncludeRangeFrom(p, _) => Some(return_relative_path(base, &p)),
            LinkType::IncludeRangeTo(p, _) => Some(return_relative_path(base, &p)),
            LinkType::IncludeRangeFull(p, _) => Some(return_relative_path(base, &p)),
            LinkType::IncludeAnchor(p, _) => Some(return_relative_path(base, &p)),
            LinkType::Playpen(p, _) => Some(return_relative_path(base, &p)),
        }
    }
@ -133,11 +135,21 @@ fn return_relative_path<P: AsRef<Path>>(base: P, relative: P) -> PathBuf {
 fn parse_include_path(path: &str) -> LinkType<'static> {
    let mut parts = path.split(':');
    let path = parts.next().unwrap().into();
    let next_element = parts.next();
    let start = if let Some(value) = next_element.and_then(|s| s.parse::<usize>().ok()) {
        // subtract 1 since line numbers usually begin with 1
-    let start = parts
+        Some(value.saturating_sub(1))
-        .next()
+    } else if let Some(anchor) = next_element {
-        .and_then(|s| s.parse::<usize>().ok())
+        if anchor == "" {
-        .map(|val| val.saturating_sub(1));
+            None
        } else {
            return LinkType::IncludeAnchor(path, String::from(anchor));
        }
    } else {
        None
    };
    let end = parts.next();
    let has_end = end.is_some();
    let end = end.and_then(|s| s.parse::<usize>().ok());
@ -258,6 +270,19 @@ impl<'a> Link<'a> {
                    )
                })
            }
            LinkType::IncludeAnchor(ref pat, ref anchor) => {
                let target = base.join(pat);
                fs::read_to_string(&target)
                    .map(|s| take_anchored_lines(&s, anchor))
                    .chain_err(|| {
                        format!(
                            "Could not read file for link {} ({})",
                            self.link_text,
                            target.display(),
                        )
                    })
            }
            LinkType::Playpen(ref pat, ref attrs) => {
                let target = base.join(pat);
@ -482,6 +507,25 @@ mod tests {
        );
    }
    #[test]
    fn test_find_links_with_anchor() {
        // An include whose suffix after `:` is a name rather than a line number
        // must be reported as an anchor include.
        let text = "Some random text with {{#include file.rs:anchor}}...";
        let links: Vec<_> = find_links(text).collect();
        println!("\nOUTPUT: {:?}\n", links);

        // The link text starts at byte 22 of `text` and is 27 bytes long.
        let expected = vec![Link {
            start_index: 22,
            end_index: 49,
            link_type: LinkType::IncludeAnchor(PathBuf::from("file.rs"), String::from("anchor")),
            link_text: "{{#include file.rs:anchor}}",
        }];
        assert_eq!(links, expected);
    }
    #[test]
    fn test_find_links_escaped_link() {
        let s = "Some random text with escaped playpen \\{{#playpen file.rs editable}} ...";
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@ -11,7 +11,7 @@ use std::borrow::Cow;
 use std::fmt::Write;
 use std::path::Path;
-pub use self::string::take_lines;
+pub use self::string::{take_anchored_lines, take_lines};
 /// Replaces multiple consecutive whitespace characters with a single space character.
 pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
--- a/src/utils/string.rs
+++ b/src/utils/string.rs
@ -1,4 +1,5 @@
 use itertools::Itertools;
 use regex::Regex;
 use std::ops::Bound::{Excluded, Included, Unbounded};
 use std::ops::RangeBounds;
@ -17,9 +18,46 @@ pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String {
    }
 }
 /// Extract the lines of `s` that belong to the region named `anchor`.
 ///
 /// The region opens on the line after the first `ANCHOR: <anchor>` marker and
 /// closes at the first following `ANCHOR_END: <anchor>` marker, or at the end
 /// of `s` when no such end marker exists. Any line inside the region that
 /// carries an `ANCHOR:` or `ANCHOR_END:` marker (for this or any other anchor)
 /// is left out of the result.
 ///
 /// Returns the retained lines joined with `\n`; the result is empty when the
 /// opening marker for `anchor` is never found.
 pub fn take_anchored_lines(s: &str, anchor: &str) -> String {
    lazy_static! {
        static ref RE_START: Regex = Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap();
        static ref RE_END: Regex = Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap();
    }

    let mut remaining = s.lines();

    // Phase 1: consume lines up to and including the opening marker. `any`
    // stops right after the first match, so `remaining` is left positioned on
    // the first line of the region (or exhausted if the anchor is absent).
    let opened = remaining.by_ref().any(|line| {
        RE_START
            .captures(line)
            .map_or(false, |cap| &cap["anchor_name"] == anchor)
    });
    if !opened {
        return String::new();
    }

    // Phase 2: collect the region's content lines.
    let mut kept: Vec<&str> = Vec::new();
    for line in remaining {
        match RE_END.captures(line) {
            // Matching end marker: the region is complete.
            Some(ref cap) if &cap["anchor_name"] == anchor => break,
            // End marker of some other anchor: drop the marker line.
            Some(_) => {}
            // Start marker of a nested or repeated anchor: drop the marker line.
            None if RE_START.is_match(line) => {}
            // Ordinary content.
            None => kept.push(line),
        }
    }

    kept.join("\n")
 }
 #[cfg(test)]
 mod tests {
-    use super::take_lines;
+    use super::{take_anchored_lines, take_lines};
    #[test]
    fn take_lines_test() {
@ -32,4 +70,33 @@ mod tests {
        assert_eq!(take_lines(s, 4..3), "");
        assert_eq!(take_lines(s, ..100), s);
    }
    #[test]
    fn take_anchored_lines_test() {
        // No markers at all.
        let plain = "Lorem\nipsum\ndolor\nsit\namet";
        // An end marker without a preceding start marker.
        let end_only = "Lorem\nipsum\ndolor\nANCHOR_END: test\nsit\namet";
        // A start marker that is never closed: the region runs to the end.
        let unclosed = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet";
        // A properly delimited region.
        let closed = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum";
        // The same start marker repeated inside the region.
        let repeated = "Lorem\nANCHOR: test\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum";
        // One region nested inside another, with varying whitespace after the colon.
        let nested = "Lorem\nANCHOR:    test2\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nANCHOR_END:test2\nipsum";

        // (input, anchor, expected output)
        let cases = [
            (plain, "test", ""),
            (end_only, "test", ""),
            (unclosed, "test", "dolor\nsit\namet"),
            (unclosed, "something", ""),
            (closed, "test", "dolor\nsit\namet"),
            (closed, "something", ""),
            (repeated, "test", "ipsum\ndolor\nsit\namet"),
            (repeated, "something", ""),
            (nested, "test2", "ipsum\ndolor\nsit\namet\nlorem"),
            (nested, "test", "dolor\nsit\namet"),
            (nested, "something", ""),
        ];

        for &(input, anchor, expected) in cases.iter() {
            assert_eq!(take_anchored_lines(input, anchor), expected);
        }
    }
 }