From 07459aef60da8e6f9e9500a34fe078edf90ecc1e Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 23 Jul 2019 20:45:43 -0400 Subject: [PATCH 1/7] Factor out the use of different ranges from linktype This eliminates some duplication and will enable different kinds of LinkTypes to have line number ranges. Implement `From` for the std `Range` types to enable easier construction. The new code reaaalllly makes me wish for a delegation mechanism though :( --- src/preprocess/links.rs | 127 +++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 60 deletions(-) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index 2a469cde..08d36dcd 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -2,7 +2,7 @@ use crate::errors::*; use crate::utils::{take_anchored_lines, take_lines}; use regex::{CaptureMatches, Captures, Regex}; use std::fs; -use std::ops::{Range, RangeFrom, RangeFull, RangeTo}; +use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo}; use std::path::{Path, PathBuf}; use super::{Preprocessor, PreprocessorContext}; @@ -102,23 +102,70 @@ where #[derive(PartialEq, Debug, Clone)] enum LinkType<'a> { Escaped, - IncludeRange(PathBuf, Range), - IncludeRangeFrom(PathBuf, RangeFrom), - IncludeRangeTo(PathBuf, RangeTo), - IncludeRangeFull(PathBuf, RangeFull), + IncludeRange(PathBuf, LineRange), IncludeAnchor(PathBuf, String), Playpen(PathBuf, Vec<&'a str>), } +// A range of lines specified with some include directive. +#[derive(PartialEq, Debug, Clone)] +enum LineRange { + Range(Range), + RangeFrom(RangeFrom), + RangeTo(RangeTo), + RangeFull(RangeFull), +} + +impl RangeBounds for LineRange { + fn start_bound(&self) -> Bound<&usize> { + match self { + LineRange::Range(r) => r.start_bound(), + LineRange::RangeFrom(r) => r.start_bound(), + LineRange::RangeTo(r) => r.start_bound(), + LineRange::RangeFull(r) => r.start_bound(), + } + } + + fn end_bound(&self) -> Bound<&usize> { + match self { + LineRange::Range(r) => r.end_bound(), + LineRange::RangeFrom(r) => r.end_bound(), + LineRange::RangeTo(r) => r.end_bound(), + LineRange::RangeFull(r) => r.end_bound(), + } + } +} + +impl From> for LineRange { + fn from(r: Range) -> LineRange { + LineRange::Range(r) + } +} + +impl From> for LineRange { + fn from(r: RangeFrom) -> LineRange { + LineRange::RangeFrom(r) + } +} + +impl From> for LineRange { + fn from(r: RangeTo) -> LineRange { + LineRange::RangeTo(r) + } +} + +impl From for LineRange { + fn from(r: RangeFull) -> LineRange { + LineRange::RangeFull(r) + } +} + impl<'a> LinkType<'a> { fn relative_path>(self, base: P) -> Option { let base = base.as_ref(); match self { LinkType::Escaped => None, LinkType::IncludeRange(p, _) => Some(return_relative_path(base, &p)), - LinkType::IncludeRangeFrom(p, _) => Some(return_relative_path(base, &p)), - LinkType::IncludeRangeTo(p, _) => Some(return_relative_path(base, &p)), - LinkType::IncludeRangeFull(p, _) => Some(return_relative_path(base, &p)), LinkType::IncludeAnchor(p, _) => Some(return_relative_path(base, &p)), LinkType::Playpen(p, _) => Some(return_relative_path(base, &p)), } @@ -155,24 +202,21 @@ fn parse_include_path(path: &str) -> LinkType<'static> { let end = end.and_then(|s| s.parse::().ok()); match start { Some(start) => match end { - Some(end) => LinkType::IncludeRange(path, Range { start, end }), + Some(end) => LinkType::IncludeRange(path, LineRange::from(start..end)), None => { if has_end { - LinkType::IncludeRangeFrom(path, RangeFrom { start }) + LinkType::IncludeRange(path, LineRange::from(start..)) } else { LinkType::IncludeRange( path, - Range { - start, - end: start + 1, - }, + LineRange::from(start..start + 1), ) } } }, None => match end { - Some(end) => LinkType::IncludeRangeTo(path, RangeTo { end }), - None => LinkType::IncludeRangeFull(path, RangeFull), + Some(end) => LinkType::IncludeRange(path, LineRange::from(..end)), + None => LinkType::IncludeRange(path, LineRange::from(RangeFull)), }, } } @@ -233,43 +277,6 @@ impl<'a> Link<'a> { ) }) } - LinkType::IncludeRangeFrom(ref pat, ref range) => { - let target = base.join(pat); - - fs::read_to_string(&target) - .map(|s| take_lines(&s, range.clone())) - .chain_err(|| { - format!( - "Could not read file for link {} ({})", - self.link_text, - target.display(), - ) - }) - } - LinkType::IncludeRangeTo(ref pat, ref range) => { - let target = base.join(pat); - - fs::read_to_string(&target) - .map(|s| take_lines(&s, *range)) - .chain_err(|| { - format!( - "Could not read file for link {} ({})", - self.link_text, - target.display(), - ) - }) - } - LinkType::IncludeRangeFull(ref pat, _) => { - let target = base.join(pat); - - fs::read_to_string(&target).chain_err(|| { - format!( - "Could not read file for link {} ({})", - self.link_text, - target.display() - ) - }) - } LinkType::IncludeAnchor(ref pat, ref anchor) => { let target = base.join(pat); @@ -421,7 +428,7 @@ mod tests { vec![Link { start_index: 22, end_index: 48, - link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), 9..20), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(9..20)), link_text: "{{#include file.rs:10:20}}", }] ); @@ -437,7 +444,7 @@ mod tests { vec![Link { start_index: 22, end_index: 45, - link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), 9..10), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(9..10)), link_text: "{{#include file.rs:10}}", }] ); @@ -453,7 +460,7 @@ mod tests { vec![Link { start_index: 22, end_index: 46, - link_type: LinkType::IncludeRangeFrom(PathBuf::from("file.rs"), 9..), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(9..)), link_text: "{{#include file.rs:10:}}", }] ); @@ -469,7 +476,7 @@ mod tests { vec![Link { start_index: 22, end_index: 46, - link_type: LinkType::IncludeRangeTo(PathBuf::from("file.rs"), ..20), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(..20)), link_text: "{{#include file.rs::20}}", }] ); @@ -485,7 +492,7 @@ mod tests { vec![Link { start_index: 22, end_index: 44, - link_type: LinkType::IncludeRangeFull(PathBuf::from("file.rs"), ..), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(..)), link_text: "{{#include file.rs::}}", }] ); @@ -501,7 +508,7 @@ mod tests { vec![Link { start_index: 22, end_index: 42, - link_type: LinkType::IncludeRangeFull(PathBuf::from("file.rs"), ..), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(..)), link_text: "{{#include file.rs}}", }] ); @@ -587,7 +594,7 @@ mod tests { Link { start_index: 38, end_index: 58, - link_type: LinkType::IncludeRangeFull(PathBuf::from("file.rs"), ..), + link_type: LinkType::IncludeRange(PathBuf::from("file.rs"), LineRange::from(..)), link_text: "{{#include file.rs}}", } ); From 50a2ec3cf1eb54fa7e57f0e1cee05dbce7bf917f Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sun, 4 Aug 2019 21:20:45 -0400 Subject: [PATCH 2/7] Ensure the iterator will always return None after the first None I'm not sure in what cases this iterator might possibly return Some again, but let's make absolutely sure. --- src/preprocess/links.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index 08d36dcd..4b7354e0 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -180,7 +180,7 @@ fn return_relative_path>(base: P, relative: P) -> PathBuf { } fn parse_include_path(path: &str) -> LinkType<'static> { - let mut parts = path.split(':'); + let mut parts = path.split(':').fuse(); let path = parts.next().unwrap().into(); let next_element = parts.next(); From 3716123e10c363c973dd2d44308532e839f93a44 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sun, 4 Aug 2019 21:52:14 -0400 Subject: [PATCH 3/7] Increase test coverage of parse_include_path --- src/preprocess/links.rs | 134 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index 4b7354e0..d7a36a19 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -621,4 +621,138 @@ mod tests { ); } + #[test] + fn parse_without_colon_includes_all() { + let link_type = parse_include_path("arbitrary"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(RangeFull)) + ); + } + + #[test] + fn parse_with_nothing_after_colon_includes_all() { + let link_type = parse_include_path("arbitrary:"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(RangeFull)) + ); + } + + #[test] + fn parse_with_two_colons_includes_all() { + let link_type = parse_include_path("arbitrary::"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(RangeFull)) + ); + } + + #[test] + fn parse_with_garbage_after_two_colons_includes_all() { + let link_type = parse_include_path("arbitrary::NaN"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(RangeFull)) + ); + } + + #[test] + fn parse_with_one_number_after_colon_only_that_line() { + let link_type = parse_include_path("arbitrary:5"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(4..5)) + ); + } + + #[test] + fn parse_with_one_based_start_becomes_zero_based() { + let link_type = parse_include_path("arbitrary:1"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(0..1)) + ); + } + + #[test] + fn parse_with_zero_based_start_stays_zero_based_but_is_probably_an_error() { + let link_type = parse_include_path("arbitrary:0"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(0..1)) + ); + } + + #[test] + fn parse_start_only_range() { + let link_type = parse_include_path("arbitrary:5:"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(4..)) + ); + } + + #[test] + fn parse_start_with_garbage_interpreted_as_start_only_range() { + let link_type = parse_include_path("arbitrary:5:NaN"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(4..)) + ); + } + + #[test] + fn parse_end_only_range() { + let link_type = parse_include_path("arbitrary::5"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(..5)) + ); + } + + #[test] + fn parse_start_and_end_range() { + let link_type = parse_include_path("arbitrary:5:10"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(4..10)) + ); + } + + #[test] + fn parse_with_negative_interpreted_as_anchor() { + let link_type = parse_include_path("arbitrary:-5"); + assert_eq!( + link_type, + LinkType::IncludeAnchor(PathBuf::from("arbitrary"), "-5".to_string()) + ); + } + + #[test] + fn parse_with_floating_point_interpreted_as_anchor() { + let link_type = parse_include_path("arbitrary:-5.7"); + assert_eq!( + link_type, + LinkType::IncludeAnchor(PathBuf::from("arbitrary"), "-5.7".to_string()) + ); + } + + #[test] + fn parse_with_anchor_followed_by_colon() { + let link_type = parse_include_path("arbitrary:some-anchor:this-gets-ignored"); + assert_eq!( + link_type, + LinkType::IncludeAnchor(PathBuf::from("arbitrary"), "some-anchor".to_string()) + ); + } + + #[test] + fn parse_with_more_than_three_colons_ignores_everything_after_third_colon() { + let link_type = parse_include_path("arbitrary:5:10:17:anything:"); + assert_eq!( + link_type, + LinkType::IncludeRange(PathBuf::from("arbitrary"), LineRange::from(4..10)) + ); + } } From d96844307462718acfc4cf19208aae2c3e0eb691 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 5 Aug 2019 20:03:51 -0400 Subject: [PATCH 4/7] Don't bother splitting the path after the 3rd colon --- src/preprocess/links.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index d7a36a19..2b435a3f 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -180,7 +180,7 @@ fn return_relative_path>(base: P, relative: P) -> PathBuf { } fn parse_include_path(path: &str) -> LinkType<'static> { - let mut parts = path.split(':').fuse(); + let mut parts = path.splitn(4, ':').fuse(); let path = parts.next().unwrap().into(); let next_element = parts.next(); From aa67245743ae366924ef2c870884f61053aafa69 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 5 Aug 2019 12:45:17 -0400 Subject: [PATCH 5/7] Unnest a conditional --- src/preprocess/links.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index 2b435a3f..a9f9a0c7 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -187,12 +187,10 @@ fn parse_include_path(path: &str) -> LinkType<'static> { let start = if let Some(value) = next_element.and_then(|s| s.parse::().ok()) { // subtract 1 since line numbers usually begin with 1 Some(value.saturating_sub(1)) + } else if let Some("") = next_element { + None } else if let Some(anchor) = next_element { - if anchor == "" { - None - } else { - return LinkType::IncludeAnchor(path, String::from(anchor)); - } + return LinkType::IncludeAnchor(path, String::from(anchor)); } else { None }; From 40159362c08ed8713a05f26ae6c1f14f17c53c0f Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 5 Aug 2019 12:56:07 -0400 Subject: [PATCH 6/7] Unnest another conditional --- src/preprocess/links.rs | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index a9f9a0c7..9a0bacf7 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -198,24 +198,15 @@ fn parse_include_path(path: &str) -> LinkType<'static> { let end = parts.next(); let has_end = end.is_some(); let end = end.and_then(|s| s.parse::().ok()); - match start { - Some(start) => match end { - Some(end) => LinkType::IncludeRange(path, LineRange::from(start..end)), - None => { - if has_end { - LinkType::IncludeRange(path, LineRange::from(start..)) - } else { - LinkType::IncludeRange( - path, - LineRange::from(start..start + 1), - ) - } - } - }, - None => match end { - Some(end) => LinkType::IncludeRange(path, LineRange::from(..end)), - None => LinkType::IncludeRange(path, LineRange::from(RangeFull)), - }, + + match (start, end, has_end) { + (Some(start), Some(end), _) => LinkType::IncludeRange(path, LineRange::from(start..end)), + (Some(start), None, true) => LinkType::IncludeRange(path, LineRange::from(start..)), + (Some(start), None, false) => { + LinkType::IncludeRange(path, LineRange::from(start..start + 1)) + } + (None, Some(end), _) => LinkType::IncludeRange(path, LineRange::from(..end)), + (None, None, _) => LinkType::IncludeRange(path, LineRange::from(RangeFull)), } } From 8c4b292d5849d21bc0d5b6d122695042a94c5b38 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 5 Aug 2019 19:49:43 -0400 Subject: [PATCH 7/7] Rework a match to possibly be more understandable --- src/preprocess/links.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index 9a0bacf7..624d74d6 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -196,17 +196,19 @@ fn parse_include_path(path: &str) -> LinkType<'static> { }; let end = parts.next(); - let has_end = end.is_some(); - let end = end.and_then(|s| s.parse::().ok()); + // If `end` is empty string or any other value that can't be parsed as a usize, treat this + // include as a range with only a start bound. However, if end isn't specified, include only + // the single line specified by `start`. + let end = end.map(|s| s.parse::()); - match (start, end, has_end) { - (Some(start), Some(end), _) => LinkType::IncludeRange(path, LineRange::from(start..end)), - (Some(start), None, true) => LinkType::IncludeRange(path, LineRange::from(start..)), - (Some(start), None, false) => { - LinkType::IncludeRange(path, LineRange::from(start..start + 1)) + match (start, end) { + (Some(start), Some(Ok(end))) => LinkType::IncludeRange(path, LineRange::from(start..end)), + (Some(start), Some(Err(_))) => LinkType::IncludeRange(path, LineRange::from(start..)), + (Some(start), None) => LinkType::IncludeRange(path, LineRange::from(start..start + 1)), + (None, Some(Ok(end))) => LinkType::IncludeRange(path, LineRange::from(..end)), + (None, None) | (None, Some(Err(_))) => { + LinkType::IncludeRange(path, LineRange::from(RangeFull)) } - (None, Some(end), _) => LinkType::IncludeRange(path, LineRange::from(..end)), - (None, None, _) => LinkType::IncludeRange(path, LineRange::from(RangeFull)), } }