diff --git a/Cargo.toml b/Cargo.toml
index 69c41abd..dcf2c377 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,7 @@ clap = "2.2.1"
 handlebars = "0.20.0"
 rustc-serialize = "0.3.18"
 pulldown-cmark = "0.0.8"
-nom = "1.2.3"
+nom = "1.2.4"
 
 # Watch feature
 notify = { version = "2.5.5", optional = true }
diff --git a/src/parse/nom.rs b/src/parse/nom.rs
index 2c23f20d..8a44a5ae 100644
--- a/src/parse/nom.rs
+++ b/src/parse/nom.rs
@@ -1,4 +1,4 @@
-use nom::{self, IResult, alphanumeric};
+use nom::{self, IResult};
 use std::str;
 use std::str::FromStr;
 
@@ -15,6 +15,7 @@ fn link(i: &[u8]) -> IResult<&[u8], Link> {
 }
 
 /// Parser for parsing the title part of the link: [title](destination)
+///                                                ^^^^^^^
 ///
 /// From the Common Mark spec (http://spec.commonmark.org/0.26/#links):
 ///
@@ -22,9 +23,47 @@ fn link(i: &[u8]) -> IResult<&[u8], Link> {
 /// (a) they are backslash-escaped or
 /// (b) they appear as a matched pair of brackets,
 /// with an open bracket [, a sequence of zero or more inlines, and a close bracket ].
-///                                                ^^^^^^^
+///
 fn link_text(i: &[u8]) -> IResult<&[u8], String> {
-    map_res!(i, map_res!(delimited!(char!('['), is_not!("[]"), char!(']')), str::from_utf8), FromStr::from_str)
+    map_res!(i,
+        map_res!(
+            delimited!(
+                // Begin with '['
+                char!('['),
+                // Followed by anything that is not '[' or ']'
+                // Make sure to allow escaped brackets and balanced brackets
+                recognize!(many1!(alt!(not_unescaped_bracket | balanced_brackets))),
+                // End with ']'
+                char!(']')
+            ),
+            str::from_utf8
+        ),
+        FromStr::from_str
+    )
+}
+
+/// Recognizes a balanced pair of brackets together with everything between
+/// them (escaped brackets and nested balanced pairs included), e.g. `[a[b]]`.
+#[cfg_attr(rustfmt, rustfmt_skip)]
+fn balanced_brackets(i: &[u8]) -> IResult<&[u8], &[u8]> {
+    recognize!(i,
+        delimited!(
+            // Begin with '['
+            char!('['),
+            // Followed by anything that is not '[' or ']'
+            // Make sure to allow escaped brackets and balanced brackets
+            many0!(alt!(not_unescaped_bracket | balanced_brackets)),
+            // End with ']'
+            char!(']')
+        )
+    )
+}
+
+/// Recognizes a run of bytes containing no unescaped bracket. The escaped
+/// forms `\[` and `\]` are accepted and kept verbatim in the output; exactly
+/// one bracket is consumed per backslash, so in `\]]` the second `]` is left.
+fn not_unescaped_bracket(i: &[u8]) -> IResult<&[u8], &[u8]> {
+    escaped!(i, is_not!("\\[]"), '\\', alt!(char!('[') | char!(']')))
 }
 
 /// Parser for parsing the destination part of the link: [title](destination)
@@ -37,6 +76,33 @@ fn link_destination(i: &[u8]) -> IResult<&[u8], String> {
 mod tests {
 
     use nom::{self, IResult};
+    use nom::Err::{NodePosition, Position};
+    use nom::ErrorKind::Escaped;
+
+    // Tests not_unescaped_bracket
+    #[test]
+    fn not_unescaped_bracket() {
+        assert_eq!(super::not_unescaped_bracket(b"a"), IResult::Done(&b""[..], &b"a"[..]));
+        assert_eq!(super::not_unescaped_bracket(b"-"), IResult::Done(&b""[..], &b"-"[..]));
+        assert_eq!(super::not_unescaped_bracket(b"\\["), IResult::Done(&b""[..], &b"\\["[..]));
+        assert_eq!(super::not_unescaped_bracket(b"]"), IResult::Error(NodePosition(Escaped, &[93][..], Box::new(Position(Escaped, &[93][..])))));
+    }
+
+    // Tests for balanced brackets
+    #[test]
+    fn balanced_brackets() {
+        assert_eq!(super::balanced_brackets(b"[a]"), nom::IResult::Done(&b""[..], &b"[a]"[..]));
+    }
+
+    #[test]
+    fn balanced_brackets_empty() {
+        assert_eq!(super::balanced_brackets(b"[]"), nom::IResult::Done(&b""[..], &b"[]"[..]));
+    }
+
+    #[test]
+    fn balanced_brackets_nested() {
+        assert_eq!(super::balanced_brackets(b"[abc[a]]"), nom::IResult::Done(&b""[..], &b"[abc[a]]"[..]));
+    }
 
     // Tests for link_text
 
@@ -66,6 +132,6 @@ mod tests {
     #[test]
     fn link_text_brackets() {
         assert_eq!(super::link_text(b"[Intro[]]"), nom::IResult::Done(&b""[..], String::from("Intro[]")));
-        assert_eq!(super::link_text(br"[Intro\[]"), nom::IResult::Done(&b""[..], String::from("Intro[")));
+        assert_eq!(super::link_text(br"[Intro\]]"), nom::IResult::Done(&b""[..], String::from(r"Intro\]")));
     }
 }