ProvableHQ · acoglio · Feb 12, 2025 · Feb 14, 2025
diff --git a/console/network/environment/src/helpers/sanitizer.rs b/console/network/environment/src/helpers/sanitizer.rs
@@ -51,7 +51,7 @@ impl Sanitizer {
         )(string)
     }
 
-    /// Parse a safe character (in the sense explained in [string_parser::is_char_supported]).
+    /// Parse a safe character (in the sense explained in [is_char_supported]).
     /// Returns an error if no character is found or a non-safe character is found.
     /// The character is returned, along with the remaining input.
     ///
@@ -84,9 +84,8 @@ impl Sanitizer {
         }
     }
 
-    /// A newline parser that accepts:
-    ///
-    /// - A newline.
+    /// A parser that accepts:
+    /// - A newline, either `CR LF` or just `LF`.
     /// - The end of input.
     fn eol(string: &str) -> ParserResult<()> {
         alt((
@@ -113,15 +112,19 @@ impl Sanitizer {
 
     /// Parse a string until the end of line.
     ///
-    /// This parser accepts the multiline annotation (\) to break the string on several lines.
+    /// This parser accepts the multiline annotation (`\ LF`) to break the string on several lines.
+    ///
+    /// The line may end with a newline (either `CR LF` or just `LF`), or it may end with the input.
     ///
-    /// Discard any leading newline.
+    /// Return the body of the comment, i.e. what is between `//` and the end of line.
+    /// If the line ends with `CR LF`, the `CR` is included in the returned body.
+    /// The `LF`, if present, is never included in the returned body.
     fn str_till_eol(string: &str) -> ParserResult<&str> {
         // A heuristic approach is applied here in order to avoid costly parsing operations in the
         // most common scenarios: non-parsing methods are used to verify if the string has multiple
         // lines and if there are any unsafe characters.
         if let Some((before, after)) = string.split_once('\n') {
-            let is_multiline = before.ends_with('\\');
+            let is_multiline = before.ends_with('\\'); // is `LF` preceded by `\`?
 
             if !is_multiline {
                 let contains_unsafe_chars = !before.chars().all(is_char_supported);
@@ -130,7 +133,8 @@ impl Sanitizer {
                     Ok((after, before))
                 } else {
                     // `eoi` is used here instead of `eol`, since the earlier call to `split_once`
-                    // already removed the newline
+                    // already removed the `LF`. This will fail at the first unsafe character,
+                    // which is known to exist because we are under the condition contains_unsafe_chars.
                     recognize(Self::till(value((), Sanitizer::parse_safe_char), Self::eoi))(before)
                 }
             } else {
@@ -140,12 +144,19 @@ impl Sanitizer {
                         Self::eol,
                     )),
                     |i| {
+                        // Exclude the final `LF`, if any, from the comment body.
                         if i.as_bytes().last() == Some(&b'\n') { &i[0..i.len() - 1] } else { i }
                     },
                 )(string)
             }
-        } else {
+        } else if string.chars().all(is_char_supported) {
+            // There is no `LF`. We return all the characters up to the end of file.
             Ok((string, ""))
+        } else {
+            // `eoi` is used here because we are under the condition that there is no newline.
+            // This will fail at the first unsafe character, which is known to exist because
+            // we are under the condition that not all characters are safe.
+            recognize(Self::till(value((), Sanitizer::parse_safe_char), Self::eoi))(string)
         }
     }
 
@@ -303,5 +314,6 @@ mod tests {
         assert!(Sanitizer::parse_comments("/* hel\u{202d}lo */\nhello world").is_err());
         assert!(Sanitizer::parse_comments("/** hel\x00lo */\nhello world").is_err());
         assert!(Sanitizer::parse_comments("/** hel\u{202a}lo */\nhello world").is_err());
+        assert!(Sanitizer::parse_comments("// unsafe \u{202a} no newline").is_err());
     }
 }
diff --git a/console/network/environment/src/traits/parse_string.rs b/console/network/environment/src/traits/parse_string.rs
@@ -55,7 +55,7 @@ pub mod string_parser {
     /// Other ASCII control characters
     /// (except for horizontal tab, space, line feed, and carriage return, which are allowed)
     /// may or may not present dangers, but we see no good reason for allowing them.
-    /// At some point we may want disallow additional non-ASCII characters,
+    /// At some point we may want to disallow additional non-ASCII characters,
     /// if we see no good reason to allow them.
     ///
     /// Note that we say 'Unicode scalar values' above,
@@ -84,8 +84,8 @@ pub mod string_parser {
         }
     }
 
-    /// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6
-    /// hexadecimal numerals. We will combine this later with parse_escaped_char
+    /// Parse a Unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6
+    /// hexadecimal numerals. We will combine this later with [parse_escaped_char]
     /// to parse sequences like \u{00AC}.
     fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
     where
@@ -112,7 +112,7 @@ pub mod string_parser {
 
         // map_opt is like map_res, but it takes an Option instead of a Result. If
         // the function returns None, map_opt returns an error. In this case, because
-        // not all u32 values are valid unicode code points, we have to fallibly
+        // not all u32 values are valid Unicode code points, we have to fallibly
         // convert to char with from_u32.
         map_opt(parse_u32, std::char::from_u32)(input)
     }
@@ -130,8 +130,8 @@ pub mod string_parser {
                 parse_unicode,
                 // The `value` parser returns a fixed value (the first argument) if its
                 // parser (the second argument) succeeds. In these cases, it looks for
-                // the marker characters (n, r, t, etc) and returns the matching
-                // character (\n, \r, \t, etc).
+                // the marker characters (n, r, t, etc.) and returns the matching
+                // character (\n, \r, \t, etc.).
                 value('\n', char('n')),
                 value('\r', char('r')),
                 value('\t', char('t')),
@@ -162,7 +162,7 @@ pub mod string_parser {
         let not_quote_slash = is_not("\"\\");
 
         // `verify` runs a parser, then runs a verification function on the output of
-        // the parser. The verification function accepts out output only if it
+        // the parser. The verification function accepts output only if it
         // returns true. In this case, we want to ensure that the output of is_not
         // is non-empty.
         verify(not_quote_slash, |s: &str| !s.is_empty())(input)

diff --git a/ledger/block/src/verify.rs b/ledger/block/src/verify.rs
@@ -251,7 +251,7 @@ impl<N: Network> Block<N> {
                 "Leader certificate has an incorrect committee ID"
             );
 
-            // Check that all all certificates on each round have the same committee ID.
+            // Check that all certificates on each round have the same committee ID.
             cfg_iter!(subdag).try_for_each(|(round, certificates)| {
                 // Check that every certificate for a given round shares the same committee ID.
                 let expected_committee_id = certificates