Skip to content

Commit fd5d5e6

Browse files
committed
fix(SourceCode): Fix and improve default SourceCode implementation
- Fix a panic when a 0 length span covers the end of a document - Fix incorrect `line_count` - Add new unit tests and complete existing ones - Improve readability
1 parent f88e331 commit fd5d5e6

File tree

1 file changed

+181
-72
lines changed

1 file changed

+181
-72
lines changed

src/source_impls.rs

+181-72
Original file line numberDiff line numberDiff line change
@@ -5,91 +5,99 @@ use std::{borrow::Cow, collections::VecDeque, fmt::Debug, sync::Arc};
55

66
use crate::{MietteError, MietteSpanContents, SourceCode, SourceSpan, SpanContents};
77

8+
#[derive(Clone, Copy)]
9+
struct LineInfo {
10+
line_no: usize,
11+
start: usize,
12+
end: usize,
13+
}
14+
815
fn context_info<'a>(
916
input: &'a [u8],
1017
span: &SourceSpan,
1118
context_lines_before: usize,
1219
context_lines_after: usize,
1320
) -> Result<MietteSpanContents<'a>, MietteError> {
14-
let mut offset = 0usize;
15-
let mut line_count = 0usize;
16-
let mut start_line = 0usize;
17-
let mut start_column = 0usize;
18-
let mut before_lines_starts = VecDeque::new();
19-
let mut current_line_start = 0usize;
20-
let mut end_lines = 0usize;
21-
let mut post_span = false;
22-
let mut post_span_got_newline = false;
23-
let mut iter = input.iter().copied().peekable();
24-
while let Some(char) = iter.next() {
25-
if matches!(char, b'\r' | b'\n') {
26-
line_count += 1;
27-
if char == b'\r' && iter.next_if_eq(&b'\n').is_some() {
28-
offset += 1;
21+
let mut iter = input
22+
.split_inclusive(|b| *b == b'\n')
23+
.enumerate()
24+
.map(|(line_no, line)| {
25+
let offset = unsafe { line.as_ptr().offset_from(input.as_ptr()) as usize };
26+
LineInfo {
27+
line_no,
28+
start: offset,
29+
end: offset + line.len(),
2930
}
30-
if offset < span.offset() {
31-
// We're before the start of the span.
32-
start_column = 0;
33-
before_lines_starts.push_back(current_line_start);
34-
if before_lines_starts.len() > context_lines_before {
35-
start_line += 1;
36-
before_lines_starts.pop_front();
37-
}
38-
} else if offset >= span.offset() + span.len().saturating_sub(1) {
39-
// We're after the end of the span, but haven't necessarily
40-
// started collecting end lines yet (we might still be
41-
// collecting context lines).
42-
if post_span {
43-
start_column = 0;
44-
if post_span_got_newline {
45-
end_lines += 1;
46-
} else {
47-
post_span_got_newline = true;
48-
}
49-
if end_lines >= context_lines_after {
50-
offset += 1;
51-
break;
52-
}
53-
}
54-
}
55-
current_line_start = offset + 1;
56-
} else if offset < span.offset() {
57-
start_column += 1;
58-
}
31+
});
5932

60-
if offset >= (span.offset() + span.len()).saturating_sub(1) {
61-
post_span = true;
62-
if end_lines >= context_lines_after {
63-
offset += 1;
64-
break;
65-
}
33+
// First line handled separately because otherwise, we can't distinguish
34+
// between `None` from an empty document (which we still want to handle as
35+
// a "single empty line"), and `None` from the end of the document.
36+
let mut line_starts = VecDeque::new();
37+
let mut line_info = match iter.next() {
38+
None => LineInfo {
39+
line_no: 0,
40+
start: 0,
41+
end: 0,
42+
},
43+
Some(info) => info,
44+
};
45+
line_starts.push_back(line_info);
46+
47+
// Get the "before" lines (including the line containing the start
48+
// of the span)
49+
while span.offset() >= line_info.end {
50+
line_info = match iter.next() {
51+
None => break,
52+
Some(info) => info,
53+
};
54+
55+
if line_starts.len() > context_lines_before {
56+
line_starts.pop_front();
6657
}
58+
line_starts.push_back(line_info);
59+
}
60+
let (start_lineno, start_offset, start_column) = {
61+
let start_info = line_starts.pop_front().unwrap();
62+
if context_lines_before > 0 {
63+
(start_info.line_no, start_info.start, 0)
64+
} else {
65+
(
66+
start_info.line_no,
67+
span.offset(),
68+
span.offset() - start_info.start,
69+
)
70+
}
71+
};
6772

68-
offset += 1;
73+
// Find the end of the span
74+
while span.offset() + span.len() > line_info.end {
75+
line_info = match iter.next() {
76+
None => break,
77+
Some(info) => info,
78+
};
6979
}
7080

71-
if offset >= (span.offset() + span.len()).saturating_sub(1) {
72-
let starting_offset = before_lines_starts.front().copied().unwrap_or_else(|| {
73-
if context_lines_before == 0 {
74-
span.offset()
75-
} else {
76-
0
77-
}
78-
});
79-
Ok(MietteSpanContents::new(
80-
&input[starting_offset..offset],
81-
(starting_offset, offset - starting_offset).into(),
82-
start_line,
83-
if context_lines_before == 0 {
84-
start_column
85-
} else {
86-
0
87-
},
88-
line_count,
89-
))
90-
} else {
91-
Err(MietteError::OutOfBounds)
81+
// Get the "after" lines
82+
if let Some(last) = iter.take(context_lines_after).last() {
83+
line_info = last;
84+
}
85+
if span.offset() + span.len() > line_info.end {
86+
return Err(MietteError::OutOfBounds);
9287
}
88+
let (end_lineno, end_offset) = if context_lines_after > 0 {
89+
(line_info.line_no, line_info.end)
90+
} else {
91+
(line_info.line_no, span.offset() + span.len())
92+
};
93+
94+
Ok(MietteSpanContents::new(
95+
&input[start_offset..end_offset],
96+
(start_offset..end_offset).into(),
97+
start_lineno,
98+
start_column,
99+
end_lineno - start_lineno + 1,
100+
))
93101
}
94102

95103
impl SourceCode for [u8] {
@@ -206,8 +214,10 @@ mod tests {
206214
let src = String::from("foo\n");
207215
let contents = src.read_span(&(0, 4).into(), 0, 0)?;
208216
assert_eq!("foo\n", std::str::from_utf8(contents.data()).unwrap());
217+
assert_eq!(SourceSpan::from((0, 4)), *contents.span());
209218
assert_eq!(0, contents.line());
210219
assert_eq!(0, contents.column());
220+
assert_eq!(1, contents.line_count());
211221
Ok(())
212222
}
213223

@@ -216,8 +226,10 @@ mod tests {
216226
let src = String::from("foobar");
217227
let contents = src.read_span(&(3, 3).into(), 1, 1)?;
218228
assert_eq!("foobar", std::str::from_utf8(contents.data()).unwrap());
229+
assert_eq!(SourceSpan::from((0, 6)), *contents.span());
219230
assert_eq!(0, contents.line());
220231
assert_eq!(0, contents.column());
232+
assert_eq!(1, contents.line_count());
221233
Ok(())
222234
}
223235

@@ -226,8 +238,10 @@ mod tests {
226238
let src = String::from("foo\nbar\nbaz\n");
227239
let contents = src.read_span(&(4, 4).into(), 0, 0)?;
228240
assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap());
241+
assert_eq!(SourceSpan::from((4, 4)), *contents.span());
229242
assert_eq!(1, contents.line());
230243
assert_eq!(0, contents.column());
244+
assert_eq!(1, contents.line_count());
231245
Ok(())
232246
}
233247

@@ -236,8 +250,58 @@ mod tests {
236250
let src = String::from("foo\nbarbar\nbaz\n");
237251
let contents = src.read_span(&(7, 4).into(), 0, 0)?;
238252
assert_eq!("bar\n", std::str::from_utf8(contents.data()).unwrap());
253+
assert_eq!(SourceSpan::from((7, 4)), *contents.span());
254+
assert_eq!(1, contents.line());
255+
assert_eq!(3, contents.column());
256+
assert_eq!(1, contents.line_count());
257+
Ok(())
258+
}
259+
260+
#[test]
261+
fn end_of_line_before_newline() -> Result<(), MietteError> {
262+
let src = String::from("foo\nbar\nbaz\n");
263+
let contents = src.read_span(&(7, 0).into(), 0, 0)?;
264+
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
265+
assert_eq!(SourceSpan::from((7, 0)), *contents.span());
239266
assert_eq!(1, contents.line());
240267
assert_eq!(3, contents.column());
268+
assert_eq!(1, contents.line_count());
269+
Ok(())
270+
}
271+
272+
#[test]
273+
fn end_of_line_after_newline() -> Result<(), MietteError> {
274+
let src = String::from("foo\nbar\nbaz\n");
275+
let contents = src.read_span(&(8, 0).into(), 0, 0)?;
276+
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
277+
assert_eq!(SourceSpan::from((8, 0)), *contents.span());
278+
assert_eq!(2, contents.line());
279+
assert_eq!(0, contents.column());
280+
assert_eq!(1, contents.line_count());
281+
Ok(())
282+
}
283+
284+
#[test]
285+
fn end_of_file_with_newline() -> Result<(), MietteError> {
286+
let src = String::from("foo\nbar\nbaz\n");
287+
let contents = src.read_span(&(12, 0).into(), 0, 0)?;
288+
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
289+
assert_eq!(SourceSpan::from((12, 0)), *contents.span());
290+
assert_eq!(2, contents.line());
291+
assert_eq!(4, contents.column());
292+
assert_eq!(1, contents.line_count());
293+
Ok(())
294+
}
295+
296+
#[test]
297+
fn end_of_file_without_newline() -> Result<(), MietteError> {
298+
let src = String::from("foo\nbar\nbaz");
299+
let contents = src.read_span(&(11, 0).into(), 0, 0)?;
300+
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
301+
assert_eq!(SourceSpan::from((11, 0)), *contents.span());
302+
assert_eq!(2, contents.line());
303+
assert_eq!(3, contents.column());
304+
assert_eq!(1, contents.line_count());
241305
Ok(())
242306
}
243307

@@ -246,8 +310,10 @@ mod tests {
246310
let src = String::from("foo\r\nbar\r\nbaz\r\n");
247311
let contents = src.read_span(&(5, 5).into(), 0, 0)?;
248312
assert_eq!("bar\r\n", std::str::from_utf8(contents.data()).unwrap());
313+
assert_eq!(SourceSpan::from((5, 5)), *contents.span());
249314
assert_eq!(1, contents.line());
250315
assert_eq!(0, contents.column());
316+
assert_eq!(1, contents.line_count());
251317
Ok(())
252318
}
253319

@@ -259,8 +325,10 @@ mod tests {
259325
"foo\nbar\nbaz\n",
260326
std::str::from_utf8(contents.data()).unwrap()
261327
);
328+
assert_eq!(SourceSpan::from((4, 12)), *contents.span());
262329
assert_eq!(1, contents.line());
263330
assert_eq!(0, contents.column());
331+
assert_eq!(3, contents.line_count());
264332
Ok(())
265333
}
266334

@@ -272,8 +340,10 @@ mod tests {
272340
"\nfoo\nbar\nbaz\n\n",
273341
std::str::from_utf8(contents.data()).unwrap()
274342
);
343+
assert_eq!(SourceSpan::from((8, 14)), *contents.span());
275344
assert_eq!(2, contents.line());
276345
assert_eq!(0, contents.column());
346+
assert_eq!(5, contents.line_count());
277347
let span: SourceSpan = (8, 14).into();
278348
assert_eq!(&span, contents.span());
279349
Ok(())
@@ -287,10 +357,49 @@ mod tests {
287357
"one\ntwo\n\n",
288358
std::str::from_utf8(contents.data()).unwrap()
289359
);
360+
assert_eq!(SourceSpan::from((0, 9)), *contents.span());
290361
assert_eq!(0, contents.line());
291362
assert_eq!(0, contents.column());
292363
let span: SourceSpan = (0, 9).into();
293364
assert_eq!(&span, contents.span());
365+
assert_eq!(3, contents.line_count());
294366
Ok(())
295367
}
368+
369+
#[test]
370+
fn empty_source() -> Result<(), MietteError> {
371+
let src = String::from("");
372+
373+
let contents = src.read_span(&(0, 0).into(), 0, 0)?;
374+
assert_eq!("", std::str::from_utf8(contents.data()).unwrap());
375+
assert_eq!(SourceSpan::from((0, 0)), *contents.span());
376+
assert_eq!(0, contents.line());
377+
assert_eq!(0, contents.column());
378+
assert_eq!(1, contents.line_count());
379+
380+
Ok(())
381+
}
382+
383+
#[test]
384+
fn empty_source_out_of_bounds() {
385+
let src = String::from("");
386+
387+
let contents = src.read_span(&(0, 1).into(), 0, 0);
388+
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
389+
390+
let contents = src.read_span(&(0, 2).into(), 0, 0);
391+
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
392+
393+
let contents = src.read_span(&(1, 0).into(), 0, 0);
394+
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
395+
396+
let contents = src.read_span(&(1, 1).into(), 0, 0);
397+
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
398+
399+
let contents = src.read_span(&(2, 0).into(), 0, 0);
400+
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
401+
402+
let contents = src.read_span(&(2, 1).into(), 0, 0);
403+
assert!(matches!(contents, Err(MietteError::OutOfBounds)));
404+
}
296405
}

0 commit comments

Comments
 (0)