Skip to content

Commit 2dd3255

Browse files
committed
Refactor thematic_break parsing
- Use uncons to simplify logic - Separate helper function def from case analysis - Simplify failure logic
1 parent 04611aa commit 2dd3255

File tree

1 file changed

+23
-23
lines changed

1 file changed

+23
-23
lines changed

src/parser.ml

+23-23
Original file line numberDiff line numberDiff line change
@@ -216,23 +216,26 @@ let ( ||| ) p1 p2 s = try p1 s with Fail -> p2 s
216216
let trim_leading_ws s = Sub.drop_while is_whitespace s
217217
let trim_trailing_ws s = Sub.drop_last_while is_whitespace s
218218
let trim_ws s = trim_leading_ws s |> trim_trailing_ws
219-
220219
let is_empty s = Sub.is_empty (trim_leading_ws s)
221220

222-
let thematic_break s =
223-
match Sub.head s with
224-
| Some (('*' | '_' | '-') as c) ->
225-
let rec loop n s =
226-
match Sub.head s with
227-
| Some c1 when c = c1 -> loop (succ n) (Sub.tail s)
228-
| Some w when is_whitespace w -> loop n (Sub.tail s)
229-
| Some _ -> raise Fail
230-
| None ->
231-
if n < 3 then raise Fail;
232-
Lthematic_break
233-
in
234-
loop 1 (Sub.tail s)
235-
| Some _ | None -> raise Fail
221+
(* See https://spec.commonmark.org/0.30/#thematic-breaks *)
222+
let thematic_break =
223+
let accept symb chars =
224+
let rec loop n s =
225+
match Sub.uncons s with
226+
| Some (c, tl) when symb = c -> loop (succ n) tl
227+
(* Thematic break chars can be separated by whitespace *)
228+
| Some (w, tl) when is_whitespace w -> loop n tl
229+
(* Three or more of the same thematic break chars found *)
230+
| None when n >= 3 -> Lthematic_break
231+
| _ -> raise Fail
232+
in
233+
loop 1 chars
234+
in
235+
fun s ->
236+
match Sub.uncons s with
237+
| Some ((('*' | '_' | '-') as symb), rest) -> accept symb rest
238+
| Some _ | None -> raise Fail
236239

237240
let setext_heading s =
238241
match Sub.head s with
@@ -333,7 +336,7 @@ let atx_heading s =
333336
let s, a =
334337
match Sub.last s with Some '}' -> attribute_string s | _ -> (s, [])
335338
in
336-
let s = (trim_ws s) in
339+
let s = trim_ws s in
337340
let rec loop t =
338341
match Sub.last t with
339342
| Some '#' -> loop (Sub.drop_last t)
@@ -411,7 +414,7 @@ let info_string c s =
411414
let s, a =
412415
match Sub.last s with Some '}' -> attribute_string s | _ -> (s, [])
413416
in
414-
let s = (trim_ws s) in
417+
let s = trim_ws s in
415418
let rec loop s =
416419
match Sub.head s with
417420
(* TODO use is_whitespace *)
@@ -698,7 +701,7 @@ let tag_string s =
698701
let s, a =
699702
match Sub.last s with Some '}' -> attribute_string s | _ -> (s, [])
700703
in
701-
let s = (trim_ws s) in
704+
let s = trim_ws s in
702705
let rec loop s =
703706
match Sub.head s with
704707
(* TODO use is_whitespace *)
@@ -712,8 +715,7 @@ let tag_string s =
712715
let def_list s =
713716
let s = Sub.tail s in
714717
match Sub.head s with
715-
| Some w when is_whitespace w->
716-
Ldef_list (String.trim (Sub.to_string s))
718+
| Some w when is_whitespace w -> Ldef_list (String.trim (Sub.to_string s))
717719
| _ -> raise Fail
718720

719721
let indented_code ind s =
@@ -1861,9 +1863,7 @@ let link_reference_definition st : attributes Ast.link_def =
18611863
loop false
18621864
in
18631865
let ws1 st =
1864-
match next st with
1865-
| w when is_whitespace w -> ws st
1866-
| _ -> raise Fail
1866+
match next st with w when is_whitespace w -> ws st | _ -> raise Fail
18671867
in
18681868
ignore (sp3 st);
18691869
let label = link_label false st in

0 commit comments

Comments
 (0)