Skip to content

Commit fcb89d5

Browse files
authored
Fix fenceposting issue with docstring extraction (#191)
1 parent 9a40b0a commit fcb89d5

File tree

2 files changed

+70
-69
lines changed

2 files changed

+70
-69
lines changed

src/marginalia/parser.clj

+69-68
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,9 @@
355355
:else
356356
(dispatch-inner-form form raw nspace-sym)))
357357

358-
(defn extract-docstring [m raw nspace-sym]
359-
(let [raw (join "\n" (subvec raw (-> m :start dec) (:end m)))
360-
form (:form m)]
361-
(dispatch-form form raw nspace-sym)))
358+
(defn extract-docstring [{:keys [start end form]} raw-lines nspace-sym]
359+
(let [new-lines (join "\n" (subvec raw-lines (dec start) (min end (count raw-lines))))]
360+
(dispatch-form form new-lines nspace-sym)))
362361

363362
(defn- ->str [m]
364363
(-> (-> m :form .content)
@@ -371,82 +370,84 @@
371370
:start (:start f)
372371
:end (:end s)})
373372

374-
(defn comment? [o]
375-
(->> o :form (instance? Comment)))
373+
(defn comment? [{:keys [form]}]
374+
(instance? Comment form))
376375

377-
(defn code? [o]
378-
(and (->> o :form (instance? Comment) not)
379-
(->> o :form nil? not)))
376+
(defn code? [{:keys [form] :as o}]
377+
(and (not (nil? form))
378+
(not (comment? o))))
380379

381-
(defn adjacent? [f s]
382-
(= (-> f :end) (-> s :start dec)))
380+
(defn adjacent?
381+
"Two parsed objects are adjacent if the end of the first is followed by the start of the second."
382+
[{:keys [end] :as _first} {:keys [start] :as _second}]
383+
(= end (dec start)))
383384

384385
(defn arrange-in-sections [parsed-code raw-code]
385386
(loop [sections []
386-
f (first parsed-code)
387-
s (second parsed-code)
388-
nn (nnext parsed-code)
389-
nspace nil]
387+
f (first parsed-code)
388+
s (second parsed-code)
389+
nn (nnext parsed-code)
390+
nspace nil]
390391
(if f
391392
(cond
392-
;; ignore comments with only one semicolon
393-
(and (comment? f) (re-find #"^;(\s|$)" (-> f :form .content)))
394-
(recur sections s (first nn) (next nn) nspace)
395-
;; merging comments block
396-
(and (comment? f) (comment? s) (adjacent? f s))
397-
(recur sections (merge-comments f s)
398-
(first nn) (next nn)
399-
nspace)
400-
;; merging adjacent code blocks
401-
(and (code? f) (code? s) (adjacent? f s))
402-
(let [[fdoc fcode nspace] (extract-docstring f raw-code nspace)
403-
[sdoc scode _] (extract-docstring s raw-code nspace)]
404-
(recur sections (assoc s
405-
:type :code
406-
:raw (str (or (:raw f) fcode) "\n" scode)
407-
:docstring (str (or (:docstring f) fdoc) "\n\n" sdoc))
408-
(first nn) (next nn) nspace))
409-
;; adjacent comments are added as extra documentation to code block
410-
(and (comment? f) (code? s) (adjacent? f s))
411-
(let [[doc code nspace] (extract-docstring s raw-code nspace)]
412-
(recur sections (assoc s
413-
:type :code
414-
:raw (if *delete-lifted-comments*
415-
;; this is far from perfect but should work
416-
;; for most cases: erase matching comments
417-
;; and then remove lines that are blank
418-
(-> (reduce (fn [raw comment]
419-
(replace raw
420-
(str comment "\n")
421-
"\n"))
422-
code
423-
(:text f))
424-
(replace #"\n\s+\n" "\n"))
425-
code)
426-
:docstring (str doc "\n\n" (->str f)))
427-
(first nn) (next nn) nspace))
428-
;; adding comment section
429-
(comment? f)
430-
(recur (conj sections (assoc f :type :comment :raw (->str f)))
431-
s
432-
(first nn) (next nn)
433-
nspace)
434-
;; adding code section
435-
:else
436-
(let [[doc code nspace] (extract-docstring f raw-code nspace)]
437-
(recur (conj sections (if (= (:type f) :code)
438-
f
439-
{:type :code
440-
:raw code
441-
:docstring doc}))
442-
s (first nn) (next nn) nspace)))
393+
;; ignore comments with only one semicolon
394+
(and (comment? f) (re-find #"^;(\s|$)" (-> f :form .content)))
395+
(recur sections s (first nn) (next nn) nspace)
396+
;; merging comments block
397+
(and (comment? f) (comment? s) (adjacent? f s))
398+
(recur sections (merge-comments f s)
399+
(first nn) (next nn)
400+
nspace)
401+
;; merging adjacent code blocks
402+
(and (code? f) (code? s) (adjacent? f s))
403+
(let [[fdoc fcode nspace] (extract-docstring f raw-code nspace)
404+
[sdoc scode _] (extract-docstring s raw-code nspace)]
405+
(recur sections (assoc s
406+
:type :code
407+
:raw (str (or (:raw f) fcode) "\n" scode)
408+
:docstring (str (or (:docstring f) fdoc) "\n\n" sdoc))
409+
(first nn) (next nn) nspace))
410+
;; adjacent comments are added as extra documentation to code block
411+
(and (comment? f) (code? s) (adjacent? f s))
412+
(let [[doc code nspace] (extract-docstring s raw-code nspace)]
413+
(recur sections (assoc s
414+
:type :code
415+
:raw (if *delete-lifted-comments*
416+
;; this is far from perfect but should work
417+
;; for most cases: erase matching comments
418+
;; and then remove lines that are blank
419+
(-> (reduce (fn [raw comment]
420+
(replace raw
421+
(str comment "\n")
422+
"\n"))
423+
code
424+
(:text f))
425+
(replace #"\n\s+\n" "\n"))
426+
code)
427+
:docstring (str doc "\n\n" (->str f)))
428+
(first nn) (next nn) nspace))
429+
;; adding comment section
430+
(comment? f)
431+
(recur (conj sections (assoc f :type :comment :raw (->str f)))
432+
s
433+
(first nn) (next nn)
434+
nspace)
435+
;; adding code section
436+
:else
437+
(let [[doc code nspace] (extract-docstring f raw-code nspace)]
438+
(recur (conj sections (if (= (:type f) :code)
439+
f
440+
{:type :code
441+
:raw code
442+
:docstring doc}))
443+
s (first nn) (next nn) nspace)))
443444
sections)))
444445

445446
(defn parse [source-string]
446447
(let [make-reader #(java.io.BufferedReader.
447448
(java.io.StringReader. (str source-string "\n")))
448-
lines (vec (line-seq (make-reader)))
449-
reader (clojure.lang.LineNumberingPushbackReader. (make-reader))
449+
lines (vec (line-seq (make-reader)))
450+
reader (clojure.lang.LineNumberingPushbackReader. (make-reader))
450451
old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \;))]
451452
(try
452453
(set-comment-reader read-comment)

test/marginalia/parse_test.clj

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
(deftest test-inline-literals
88
(is (= (count (marginalia.parser/parse "(ns test)")) 1))
9-
;; (is (= (count (marginalia.parser/parse "(ns test)\n123")) 1)) ;; still failing
9+
(is (= (count (marginalia.parser/parse "(ns test)\n123")) 1))
1010
(is (= (count (marginalia.parser/parse "(ns test)\n123\n")) 1))
1111
(is (= (count (marginalia.parser/parse "(ns test)\n\"string\"")) 1))
1212
(is (= (count (marginalia.parser/parse "(ns test)\n\"some string\"")) 1))

0 commit comments

Comments
 (0)