Here is a minimal set of dependencies for lib/markup.scm:string->sxml that, together with deps/irregex.scm, allow string->sxml to be exercised in the REPL, with the smallest number of moving parts and without needing a local instance. The append-element function is from lib/utils.scm and the rest from lib/markup.scm. The quotelink is as it was after "20 Feb, 2020 4 commits" and quotelink2 is the same with the regex from >>65.
$ cat test.scm
(define (append-element l . e)
(append l e))
(define *board* "prog")
(define *thread* "39")
(define (transform-rule name regex transform)
(define (dispatch op)
(cond ((eq? op 'name) name)
((eq? op 'regex) regex)
((eq? op 'transform) transform)))
dispatch)
(define (transform markup) (apply markup '(transform)))
(define (regex markup) (apply markup '(regex)))
(define (name markup) (apply markup '(name)))
(define (string->sxml markup s)
(define (string->sxml-rec s res)
(let ((match (irregex-search (regex markup) s)))
(cond ((string-null? s)
res)
((not match)
(append-element res s))
(else
(let* ((start (irregex-match-start-index match))
(end (irregex-match-end-index match))
(substr (irregex-match-substring match))
(s1 (substring s 0 start))
(s2 (substring s end (string-length s))))
(if (string-null? s1)
(string->sxml-rec
s2
(append-element res ((transform markup) substr)))
(if (and (eq? (name markup) 'del) ;; exception to escape spoiler inside code
(between-code? s1 s2))
(string->sxml-rec "" (append-element res (string-append s1 substr s2)))
(string->sxml-rec
s2
(append-element res s1 ((transform markup) substr))))))))))
(string->sxml-rec s '()))
;; edge false positive (between-code? "==code== ==code==" "==")
;; could add another pass of spoiler, but ok good-enough
(define (between-code? s1 s2)
(let ((m1 (irregex-search (irregex ".*==$|.*==[^ ]") s1)) ;opening code in s1
(m2 (irregex-search (irregex ".*[^ ]==") s1)) ;closing code in s1
(m3 (irregex-search (irregex "^==|.*?[^ ]==") s2)) ;closing code in s2
(imei irregex-match-end-index))
(if (and m1 m3 (or (not m2) (>= (imei m1) (imei m2))))
#t
#f)))
(define quotelink
(transform-rule
'quotelink
(irregex ">>([1-9][0-9]*|([1-9][0-9]*)-([1-9][0-9]*))(,([1-9][0-9]*|([1-9][0-9]*-[1-9][0-9]*)))*")
(lambda (sub) `(a (@ (href ,(string-append
"/" *board*
"/" *thread*
"/" (string-tail sub 2))))
,sub))))
(define quotelink2
(transform-rule
'quotelink
(irregex ">>(([1-9][0-9]{0,2})|(([1-9][0-9]{0,2})-([1-9][0-9]{0,2})))(,(([1-9][0-9]{0,2})|(([1-9][0-9]{0,2})-([1-9][0-9]{0,2})))){0,11}")
(lambda (sub) `(a (@ (href ,(string-append
"/" *board*
"/" *thread*
"/" (string-tail sub 2))))
,sub))))
The string->sxml equivalents of >>82:
$ mit-scheme --load irregex.scm --load test.scm
[...]
;Loading "irregex.scm"... done
;Loading "test.scm"... done
1 ]=> (string->sxml quotelink ">>1,3,5,111-222,300")
;Value 13: ((a (@ (href "/prog/39/1,3,5,111-222,300")) ">>1,3,5,111-222,300"))
1 ]=> (string->sxml quotelink2 ">>1,3,5,111-222,300")
;Value 14: ((a (@ (href "/prog/39/1,3,5,111")) ">>1,3,5,111") "-222,300")
1 ]=>
The outer iteration and the digit iteration are used, but the alternation is not, in violation of "leftmost, longest".