; regular expressions
(define-syntax list-match
(syntax-rules ()
((_ expr (pattern fender ... template) ...)
(let ((obj expr))
(cond ((list-match-aux obj pattern fender ...
(list template)) => car) ...
(else (error 'list-match "pattern failure")))))))
(define-syntax list-match-aux
(lambda (stx)
(define (underscore? x)
(and (identifier? x) (free-identifier=? x (syntax _))))
(syntax-case stx (quote quasiquote)
((_ obj pattern template)
(syntax (list-match-aux obj pattern #t template)))
((_ obj () fender template)
(syntax (and (null? obj) fender template)))
((_ obj underscore fender template)
(underscore? (syntax underscore))
(syntax (and fender template)))
((_ obj var fender template)
(identifier? (syntax var))
(syntax (let ((var obj)) (and fender template))))
((_ obj (quote datum) fender template)
(syntax (and (equal? obj (quote datum)) fender template)))
((_ obj (quasiquote datum) fender template)
(syntax (and (equal? obj (quasiquote datum)) fender template)))
((_ obj (kar . kdr) fender template)
(syntax (and (pair? obj)
(let ((kar-obj (car obj)) (kdr-obj (cdr obj)))
(list-match-aux kar-obj kar
(list-match-aux kdr-obj kdr fender template))))))
((_ obj const fender template)
(syntax (and (equal? obj const) fender template))))))
(define (range . args)
(case (length args)
((1) (range 0 (car args) (if (negative? (car args)) -1 1)))
((2) (range (car args) (cadr args) (if (< (car args) (cadr args)) 1 -1)))
((3) (let ((le? (if (negative? (caddr args)) >= <=)))
(let loop ((x(car args)) (xs '()))
(if (le? (cadr args) x)
(reverse xs)
(loop (+ x (caddr args)) (cons x xs))))))
(else (error 'range "unrecognized arguments"))))
(define-syntax assert
(syntax-rules ()
((assert expr result)
(if (not (equal? expr result))
(for-each display `(
#\newline "failed assertion:" #\newline
expr #\newline "expected: " ,result
#\newline "returned: " ,expr #\newline))))))
(define (rx-match? rx text)
(cond ((null? rx) #t)
((equal? (car rx) '(bol))
(match-here (cdr rx) (string->list text)))
(else (match rx (string->list text)))))
(define (match rx text)
(cond ((null? rx) #t)
((null? text) (match-here rx text))
(else (or (match-here rx text) (match rx (cdr text))))))
(define (match-here rx text)
(if (null? rx) #t
(case (caar rx)
((eol) (null? text))
((clo) (match-star rx text))
(else (and (pair? text)
(match-one (car rx) (car text))
(match-here (cdr rx) (cdr text)))))))
(define (match-star rx text)
(cond ((match-here (cdr rx) text) #t)
((and (pair? text) (match-one (cdar rx) (car text)))
(match-star rx (cdr text)))
(else #f)))
(define (match-one rx text)
(case (car rx)
((any) #t)
((lit) (char=? (cadr rx) text))
((ccl) (member text (cdr rx)))
((ncl) (not (member text (cdr rx))))))
(define (make-rx regexp)
(let loop ((first? #t) (rs (string->list regexp)) (zs '()))
(list-match rs
(() (reverse zs))
((#\$) (reverse (cons '(eol) zs)))
((#\^ . rest) first? (loop #f rest (cons '(bol) zs)))
((#\\ c \#* . rest)
(loop #f rest (cons (list 'clo 'lit
(case c ((#\n) #\newline) ((#\t) #\tab) (else c))) zs)))
((#\\ c . rest)
(loop #f rest (cons (list 'lit
(case c ((#\n) #\newline) ((#\t) #\tab) (else c))) zs)))
((#\[ #\^ . rest)
(list-match (get-class rest)
((class) (reverse (cons (cons 'ncl class) zs)))
((class rest)
(if (and (pair? rest) (char=? (car rest) #\*))
(loop #f (cdr rest) (cons (cons 'clo (cons 'ncl class)) zs))
(loop #f rest (cons (cons 'ncl class) zs))))))
((#\[ . rest)
(list-match (get-class rest)
((class) (reverse (cons (cons 'ccl class) zs)))
((class rest)
(if (and (pair? rest) (char=? (car rest) #\*))
(loop #f (cdr rest) (cons (cons 'clo (cons 'ccl class)) zs))
(loop #f rest (cons (cons 'ccl class) zs))))))
((#\. #\* . rest) (loop #f rest (cons (list 'clo 'any) zs)))
((#\. . rest) (loop #f rest (cons '(any) zs)))
((c #\* . rest) (loop #f rest (cons (list 'clo 'lit c) zs)))
((c . rest) (loop #f rest (cons (list 'lit c) zs)))
(else (error 'make-rx "unrecognized regular expression")))))
(define (get-class class)
(define (char-range a b)
(map integer->char
(range (char->integer a) (+ (char->integer b) 1))))
(let loop ((cs class) (zs '()))
(list-match cs
((#\] . rest) (pair? zs) (list zs rest))
((#\] . rest) (loop rest (cons #\] zs)))
((#\ c . rest) (loop rest (cons c zs)))
((a #\- b . rest)
(or (and (char-numeric? a) (char-numeric? b) (char<? a b))
(and (char-upper-case? a) (char-upper-case? b) (char<? a b))
(and (char-lower-case? a) (char-lower-case? b) (char<? a b)))
(loop rest (append (char-range a b) zs)))
((c . rest) (loop rest (cons c zs)))
(else (error 'get-class "unrecognized class element")))))
(define (test-rx)
(define (test pat str result)
(assert (rx-match? (make-rx pat) str) result))
(test "" "" #t) (test "" "a" #t) (test "a" "" #f)
(test "a" "a" #t) (test "a" "b" #f) (test "a" "ab" #t)
(test "a" "ba" #t) (test "a" "aa" #t) (test "ab" "a" #f)
(test "ab" "ab" #t) (test "ab" "ba" #f) (test "ab" "aab" #t)
(test "ab" "bab" #t) (test "ab" "abab" #t) (test "ab" "ac" #f)
(test "ab" "acb" #f) (test "\\n" "a\n" #t) (test "\\n" "abc" #f)
(test "\\t" "a\t" #t) (test "\\t" "abc" #f) (test "\\b" "abc" #t)
(test "\\b" "ac" #f) (test "a\\b" "ab" #t) (test "a\\b" "ba" #f)
(test "^ab" "abc" #t) (test "^ab" "aab" #f) (test "ab$" "ab" #t)
(test "ab$" "aab" #t) (test "ab$" "aba" #f) (test "^ab$" "ab" #t)
(test "^ab$" "abc" #f) (test "." "" #f) (test "." "a" #t)
(test "a." "a" #f) (test "a." "ab" #t) (test "a." "bb" #f)
(test "[abc]" "a" #t) (test "[abc]" "b" #t) (test "[abc]" "c" #t)
(test "[abc]" "d" #f) (test "[a-c]" "a" #t) (test "[a-c]" "b" #t)
(test "[a-c]" "c" #t) (test "[a-c]" "d" #f) (test "[A-C]" "A" #t)
(test "[A-C]" "B" #t) (test "[A-C]" "C" #t) (test "[A-C]" "D" #f)
(test "[1-3]" "1" #t) (test "[1-3]" "2" #t) (test "[1-3]" "3" #t)
(test "[1-3]" "4" #f) (test "[c-a]" "a" #t) (test "[c-a]" "b" #f)
(test "[-]" "-" #t) (test "[-]" "a" #f) (test "[\n]" "\n" #t)
(test "[\t]" "\t" #t) (test "[\n]" "a" #f) (test "[\t]" "a" #f)
(test "[a-cd-f]" "e" #t) (test "[a-cd-f]" "g" #f)
(test "[]]" "a" #f) (test "[]]" "[]" #t)
(test "[^abc]" "a" #f) (test "[^abc]" "b" #f) (test "[^abc]" "c" #f)
(test "[^abc]" "d" #t) (test "[^a-c]" "a" #f) (test "[^a-c]" "b" #f)
(test "[^a-c]" "c" #f) (test "[^a-c]" "d" #t) (test "[^A-C]" "A" #f)
(test "[^A-C]" "B" #f) (test "[^A-C]" "C" #f) (test "[^A-C]" "D" #t)
(test "[^1-3]" "1" #f) (test "[^1-3]" "2" #f) (test "[^1-3]" "3" #f)
(test "[^1-3]" "4" #t) (test "[^c-a]" "a" #f) (test "[^c-a]" "b" #t)
(test "[^-]" "-" #f) (test "[^-]" "a" #t) (test "[^\n]" "\n" #f)
(test "[^\t]" "\t" #f) (test "[^\n]" "a" #t) (test "[^\t]" "a" #t)
(test "[^a-cd-f]" "e" #f) (test "[^a-cd-f]" "g" #t)
(test "[^]]" "a" #t) (test "[^]]" "]" #f) (test "a*" "aaa" #t)
(test "a*" "bbb" #t) (test "bb*" "abc" #t) (test "aa*" "bc" #f)
(test "ab*c" "abbc" #t) (test "ab*c" "ac" #t) (test "ab*c" "abc" #t)
(test "a.*c" "abbc" #t) (test "a.*c" "ac" #t) (test "a.*c" "abc" #t)
(test "a[b-d]*e" "abcde" #t) (test "a[b-d]*e" "axe" #f)
(test "a*a*a" "a" #t) (test "a*a*a" "aaa" #t) (test "a*a*a" "b" #f)
)