; regular expressions, part 1
; (list-match expr clause ...)
; Each clause is (pattern template) or (pattern fender template).
; EXPR is evaluated once; the clauses are tried in order and the value of
; the template of the first clause whose pattern (and fender, if given)
; succeeds is returned.  An error is raised when no clause succeeds.
(define-syntax list-match
  (syntax-rules ()
    ((_ subject (pat guard ... body) ...)
     (let ((value subject))
       ; The body is wrapped in a one-element list so that a body which
       ; evaluates to #f is still seen as a successful match by cond;
       ; "=> car" then unwraps the result.
       (cond
         ((list-match-aux value pat guard ... (list body)) => car)
         ...
         (else (error 'list-match "pattern failure")))))))
; (list-match-aux obj pattern template)
; (list-match-aux obj pattern fender template)
; Expands into an expression that yields the value of TEMPLATE when OBJ
; matches PATTERN and FENDER is true, and #f otherwise.  The clauses of
; the syntax-case below are tried in order, so the order matters.
(define-syntax list-match-aux
(lambda (stx)
; True when x is an identifier that is free-identifier=? to _.
(define (underscore? x)
(and (identifier? x) (free-identifier=? x (syntax _))))
(syntax-case stx (quote quasiquote)
; No fender supplied: re-dispatch with #t as the fender.
((_ obj pattern template)
(syntax (list-match-aux obj pattern #t template)))
; Pattern (): succeeds only when obj is the empty list.
((_ obj () fender template)
(syntax (and (null? obj) fender template)))
; Pattern _ : succeeds for any obj and binds nothing.
((_ obj underscore fender template)
(underscore? (syntax underscore))
(syntax (and fender template)))
; Any other identifier: bind it to obj; the fender and template are
; evaluated inside the scope of that binding.
((_ obj var fender template)
(identifier? (syntax var))
(syntax (let ((var obj)) (and fender template))))
; Pattern (quote datum): obj must be equal? to the quoted datum.
((_ obj (quote datum) fender template)
(syntax (and (equal? obj (quote datum)) fender template)))
; Pattern (quasiquote datum): obj must be equal? to the value of the
; quasiquoted expression.
((_ obj (quasiquote datum) fender template)
(syntax (and (equal? obj (quasiquote datum)) fender template)))
; Pair pattern: obj must be a pair.  Its car is matched against kar with
; no fender; the template of that match is a nested match of the cdr
; against kdr, which carries the original fender and template.
((_ obj (kar . kdr) fender template)
(syntax (and (pair? obj)
(let ((kar-obj (car obj)) (kdr-obj (cdr obj)))
(list-match-aux kar-obj kar
(list-match-aux kdr-obj kdr fender template))))))
; Anything else is treated as a constant expression compared with equal?.
((_ obj const fender template)
(syntax (and (equal? obj const) fender template))))))
; (range stop)            => numbers from 0 up (or down) to, but excluding, stop
; (range start stop)      => numbers from start to, but excluding, stop,
;                            stepping by 1 or -1 toward stop
; (range start stop step) => start, start+step, ... stopping before stop
; Returns a list.  Raises an error for any other argument count, and for
; a zero step that could never reach stop.
(define (range . args)
  (case (length args)
    ((1) (range 0 (car args) (if (negative? (car args)) -1 1)))
    ((2) (range (car args) (cadr args) (if (< (car args) (cadr args)) 1 -1)))
    ((3) (let ((start (car args)) (stop (cadr args)) (step (caddr args)))
           ; A zero step with start < stop would never terminate, so it
           ; is rejected.  A zero step with start >= stop still yields
           ; the empty list.
           (if (and (zero? step) (< start stop))
               (error 'range "step must be non-zero")
               ; done? is applied as (done? stop x): with a negative step
               ; the walk ends once x has dropped to stop or below,
               ; otherwise once x has reached stop or above.
               (let ((done? (if (negative? step) >= <=)))
                 (let loop ((x start) (xs '()))
                   (if (done? stop x)
                       (reverse xs)
                       (loop (+ x step) (cons x xs))))))))
    (else (error 'range "unrecognized arguments"))))
; make-rx: compile the regular-expression string REGEXP into a list of
; elements, in pattern order.  The elements produced are
;   (bol)            for ^ as the first character of the pattern
;   (eol)            for $ as the last character of the pattern
;   (any)            for .
;   (lit c)          for a literal character c, or an escaped \c
;   (ccl . chars)    for a character class [...]
;   (ncl . chars)    for a negated character class [^...]
; and the same elements prefixed with clo -- (clo lit c), (clo any),
; (clo ccl . chars), (clo ncl . chars) -- when followed by *.
; A ^ or $ in any other position is an ordinary literal.
(define (make-rx regexp)
  ; Translate the character following a backslash: \n is newline, \t is
  ; tab, any other character stands for itself.
  (define (unescape c)
    (case c ((#\n) #\newline) ((#\t) #\tab) (else c)))
  ; first? is true only while at the start of the pattern; rs holds the
  ; characters still to be scanned; zs holds the elements built so far,
  ; most recent first.
  (let loop ((first? #t) (rs (string->list regexp)) (zs '()))
    ; Parse the class text in CHARS (what follows "[" or "[^") with
    ; get-class, add the class (or its closure, when a * follows) under
    ; TAG, which is 'ccl or 'ncl, and continue scanning.
    (define (class-element tag chars)
      (list-match (get-class chars)
        ((class) (reverse (cons (cons tag class) zs)))
        ((class rest)
          (if (and (pair? rest) (char=? (car rest) #\*))
              (loop #f (cdr rest) (cons (cons 'clo (cons tag class)) zs))
              (loop #f rest (cons (cons tag class) zs))))))
    (list-match rs
      (() (reverse zs))
      ((#\$) (reverse (cons '(eol) zs)))
      ((#\^ . rest) first? (loop #f rest (cons '(bol) zs)))
      ; The third pattern element must be the character literal #\* so
      ; that only an escape really followed by * becomes a closure.
      ((#\\ c #\* . rest)
        (loop #f rest (cons (list 'clo 'lit (unescape c)) zs)))
      ((#\\ c . rest)
        (loop #f rest (cons (list 'lit (unescape c)) zs)))
      ; "[^" must be tried before "[" so that negation is recognised.
      ((#\[ #\^ . rest) (class-element 'ncl rest))
      ((#\[ . rest) (class-element 'ccl rest))
      ((#\. #\* . rest) (loop #f rest (cons (list 'clo 'any) zs)))
      ((#\. . rest) (loop #f rest (cons '(any) zs)))
      ((c #\* . rest) (loop #f rest (cons (list 'clo 'lit c) zs)))
      ((c . rest) (loop #f rest (cons (list 'lit c) zs)))
      (else (error 'make-rx "unrecognized regular expression")))))
; get-class: parse the body of a character class.  CLASS is the list of
; characters that follows the opening "[" (or "[^").  Returns a
; two-element list: the characters belonging to the class, and the
; characters remaining after the closing "]".  The members are
; accumulated by consing, so they are not in textual order.  Raises an
; error if the input ends before a closing "]" is found.
(define (get-class class)
  ; All characters from a through b inclusive, in ascending order.
  (define (char-range a b)
    (map integer->char
         (range (char->integer a) (+ (char->integer b) 1))))
  (let loop ((cs class) (zs '()))
    (list-match cs
      ; A "]" closes the class only once it has at least one member ...
      ((#\] . rest) (pair? zs) (list zs rest))
      ; ... so a "]" in first position is an ordinary member.
      ((#\] . rest) (loop rest (cons #\] zs)))
      ; Backslash escape: the next character is taken literally, with \n
      ; and \t translated to newline and tab as make-rx does outside
      ; classes.
      ((#\\ c . rest)
        (loop rest
              (cons (case c ((#\n) #\newline) ((#\t) #\tab) (else c)) zs)))
      ; a-b is a range only when both ends are digits, both upper-case
      ; or both lower-case, and a precedes b; otherwise the characters
      ; are handled one at a time by the clause below.
      ((a #\- b . rest)
        (or (and (char-numeric? a) (char-numeric? b) (char<? a b))
            (and (char-upper-case? a) (char-upper-case? b) (char<? a b))
            (and (char-lower-case? a) (char-lower-case? b) (char<? a b)))
        (loop rest (append (char-range a b) zs)))
      ((c . rest) (loop rest (cons c zs)))
      ; Reached when cs is empty, i.e. the class was never closed.
      (else (error 'get-class "unrecognized class element")))))
; Compile a few sample patterns and print each result on its own line.
(for-each
  (lambda (pattern)
    (display (make-rx pattern))
    (newline))
  '("[0-9][0-9]*"
    "^..*$"
    "hello"
    "^ *hello *$"
    "^[^x].*[0-9] *x$"))