[ create a new paste ] login | about

Project: programmingpraxis
Link: http://programmingpraxis.codepad.org/A4XmTLTu    [ raw code | output | fork ]

programmingpraxis - Scheme, pasted on Sep 6:
; regular expressions, part 1

; list-match: evaluate an expression once, then try each clause in order.
; A clause is (pattern template) or (pattern fender template).  The value of
; the first clause whose pattern (and fender, if any) succeeds is returned.
; Each template is wrapped in a one-element list so that a template which
; itself evaluates to #f still counts as a successful match; car unwraps it.
; If no clause matches, an error is signalled.
(define-syntax list-match
  (syntax-rules ()
    ((_ subject (clause-pattern clause-fender ... clause-result) ...)
      (let ((value subject))
        (cond
          ((list-match-aux value clause-pattern clause-fender ...
             (list clause-result))
            => car)
          ...
          (else (error 'list-match "pattern failure")))))))

; list-match-aux: expand the match of a single pattern against OBJ into an
; AND expression.  The expansion evaluates to the value of TEMPLATE when the
; pattern matches and the fender is true, and to #f otherwise.
;   (list-match-aux obj pattern template)         ; fender defaults to #t
;   (list-match-aux obj pattern fender template)
; The clauses below are tried in order, so the specific pattern shapes
; (empty list, _, identifier, quote, quasiquote) must precede the general
; pair clause and the catch-all constant clause.
(define-syntax list-match-aux
  (lambda (stx)
    ; true when x is an identifier that refers to _ (the wildcard)
    (define (underscore? x)
      (and (identifier? x) (free-identifier=? x (syntax _))))
    (syntax-case stx (quote quasiquote)
      ; three-argument form: no fender given, so supply #t and re-expand
      ((_ obj pattern template)
        (syntax (list-match-aux obj pattern #t template)))
      ; () matches only the empty list
      ((_ obj () fender template)
        (syntax (and (null? obj) fender template)))
      ; _ matches anything and binds nothing
      ((_ obj underscore fender template)
        (underscore? (syntax underscore))
        (syntax (and fender template)))
      ; any other identifier matches anything and is bound to the object,
      ; so it is visible in both the fender and the template
      ((_ obj var fender template)
        (identifier? (syntax var))
        (syntax (let ((var obj)) (and fender template))))
      ; (quote datum) matches an object equal? to the quoted datum
      ((_ obj (quote datum) fender template)
        (syntax (and (equal? obj (quote datum)) fender template)))
      ; (quasiquote datum) matches an object equal? to the quasiquoted value
      ((_ obj (quasiquote datum) fender template)
        (syntax (and (equal? obj (quasiquote datum)) fender template)))
      ; a pair pattern: match the car, and use the match of the cdr (carrying
      ; the fender and template) as the template of the car match
      ((_ obj (kar . kdr) fender template)
        (syntax (and (pair? obj)
                (let ((kar-obj (car obj)) (kdr-obj (cdr obj)))
                  (list-match-aux kar-obj kar
                        (list-match-aux kdr-obj kdr fender template))))))
      ; anything else is treated as a constant compared with equal?
      ((_ obj const fender template)
        (syntax (and (equal? obj const) fender template))))))

; range: build a list of numbers.
;   (range stop)            counts from 0 toward stop (downward if stop < 0)
;   (range start stop)      counts from start toward stop by 1 or -1
;   (range start stop step) counts from start by step
; The start value is included and the stop value is excluded.  The result is
; the empty list when start is already at or past stop in the direction of
; step.  Signals an error for any other number of arguments, and for a zero
; step with start below stop, which could never terminate.
(define (range . args)
  (case (length args)
    ((1) (range 0 (car args) (if (negative? (car args)) -1 1)))
    ((2) (range (car args) (cadr args) (if (< (car args) (cadr args)) 1 -1)))
    ((3) (let ((start (car args)) (stop (cadr args)) (step (caddr args)))
           ; a zero step never advances; only the start >= stop case
           ; terminates (with the empty list), so reject the other one
           (if (and (zero? step) (< start stop))
               (error 'range "zero step would never reach stop"))
           ; a negative step finishes once x has dropped to stop or below;
           ; otherwise it finishes once x has risen to stop or above
           (let ((done? (if (negative? step) >= <=)))
             (let loop ((x start) (xs '()))
               (if (done? stop x)
                   (reverse xs)
                   (loop (+ x step) (cons x xs)))))))
    (else (error 'range "unrecognized arguments"))))

; make-rx: compile the string REGEXP into a list of tagged elements, in the
; order they appear in the regular expression:
;   (bol)         ^ as the first character: beginning of line
;   (eol)         $ as the last character: end of line
;   (any)         . matches any character
;   (lit c)       the literal character c
;   (ccl c ...)   [...]  a character class
;   (ncl c ...)   [^...] a negated character class
;   (clo . elem)  elem followed by *, e.g. (clo lit c), (clo any), (clo ccl c ...)
; A backslash makes the next character literal; \n and \t stand for newline
; and tab.  ^ anywhere but the start, and $ anywhere but the end, are literals.
(define (make-rx regexp)
  ; translate the character that follows a backslash
  (define (unescape c)
    (case c ((#\n) #\newline) ((#\t) #\tab) (else c)))
  ; first? is true only before the first element has been consumed;
  ; zs accumulates the compiled elements in reverse
  (let loop ((first? #t) (rs (string->list regexp)) (zs '()))
    (list-match rs
      (() (reverse zs))
      ((#\$) (reverse (cons '(eol) zs)))
      ((#\^ . rest) first? (loop #f rest (cons '(bol) zs)))
      ; escaped character followed by a literal star: closure of that character
      ; (the third element must be the character #\*, not a pattern variable,
      ; or every escape would swallow the character after it)
      ((#\\ c #\* . rest)
        (loop #f rest (cons (list 'clo 'lit (unescape c)) zs)))
      ((#\\ c . rest)
        (loop #f rest (cons (list 'lit (unescape c)) zs)))
      ; the negated-class clause must precede the plain class clause
      ((#\[ #\^ . rest)
        (list-match (get-class rest)
          ((class) (reverse (cons (cons 'ncl class) zs)))
          ((class rest)
            (if (and (pair? rest) (char=? (car rest) #\*))
                (loop #f (cdr rest) (cons (cons 'clo (cons 'ncl class)) zs))
                (loop #f rest (cons (cons 'ncl class) zs))))))
      ((#\[ . rest)
        (list-match (get-class rest)
          ((class) (reverse (cons (cons 'ccl class) zs)))
          ((class rest)
            (if (and (pair? rest) (char=? (car rest) #\*))
                (loop #f (cdr rest) (cons (cons 'clo (cons 'ccl class)) zs))
                (loop #f rest (cons (cons 'ccl class) zs))))))
      ((#\. #\* . rest) (loop #f rest (cons (list 'clo 'any) zs)))
      ((#\. . rest) (loop #f rest (cons '(any) zs)))
      ((c #\* . rest) (loop #f rest (cons (list 'clo 'lit c) zs)))
      ((c . rest) (loop #f rest (cons (list 'lit c) zs)))
      (else (error 'make-rx "unrecognized regular expression")))))

; get-class: parse the body of a character class.  CLASS is the list of
; characters that follows the opening [ (or [^).  Returns a two-element list:
; the characters that belong to the class, and the characters remaining after
; the closing ].  The members are accumulated by consing, so their order is
; not the order in which they were written.
;   - ] closes the class, except as the very first member, where it is literal
;   - backslash makes the next character literal; \n and \t stand for
;     newline and tab, as in make-rx
;   - a-b expands to every character from a to b when both are digits, both
;     upper-case or both lower-case and a precedes b; otherwise - is literal
; Signals an error if the input runs out before a closing ] is found.
(define (get-class class)
  ; all characters from a to b inclusive
  (define (char-range a b)
    (map integer->char
      (range (char->integer a) (+ (char->integer b) 1))))
  (let loop ((cs class) (zs '()))
    (list-match cs
      ((#\] . rest) (pair? zs) (list zs rest))
      ((#\] . rest) (loop rest (cons #\] zs)))
      ; escape: the introducer is the backslash character, written #\\
      ((#\\ c . rest)
        (loop rest (cons
          (case c ((#\n) #\newline) ((#\t) #\tab) (else c)) zs)))
      ((a #\- b . rest)
        (or (and (char-numeric? a) (char-numeric? b) (char<? a b))
            (and (char-upper-case? a) (char-upper-case? b) (char<? a b))
            (and (char-lower-case? a) (char-lower-case? b) (char<? a b)))
        (loop rest (append (char-range a b) zs)))
      ((c . rest) (loop rest (cons c zs)))
      (else (error 'get-class "unrecognized class element")))))

; compile each sample regular expression and print the result, one per line
(for-each
  (lambda (sample)
    (display (make-rx sample))
    (newline))
  '("[0-9][0-9]*"
    "^..*$"
    "hello"
    "^ *hello *$"
    "^[^x].*[0-9] *x$"))


Output:
1
2
3
4
5
((ccl 0 1 2 3 4 5 6 7 8 9) (clo ccl 0 1 2 3 4 5 6 7 8 9))
((bol) (any) (clo any) (eol))
((lit h) (lit e) (lit l) (lit l) (lit o))
((bol) (clo lit  ) (lit h) (lit e) (lit l) (lit l) (lit o) (clo lit  ) (eol))
((bol) (ncl x) (clo any) (ccl 0 1 2 3 4 5 6 7 8 9) (clo lit  ) (lit x) (eol))


Create a new paste based on this one


Comments: