[ create a new paste ] login | about

Project: programmingpraxis
Link: http://programmingpraxis.codepad.org/sSob4fZM    [ raw code | fork ]

programmingpraxis - Scheme, pasted on Sep 6:
; regular expressions

(define-syntax list-match
  (syntax-rules ()
    ((_ expr (pattern fender ... template) ...)
      (let ((obj expr))
        (cond ((list-match-aux obj pattern fender ...
                (list template)) => car) ...
              (else (error 'list-match "pattern failure")))))))

(define-syntax list-match-aux
  (lambda (stx)
    (define (underscore? x)
      (and (identifier? x) (free-identifier=? x (syntax _))))
    (syntax-case stx (quote quasiquote)
      ((_ obj pattern template)
        (syntax (list-match-aux obj pattern #t template)))
      ((_ obj () fender template)
        (syntax (and (null? obj) fender template)))
      ((_ obj underscore fender template)
        (underscore? (syntax underscore))
        (syntax (and fender template)))
      ((_ obj var fender template)
        (identifier? (syntax var))
        (syntax (let ((var obj)) (and fender template))))
      ((_ obj (quote datum) fender template)
        (syntax (and (equal? obj (quote datum)) fender template)))
      ((_ obj (quasiquote datum) fender template)
        (syntax (and (equal? obj (quasiquote datum)) fender template)))
      ((_ obj (kar . kdr) fender template)
        (syntax (and (pair? obj)
                (let ((kar-obj (car obj)) (kdr-obj (cdr obj)))
                  (list-match-aux kar-obj kar
                        (list-match-aux kdr-obj kdr fender template))))))
      ((_ obj const fender template)
        (syntax (and (equal? obj const) fender template))))))

(define (range . args)
  (case (length args)
    ((1) (range 0 (car args) (if (negative? (car args)) -1 1)))
    ((2) (range (car args) (cadr args) (if (< (car args) (cadr args)) 1 -1)))
    ((3) (let ((le? (if (negative? (caddr args)) >= <=)))
           (let loop ((x(car args)) (xs '()))
             (if (le? (cadr args) x)
                 (reverse xs)
                 (loop (+ x (caddr args)) (cons x xs))))))
    (else (error 'range "unrecognized arguments"))))

(define-syntax assert
  (syntax-rules ()
    ((assert expr result)
      (if (not (equal? expr result))
          (for-each display `(
            #\newline "failed assertion:" #\newline
            expr #\newline "expected: " ,result
            #\newline "returned: " ,expr #\newline))))))

(define (rx-match? rx text)
  (cond ((null? rx) #t)
        ((equal? (car rx) '(bol))
          (match-here (cdr rx) (string->list text)))
        (else (match rx (string->list text)))))

(define (match rx text)
  (cond ((null? rx) #t)
        ((null? text) (match-here rx text))
        (else (or (match-here rx text) (match rx (cdr text))))))

(define (match-here rx text)
  (if (null? rx) #t
    (case (caar rx)
      ((eol) (null? text))
      ((clo) (match-star rx text))
      (else (and (pair? text)
                 (match-one (car rx) (car text))
                 (match-here (cdr rx) (cdr text)))))))

(define (match-star rx text)
  (cond ((match-here (cdr rx) text) #t)
        ((and (pair? text) (match-one (cdar rx) (car text)))
          (match-star rx (cdr text)))
        (else #f)))

(define (match-one rx text)
  (case (car rx)
    ((any) #t)
    ((lit) (char=? (cadr rx) text))
    ((ccl) (member text (cdr rx)))
    ((ncl) (not (member text (cdr rx))))))

(define (make-rx regexp)
  (let loop ((first? #t) (rs (string->list regexp)) (zs '()))
    (list-match rs
      (() (reverse zs))
      ((#\$) (reverse (cons '(eol) zs)))
      ((#\^ . rest) first? (loop #f rest (cons '(bol) zs)))
      ((#\\ c \#* . rest)
        (loop #f rest (cons (list 'clo 'lit
          (case c ((#\n) #\newline) ((#\t) #\tab) (else c))) zs)))
      ((#\\ c . rest)
        (loop #f rest (cons (list 'lit
          (case c ((#\n) #\newline) ((#\t) #\tab) (else c))) zs)))
      ((#\[ #\^ . rest)
        (list-match (get-class rest)
          ((class) (reverse (cons (cons 'ncl class) zs)))
          ((class rest)
            (if (and (pair? rest) (char=? (car rest) #\*))
                (loop #f (cdr rest) (cons (cons 'clo (cons 'ncl class)) zs))
                (loop #f rest (cons (cons 'ncl class) zs))))))
      ((#\[ . rest)
        (list-match (get-class rest)
          ((class) (reverse (cons (cons 'ccl class) zs)))
          ((class rest)
            (if (and (pair? rest) (char=? (car rest) #\*))
                (loop #f (cdr rest) (cons (cons 'clo (cons 'ccl class)) zs))
                (loop #f rest (cons (cons 'ccl class) zs))))))
      ((#\. #\* . rest) (loop #f rest (cons (list 'clo 'any) zs)))
      ((#\. . rest) (loop #f rest (cons '(any) zs)))
      ((c #\* . rest) (loop #f rest (cons (list 'clo 'lit c) zs)))
      ((c . rest) (loop #f rest (cons (list 'lit c) zs)))
      (else (error 'make-rx "unrecognized regular expression")))))

(define (get-class class)
  (define (char-range a b)
    (map integer->char
      (range (char->integer a) (+ (char->integer b) 1))))
  (let loop ((cs class) (zs '()))
    (list-match cs
      ((#\] . rest) (pair? zs) (list zs rest))
      ((#\] . rest) (loop rest (cons #\] zs)))
      ((#\ c . rest) (loop rest (cons c zs)))
      ((a #\- b . rest)
        (or (and (char-numeric? a) (char-numeric? b) (char<? a b))
            (and (char-upper-case? a) (char-upper-case? b) (char<? a b))
            (and (char-lower-case? a) (char-lower-case? b) (char<? a b)))
        (loop rest (append (char-range a b) zs)))
      ((c . rest) (loop rest (cons c zs)))
      (else (error 'get-class "unrecognized class element")))))

(define (test-rx)
  (define (test pat str result)
    (assert (rx-match? (make-rx pat) str) result))
  (test "" "" #t) (test "" "a" #t) (test "a" "" #f)
  (test "a" "a" #t) (test "a" "b" #f) (test "a" "ab" #t)
  (test "a" "ba" #t) (test "a" "aa" #t) (test "ab" "a" #f)
  (test "ab" "ab" #t) (test "ab" "ba" #f) (test "ab" "aab" #t)
  (test "ab" "bab" #t) (test "ab" "abab" #t) (test "ab" "ac" #f)
  (test "ab" "acb" #f) (test "\\n" "a\n" #t) (test "\\n" "abc" #f)
  (test "\\t" "a\t" #t) (test "\\t" "abc" #f) (test "\\b" "abc" #t)
  (test "\\b" "ac" #f) (test "a\\b" "ab" #t) (test "a\\b" "ba" #f)
  (test "^ab" "abc" #t) (test "^ab" "aab" #f) (test "ab$" "ab" #t)
  (test "ab$" "aab" #t) (test "ab$" "aba" #f) (test "^ab$" "ab" #t)
  (test "^ab$" "abc" #f) (test "." "" #f) (test "." "a" #t)
  (test "a." "a" #f) (test "a." "ab" #t) (test "a." "bb" #f)
  (test "[abc]" "a" #t) (test "[abc]" "b" #t) (test "[abc]" "c" #t)
  (test "[abc]" "d" #f) (test "[a-c]" "a" #t) (test "[a-c]" "b" #t)
  (test "[a-c]" "c" #t) (test "[a-c]" "d" #f) (test "[A-C]" "A" #t)
  (test "[A-C]" "B" #t) (test "[A-C]" "C" #t) (test "[A-C]" "D" #f)
  (test "[1-3]" "1" #t) (test "[1-3]" "2" #t) (test "[1-3]" "3" #t)
  (test "[1-3]" "4" #f) (test "[c-a]" "a" #t) (test "[c-a]" "b" #f)
  (test "[-]" "-" #t) (test "[-]" "a" #f) (test "[\n]" "\n" #t)
  (test "[\t]" "\t" #t) (test "[\n]" "a" #f) (test "[\t]" "a" #f)
  (test "[a-cd-f]" "e" #t) (test "[a-cd-f]" "g" #f)
  (test "[]]" "a" #f) (test "[]]" "[]" #t)
  (test "[^abc]" "a" #f) (test "[^abc]" "b" #f) (test "[^abc]" "c" #f)
  (test "[^abc]" "d" #t) (test "[^a-c]" "a" #f) (test "[^a-c]" "b" #f)
  (test "[^a-c]" "c" #f) (test "[^a-c]" "d" #t) (test "[^A-C]" "A" #f)
  (test "[^A-C]" "B" #f) (test "[^A-C]" "C" #f) (test "[^A-C]" "D" #t)
  (test "[^1-3]" "1" #f) (test "[^1-3]" "2" #f) (test "[^1-3]" "3" #f)
  (test "[^1-3]" "4" #t) (test "[^c-a]" "a" #f) (test "[^c-a]" "b" #t)
  (test "[^-]" "-" #f) (test "[^-]" "a" #t) (test "[^\n]" "\n" #f)
  (test "[^\t]" "\t" #f) (test "[^\n]" "a" #t) (test "[^\t]" "a" #t)
  (test "[^a-cd-f]" "e" #f) (test "[^a-cd-f]" "g" #t)
  (test "[^]]" "a" #t) (test "[^]]" "]" #f) (test "a*" "aaa" #t)
  (test "a*" "bbb" #t) (test "bb*" "abc" #t) (test "aa*" "bc" #f)
  (test "ab*c" "abbc" #t) (test "ab*c" "ac" #t) (test "ab*c" "abc" #t)
  (test "a.*c" "abbc" #t) (test "a.*c" "ac" #t) (test "a.*c" "abc" #t)
  (test "a[b-d]*e" "abcde" #t) (test "a[b-d]*e" "axe" #f)
  (test "a*a*a" "a" #t) (test "a*a*a" "aaa" #t) (test "a*a*a" "b" #f)
)


Output:
No errors or program output.


Create a new paste based on this one


Comments: