; j k rowling
(define (make-matrix rows columns . value)
(do ((m (make-vector rows)) (i 0 (+ i 1)))
((= i rows) m)
(if (null? value)
(vector-set! m i (make-vector columns))
(vector-set! m i (make-vector columns (car value))))))
(define (matrix-rows x) (vector-length x))
(define (matrix-cols x) (vector-length (vector-ref x 0)))
(define (matrix-ref m i j) (vector-ref (vector-ref m i) j))
(define (matrix-set! m i j x) (vector-set! (vector-ref m i) j x))
(define-syntax for
(syntax-rules ()
((for (var first past step) body ...)
(let ((ge? (if (< first past) >= <=)))
(do ((var first (+ var step)))
((ge? var past))
body ...)))
((for (var first past) body ...)
(let* ((f first) (p past) (s (if (< first past) 1 -1)))
(for (var f p s) body ...)))
((for (var past) body ...)
(let* ((p past)) (for (var 0 p) body ...)))))
(define (for-each-port reader proc . port)
(let ((p (if (null? port) (current-input-port) (car port))))
(let loop ((item (reader p)))
(if (not (eof-object? item))
(begin (proc item) (loop (reader p)))))))
(define (make-hash . args)
; (make-hash . hash eql?) -- return a newly-allocated empty hash table;
; the hash and eql? functions are optional, but if either is provided
; both must be; defaults are a universal hash function and equal?
; a hash table h is a function that takes a message and zero or more
; arguments; the insert, delete and update messages return a new function,
; so (set! h (h 'message args)) updates hash table h as requested
; (h 'lookup key) -- retrieves from hash table h the (cons key value)
; pair with the given key, or null
; (h 'insert key value) -- inserts a (cons key value) pair in hash table
; h, overwriting any previous value associated with the key
; (h 'delete key) -- removes from hash table h the (cons key value) pair
; with the given key, if it exists
; (h 'update key proc default) -- proc is a function that takes a key and
; value as arguments and returns a new value; if the key is present in
; hash table h, update calls proc with the key and its associated value
; and stores the value returned by proc in place of the original value,;
; otherwise update inserts a new (cons key default) pair in hash table h
; (h 'enlist) -- returns the (cons key value) pairs in hash table h as a list
; (h 'size) -- returns the number of (cons key value) pairs in hash table h
(define (uhash x) ; universal hash function
(define (mod n) (modulo n 4294967296))
(cond ((boolean? x) (if x 357913941 460175067))
((symbol? x) (hash (symbol->string x)))
((char? x) (char->integer x))
((integer? x) (mod x))
((real? x)
(let* ((r (inexact->exact x))
(n (numerator r))
(d (denominator r)))
(mod (+ n (* 37 d)))))
((rational? x) (mod (+ (numerator x) (* 37 (denominator x)))))
((complex? x)
(mod (+ (hash (real-part x)) (* 37 (hash (imag-part x))))))
((null? x) 477338855)
((pair? x)
(let loop ((x x) (s 0))
(if (null? x) s
(loop (cdr x) (mod (+ (* 31 s) (hash (car x))))))))
((vector? x)
(let loop ((i (- (vector-length x) 1)) (s 0))
(if (negative? i) s
(loop (- i 1) (mod (+ (* 31 s) (hash (vector-ref x i))))))))
((string? x)
(let loop ((i (- (string-length x) 1)) (s 0))
(if (negative? i) s
(loop (- i 1) (mod (+ (* 31 s) (hash (string-ref x i))))))))
((procedure? x) (error 'hash "can't hash procedure"))
((port? x) (error 'hash "can't hash port"))
(else (error 'hash "don't know how to hash object"))))
(define (scramble h) ; ensure minimum 20 bit result from hash function
(if (< h 4096) (* h 1048573) (if (< h 1048576) (* h 4093) h)))
(define (empty) (vector (make-vector w (list)) (list) (list)))
(define (vect t) (vector-ref t 0))
(define (lkid t) (vector-ref t 1))
(define (rkid t) (vector-ref t 2))
(define (get t i) ; fetch value from bucket i of tree t
(if (<= u i) (error 'get "out of bounds")
(let loop ((t t) (q (+ (quotient i w) 1)))
(if (= q 1) (vector-ref (vect t) (modulo i w))
(loop (if (even? (modulo q w)) (lkid t) (rkid t))
(quotient q 2))))))
(define (put t i v) ; store value v in bucket i, return new t
(cond ((< u i) (error 'put "out of bounds"))
((< i u) ; replace current value
(let loop ((t t) (q (+ (quotient i w) 1)))
(cond ((= q 1) (let ((x (vect t)))
(vector-set! x (modulo i w) v)
(vector x (lkid t) (rkid t))))
((even? q) (vector (vect t)
(loop (lkid t) (quotient q 2)) (rkid t)))
(else (vector (vect t)
(lkid t) (loop (rkid t) (quotient q 2)))))))
((zero? (modulo u w)) (set! u (+ u 1)) ; add new segment
(let loop ((t t) (q (+ (quotient i w) 1)))
(cond ((= q 1) (let ((x (make-vector w (list))))
(vector-set! x 0 v) (vector x (list) (list))))
((even? q) (vector (vect t)
(loop (lkid t) (quotient q 2)) (rkid t)))
(else (vector (vect t)
(lkid t) (loop (rkid t) (quotient q 2)))))))
(else (set! u (+ u 1)) ; expand within current segment
(let loop ((t t) (q (+ (quotient i w) 1)))
(cond ((= q 1) (let ((x (vect t)))
(vector-set! x (modulo i w) v)
(vector x (lkid t) (rkid t))))
((even? q) (vector (vect t)
(loop (lkid t) (quotient q 2)) (rkid t)))
(else (vector (vect t)
(lkid t) (loop (rkid t) (quotient q 2)))))))))
(define (hirem t) ; remove last bucket from t, return new t
(if (zero? u) (error 'hirem "out of bounds"))
(set! u (- u 1))
(if (zero? (modulo u w))
(let loop ((t t) (q (+ (quotient u w) 1))) ; remove last segment
(cond ((= q 1) (list))
((even? q) (vector (vect t)
(loop (lkid t) (quotient q 2)) (rkid t)))
(else (vector (vect t)
(lkid t) (loop (rkid t) (quotient q 2))))))
(let loop ((t t) (q (+ (quotient u w) 1)))
; remove last bucket within last segment
(cond ((= q 1) (let ((x (vect t)))
(vector-set! x (modulo u w) (list))
(vector x (lkid t) (rkid t))))
((even? q) (vector (vect t)
(loop (lkid t) (quotient q 2)) (rkid t)))
(else (vector (vect t)
(lkid t) (loop (rkid t) (quotient q 2))))))))
(define (index k) ; index of bucket, whether before or after split
(let* ((h (scramble (hash k))) (h-mod-m (modulo h m)))
(if (< h-mod-m p) (modulo h (+ m m)) h-mod-m)))
(define (grow t) ; split bucket, move some keys to new bucket
(let ((old p) (new (+ p m)))
(set! p (+ p 1))
(when (= p m) (set! m (* 2 m)) (set! p 0))
(let loop ((xs (get t old)) (ys (list)) (zs (list)))
(cond ((null? xs)
(set! t (put t old ys))
(set! t (put t new zs)))
((= (index (caar xs)) new)
(loop (cdr xs) ys (cons (car xs) zs)))
(else (loop (cdr xs) (cons (car xs) ys) zs))))
t))
(define (shrink t) ; coalesce last bucket, move all keys
(set! p (- p 1))
(when (< p 0) (set! m (quotient m 2)) (set! p (- m 1)))
(set! t (put t p (append (get t p) (get t (- u 1)))))
(set! t (hirem t))
t)
(define (lookup t k) ; return key/value pair, or null
(let loop ((bs (get t (index k))))
(cond ((null? bs) (list)) ; not found
((eql? (caar bs) k) (car bs)) ; found
(else (loop (cdr bs)))))) ; keep looking
(define (enlist t) ; return all key/value pairs in a list
(do ((i 0 (+ i 1)) (xs (list) (append (get t i) xs))) ((= i u) xs)))
(define (insert t k v) ; insert new key/value pair, or replace value
(if (and (positive? u) (< hi (/ s u))) (set! t (grow t)))
(let ((b (index k)))
(let loop ((bs (get t b)) (xs (list)))
(cond ((null? bs) ; insert new key/value pair
(set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t)
((eql? (caar bs) k) ; replace existing value
(set! t (put t b (cons (cons k v) (append (cdr bs) xs)))) t)
(else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking
(define (delete t k) ; delete key/value pair if key exists
(if (and (< n u) (< (/ s u) lo)) (set! t (shrink t)))
(let ((b (index k)))
(let loop ((bs (get t b)) (xs (list)))
(cond ((null? bs) xs) ; not in table, nothing to do
((eql? (caar bs) k) ; in table, delete
(set! s (- s 1)) (set! t (put t b (append (cdr bs) xs))) t)
(else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking
(define (update t k p v) ; update value, or add new key/value pair
(if (and (positive? u) (< hi (/ s u))) (set! t (grow t)))
(let ((b (index k)))
(let loop ((bs (get t b)) (xs (list)))
(cond ((null? bs) ; not in table, insert
(set! s (+ s 1)) (set! t (put t b (cons (cons k v) xs))) t)
((eql? (caar bs) k) ; in table, update
(set! t (put t b (cons (cons k (p k (cdar bs)))
(append (cdr bs) xs)))) t)
(else (loop (cdr bs) (cons (car bs) xs))))))) ; keep looking
(define (new t) (lambda (message . args) (dispatch t message args)))
(define (dispatch t message args) ; perform requested function
(define (arity n)
(if (not (= (length args) n)) (error 'dispatch "incorrect arity")))
(case message ; includes synonymns for some messages
((display debug) ; for debugging
(display "u = ") (display u)
(display "; m = ") (display m)
(display "; p = ") (display p)
(display "; s = ") (display s) (newline)
(do ((i 0 (+ i 1))) ((= i u))
(display i) (display ": ")
(display (get t i)) (newline)))
((lookup fetch get) (arity 1) (apply lookup t args))
((insert store put insert! store! put!)
(arity 2) (new (apply insert t args)))
((delete remove delete! remove!)
(arity 1) (new (apply delete t args)))
((update update!)
(arity 3) (new (apply update t args)))
((size count length) (arity 0) s)
((enlist to-list) (arity 0) (enlist t))))
(define w 64) ; width of a segment of the growable array
(define u 64) ; number of buckets currently in use
(define n 64) ; minimum number of buckets in hash table
(define m 64) ; current maximum number of buckets (Larson's maxp = n * 2^l)
; initialize u, n and m to w; 64 or 256 are good values to use
(define p 0) ; pointer to next bucket to be split 0 .. m-1
(define s 0) ; number of key/value pairs currently in table
(define lo 1) ; minimum load factor (average chain length is 2)
(define hi 3) ; maximum load factor (average chain length is 2)
; (/ hi lo) must be strictly greater than 2
; set hash and eql? based on arguments or default
(define hash #f) (define eql? #f) ; placeholders
(cond ((= (length args) 2) (set! hash (car args)) (set! eql? (cadr args)))
(else (set! hash uhash) (set! eql? equal?)))
(new (empty))) ; main function
(define (read-word p) ; next maximal sequence of letters from current input
(let loop ((c (read-char p)) (cs (list)))
(cond ((eof-object? c) (if (null? cs) c (list->string (reverse cs))))
((char-alphabetic? c) (loop (read-char) (cons (char-downcase c) cs)))
((pair? cs) (list->string (reverse cs)))
(else (loop (read-char) cs)))))
(define (prep file-name) ; list of 100 most-frequent words in file-name
(define (lt? a b)
(if (= (cdr a) (cdr b))
(string<? (car a) (car b))
(< (cdr b) (cdr a))))
(let ((t (make-hash)))
(with-input-from-file file-name
(lambda ()
(for-each-port read-word
(lambda (word)
(set! t (t 'update word (lambda (k v) (+ v 1)) 1))))))
(let loop ((n 100) (ws (sort lt? (t 'enlist))) (prev 0) (zs (list)))
(cond ((null? ws) (reverse zs))
((= (cdar ws) prev)
(loop (- n 1) (cdr ws) prev
(cons (append (car zs) (list (caar ws))) (cdr zs))))
((<= n 0) (reverse zs))
(else (loop (- n 1) (cdr ws) (cdar ws) (cons (list (caar ws)) zs)))))))
(define (comp1 w1 w2) ; compare two word lists on number of swaps to make equal
(define (make-assoc ws)
(let loop ((k 1) (ws ws) (zs (list)))
(if (null? ws) zs
(loop (+ k (length (car ws))) (cdr ws)
(append (map (lambda (w) (cons w k)) (car ws)) zs)))))
(define (lookup w ws) (cond ((assoc w ws) => cdr) (else 1000)))
(let ((w1 (make-assoc w1)) (w2 (make-assoc w2)))
(let loop ((w w1) (s 0))
(if (pair? w)
(loop (cdr w) (+ s (min (abs (- (cdar w) (lookup (caar w) w2))) 100)))
(let loop ((w w2) (s s))
(if (null? w) s
(loop (cdr w) (+ s (if (= (lookup (caar w) w1) 1000) 100 0)))))))))
(define (equal xs ys) ; assume xs and ys are sorted
(let loop ((xs xs) (ys ys) (z 0))
(cond ((or (null? xs) (null? ys)) z)
((string<? (car xs) (car ys)) (loop (cdr xs) ys z))
((string<? (car ys) (car xs)) (loop xs (cdr ys) z))
(else (loop (cdr xs) (cdr ys) (+ z 1))))))
(define (comp2 w1 w2) ; compare two word lists on longest common subsequence
(let* ((x-len (length w1)) (y-len (length w2))
(x1 (+ x-len 1)) (y1 (+ y-len 1))
(xv (list->vector w1)) (yv (list->vector w2))
(m (make-matrix x1 y1)))
(for (x 0 x1)
(for (y 0 y1)
(if (or (zero? x) (zero? y))
(matrix-set! m x y 0)
(let ((e (equal (vector-ref xv (- x 1))
(vector-ref yv (- y 1)))))
(if (positive? e)
(matrix-set! m x y (+ e (matrix-ref m (- x 1) (- y 1))))
(matrix-set! m x y (max (matrix-ref m (- x 1) y)
(matrix-ref m x (- y 1)))))))))
(matrix-ref m x-len y-len)))