; from the standard prelude

(define (string-downcase str)
  (list->string
    (map char-downcase
      (string->list str))))

(define-syntax assert
  (syntax-rules ()
    ((assert expr result)
      (if (not (equal? expr result))
          (for-each display `(
            #\newline "failed assertion:" #\newline
            expr #\newline "expected: " ,result
            #\newline "returned: " ,expr #\newline))))))

; porter stemming
; http://tartarus.org/~martin/PorterStemmer/index.html
; word is stored as a list of characters in reverse order

; #t if end of cs is consonant, else #f
(define (consonant? cs)
  (case (car cs)
    ((#\a #\e #\i #\o #\u) #f)
    ((#\y) (if (null? (cdr cs)) #t (vowel? (cdr cs))))
    (else #t)))

; #t if end of cs is vowel, else #f
(define (vowel? cs) (not (consonant? cs)))

; calculate number of instances of vowel followed by consonant
(define (measure cs)
  (let loop ((m 0) (cs cs) (zs '()) (prev? #t))
    (cond ((pair? cs)
            (loop m (cdr cs) (cons (if (consonant? cs) #\c #\v) zs) prev?))
          ((null? zs) m)
          ((char=? (car zs) #\v) (loop m cs (cdr zs) #f))
          ((not prev?) (loop (+ m 1) cs (cdr zs) #t))
          (else (loop m cs (cdr zs) #t)))))

; #t if any character is vowel, else #f
(define (v-stem? cs)
  (let loop ((cs cs))
    (cond ((null? cs) #f)
          ((vowel? cs) #t)
          (else (loop (cdr cs))))))

; #t if cs ends with two identical consonants, else #f
(define (dbl-cons? cs)
  (cond ((or (null? cs) (null? (cdr cs))) #f)
        ((not (char=? (car cs) (cadr cs))) #f)
        (else (consonant? cs))))

; #t if cs ends consonant-vowel-consonant (not wxy), else #f
(define (cvc? cs)
  (cond ((< (length cs) 3) #f)
        ((vowel? cs) #f)
        ((consonant? (cdr cs)) #f)
        ((vowel? (cddr cs)) #f)
        ((char=? (car cs) #\w) #f)
        ((char=? (car cs) #\x) #f)
        ((char=? (car cs) #\y) #f)
        (else #t)))

; remove str from end of cs, else #f
(define (ends? cs str)
  (let loop ((cs cs) (ss (reverse (string->list str))))
    (cond ((null? cs) #f)
          ((null? ss) cs)
          ((char=? (car cs) (car ss))
            (loop (cdr cs) (cdr ss)))
          (else #f))))

(define (q str)
  (lambda (cs)
    (if (not (v-stem? cs))
        (append (reverse (string->list str)) cs)
        (cond ((ends? cs "at") => (s "ate"))
              ((ends? cs "bl") => (s "ble"))
              ((ends? cs "iz") => (s "ize"))
              ((and (dbl-cons? cs)
                    (not (member (car cs) '(#\l #\s #\z))))
                (cdr cs))
              ((and (= (measure cs) 1) (cvc? cs))
                (cons #\e cs))
              (else cs)))))

(define (r str1 str2)
  (lambda (cs)
    (if (< 0 (measure cs))
        (append (reverse (string->list str2)) cs)
        (append (reverse (string->list str1)) cs))))

(define (s str)
  (lambda (cs)
    (append (reverse (string->list str)) cs)))

(define (t str)
  (lambda (cs)
    (if (< 1 (measure cs)) cs
      (append (reverse (string->list str)) cs))))

(define (step1a cs) ; plural
  (cond ((null? cs) cs) ((null? (cdr cs)) cs)
        ((ends? cs "sses") => (s "ss"))
        ((ends? cs "ies" ) => (s "i" ))
        ((ends? cs "ss"  ) => (s "ss"))
        ((ends? cs "s"   ) => (lambda (cs) cs))
        (else cs)))

(define (step1b cs) ; past participle
  (cond ((null? cs) cs) ((null? (cdr cs)) cs)
        ((ends? cs "eed") => (r "eed" "ee"))
        ((ends? cs "ed" ) => (q "ed"))
        ((ends? cs "ing") => (q "ing"))
        (else cs)))

(define (step1c cs) ; clean up
  (cond ((null? cs) cs) ((null? (cdr cs)) cs)
        ((ends? cs "y") =>
          (lambda (cs)
            (if (v-stem? cs)
                (cons #\i cs)
                (cons #\y cs))))
        (else cs)))

(define (step2 cs) ; double suffices
  (if (or (null? cs) (null? (cdr cs))) cs
    (case (cadr cs)
      ((#\a) (cond ((ends? cs "ational") => (r "ational" "ate" ))
                   ((ends? cs "tional" ) => (r "tional"  "tion"))
                   (else cs)))
      ((#\c) (cond ((ends? cs "enci"   ) => (r "enci"    "ence"))
                   ((ends? cs "anci"   ) => (r "anci"    "ance"))
                   (else cs)))
      ((#\e) (cond ((ends? cs "izer"   ) => (r "izer"    "ize" ))
                   (else cs)))
      ((#\g) (cond ((ends? cs "logi"   ) => (r "logi"    "log" ))
                   (else cs)))
      ((#\l) (cond ((ends? cs "bli"    ) => (r "bli"     "ble" ))
                   ((ends? cs "alli"   ) => (r "alli"    "al"  ))
                   ((ends? cs "entli"  ) => (r "entli"   "ent" ))
                   ((ends? cs "eli"    ) => (r "eli"     "e"   ))
                   ((ends? cs "ousli"  ) => (r "ousli"   "ous" ))
                   (else cs)))
      ((#\o) (cond ((ends? cs "ization") => (r "ization" "ize" ))
                   ((ends? cs "ation"  ) => (r "ation"   "ate" ))
                   ((ends? cs "ator"   ) => (r "ator"    "ate" ))
                   (else cs)))
      ((#\s) (cond ((ends? cs "alism"  ) => (r "alism"   "al"  ))
                   ((ends? cs "iveness") => (r "iveness" "ive" ))
                   ((ends? cs "fulness") => (r "fulness" "ful" ))
                   ((ends? cs "ousness") => (r "ousness" "ous" ))
                   (else cs)))
      ((#\t) (cond ((ends? cs "aliti"  ) => (r "aliti"   "al"  ))
                   ((ends? cs "iviti"  ) => (r "iviti"   "ive" ))
                   ((ends? cs "biliti" ) => (r "biliti"  "ble" ))
                   (else cs)))
      (else cs))))

(define (step3 cs) ; -ic, -full, -ness, and others
  (if (or (null? cs) (null? (cdr cs))) cs
    (case (car cs)
      ((#\e) (cond ((ends? cs "icate"  ) => (r "icate"   "ic"  ))
                   ((ends? cs "ative"  ) => (r "ative"   ""    ))
                   ((ends? cs "alize"  ) => (r "alize"   "al"  ))
                   (else cs)))
      ((#\i) (cond ((ends? cs "iciti"  ) => (r "iciti"   "ic"  ))
                   (else cs)))
      ((#\l) (cond ((ends? cs "ical"   ) => (r "ical"    "ic"  ))
                   ((ends? cs "ful"    ) => (r "ful"     ""    ))
                   (else cs)))
      ((#\s) (cond ((ends? cs "ness"   ) => (r "ness"    ""    ))
                   (else cs)))
      (else cs))))

(define (step4 cs) ; -ant, -ence and others with measure > 1
  (if (or (null? cs) (null? (cdr cs))) cs
    (case (cadr cs)
      ((#\a) (cond ((ends? cs "al"     ) => (t "al"   )) (else cs)))
      ((#\c) (cond ((ends? cs "ance"   ) => (t "ance" ))
                   ((ends? cs "ence"   ) => (t "ence" )) (else cs)))
      ((#\e) (cond ((ends? cs "er"     ) => (t "er"   )) (else cs)))
      ((#\i) (cond ((ends? cs "ic"     ) => (t "ic"   )) (else cs)))
      ((#\l) (cond ((ends? cs "able"   ) => (t "able" ))
                   ((ends? cs "ible"   ) => (t "ible" )) (else cs)))
      ((#\n) (cond ((ends? cs "ant"    ) => (t "ant"  ))
                   ((ends? cs "ement"  ) => (t "ement"))
                   ((ends? cs "ment"   ) => (t "ment" ))
                   ((ends? cs "ent"    ) => (t "ent"  )) (else cs)))
      ((#\o) (cond ((ends? cs "ion"    ) => (lambda (cs)
                                              (if (and (or (char=? (car cs) #\s)
                                                           (char=? (car cs) #\t))
                                                       (< 1 (measure cs)))
                                                  cs
                                                  (append (reverse (string->list "ion")) cs))))
                   ((ends? cs "ou"     ) => (t "ou"   )) (else cs)))
      ((#\s) (cond ((ends? cs "ism"    ) => (t "ism"  )) (else cs)))
      ((#\t) (cond ((ends? cs "ate"    ) => (t "ate"  ))
                   ((ends? cs "iti"    ) => (t "iti"  )) (else cs)))
      ((#\u) (cond ((ends? cs "ous"    ) => (t "ous"  )) (else cs)))
      ((#\v) (cond ((ends? cs "ive"    ) => (t "ive"  )) (else cs)))
      ((#\z) (cond ((ends? cs "ize"    ) => (t "ize"  )) (else cs)))
      (else cs))))

(define (step5a cs) ; words ending in #\e
  (if (not (char=? (car cs) #\e)) cs
    (let ((m (measure (cdr cs))))
      (if (< 1 m) (cdr cs)
        (if (and (= m 1) (not (cvc? (cdr cs)))) (cdr cs) cs)))))

(define (step5b cs) ; words ending in double #\l
  (if (and (< 1 (measure cs)) (char=? (car cs) #\l) (char=? (cadr cs) #\l))
      (cdr cs) cs))

(define (stem word)
  (if (< (string-length word) 3) (string-downcase word)
    (list->string (reverse
      (step5b (step5a (step4 (step3 (step2 (step1c (step1b (step1a
        (reverse (string->list (string-downcase word)))))))))))))))

(define (test-stem)
  (define (cs str) (reverse (string->list str)))
  (assert (step1a (cs "caresses"       )) (cs "caress"     ))
  (assert (step1a (cs "ponies"         )) (cs "poni"       ))
  (assert (step1a (cs "ties"           )) (cs "ti"         ))
  (assert (step1a (cs "caress"         )) (cs "caress"     ))
  (assert (step1a (cs "cats"           )) (cs "cat"        ))
  (assert (step1b (cs "feed"           )) (cs "feed"       ))
  (assert (step1b (cs "agreed"         )) (cs "agree"      ))
  (assert (step1b (cs "plastered"      )) (cs "plaster"    ))
  (assert (step1b (cs "bled"           )) (cs "bled"       ))
  (assert (step1b (cs "motoring"       )) (cs "motor"      ))
  (assert (step1b (cs "sing"           )) (cs "sing"       ))
  (assert (step1b (cs "conflated"      )) (cs "conflate"   ))
  (assert (step1b (cs "troubled"       )) (cs "trouble"    ))
  (assert (step1b (cs "sized"          )) (cs "size"       ))
  (assert (step1b (cs "hopping"        )) (cs "hop"        ))
  (assert (step1b (cs "tanned"         )) (cs "tan"        ))
  (assert (step1b (cs "falling"        )) (cs "fall"       ))
  (assert (step1b (cs "hissing"        )) (cs "hiss"       ))
  (assert (step1b (cs "fizzed"         )) (cs "fizz"       ))
  (assert (step1b (cs "failing"        )) (cs "fail"       ))
  (assert (step1b (cs "filing"         )) (cs "file"       ))
  (assert (step1c (cs "happy"          )) (cs "happi"      ))
  (assert (step1c (cs "sky"            )) (cs "sky"        ))
  (assert (step2  (cs "relational"     )) (cs "relate"     ))
  (assert (step2  (cs "conditional"    )) (cs "condition"  ))
  (assert (step2  (cs "rational"       )) (cs "rational"   ))
  (assert (step2  (cs "valenci"        )) (cs "valence"    ))
  (assert (step2  (cs "hesitanci"      )) (cs "hesitance"  ))
  (assert (step2  (cs "digitizer"      )) (cs "digitize"   ))
  (assert (step2  (cs "conformabli"    )) (cs "conformable"))
  (assert (step2  (cs "radicalli"      )) (cs "radical"    ))
  (assert (step2  (cs "differentli"    )) (cs "different"  ))
  (assert (step2  (cs "vileli"         )) (cs "vile"       ))
  (assert (step2  (cs "analogousli"    )) (cs "analogous"  ))
  (assert (step2  (cs "vietnamization" )) (cs "vietnamize" ))
  (assert (step2  (cs "predication"    )) (cs "predicate"  ))
  (assert (step2  (cs "operator"       )) (cs "operate"    ))
  (assert (step2  (cs "feudalism"      )) (cs "feudal"     ))
  (assert (step2  (cs "decisiveness"   )) (cs "decisive"   ))
  (assert (step2  (cs "hopefulness"    )) (cs "hopeful"    ))
  (assert (step2  (cs "callousness"    )) (cs "callous"    ))
  (assert (step2  (cs "formaliti"      )) (cs "formal"     ))
  (assert (step2  (cs "sensitiviti"    )) (cs "sensitive"  ))
  (assert (step2  (cs "sensibiliti"    )) (cs "sensible"   ))
  (assert (step3  (cs "triplicate"     )) (cs "triplic"    ))
  (assert (step3  (cs "formative"      )) (cs "form"       ))
  (assert (step3  (cs "formalize"      )) (cs "formal"     ))
  (assert (step3  (cs "electriciti"    )) (cs "electric"   ))
  (assert (step3  (cs "electrical"     )) (cs "electric"   ))
  (assert (step3  (cs "hopeful"        )) (cs "hope"       ))
  (assert (step3  (cs "goodness"       )) (cs "good"       ))
  (assert (step4  (cs "revival"        )) (cs "reviv"      ))
  (assert (step4  (cs "allowance"      )) (cs "allow"      ))
  (assert (step4  (cs "inference"      )) (cs "infer"      ))
  (assert (step4  (cs "airliner"       )) (cs "airlin"     ))
  (assert (step4  (cs "gyroscopic"     )) (cs "gyroscop"   ))
  (assert (step4  (cs "adjustable"     )) (cs "adjust"     ))
  (assert (step4  (cs "defensible"     )) (cs "defens"     ))
  (assert (step4  (cs "irritant"       )) (cs "irrit"      ))
  (assert (step4  (cs "replacement"    )) (cs "replac"     ))
  (assert (step4  (cs "adjustment"     )) (cs "adjust"     ))
  (assert (step4  (cs "dependent"      )) (cs "depend"     ))
  (assert (step4  (cs "adoption"       )) (cs "adopt"      ))
  (assert (step4  (cs "homologou"      )) (cs "homolog"    ))
  (assert (step4  (cs "communism"      )) (cs "commun"     ))
  (assert (step4  (cs "activate"       )) (cs "activ"      ))
  (assert (step4  (cs "angulariti"     )) (cs "angular"    ))
  (assert (step4  (cs "homologous"     )) (cs "homolog"    ))
  (assert (step4  (cs "effective"      )) (cs "effect"     ))
  (assert (step4  (cs "bowdlerize"     )) (cs "bowdler"    ))
  (assert (step5a (cs "probate"        )) (cs "probat"     ))
  (assert (step5a (cs "rate"           )) (cs "rate"       ))
  (assert (step5a (cs "cease"          )) (cs "ceas"       ))
  (assert (step5b (cs "controll"       )) (cs "control"    ))
  (assert (step5b (cs "roll"           )) (cs "roll"       ))
  (assert (stem       "relate"          )     "relat"       )
  (assert (stem       "probate"         )     "probat"      )
  (assert (stem       "conflate"        )     "conflat"     )
  (assert (stem       "pirate"          )     "pirat"       )
  (assert (stem       "prelate"         )     "prelat"      )
  (assert (stem       "derivate"        )     "deriv"       )
  (assert (stem       "activate"        )     "activ"       )
  (assert (stem       "demonstrate"     )     "demonstr"    )
  (assert (stem       "necessitate"     )     "necessit"    )
  (assert (stem       "renovate"        )     "renov"       )
  (assert (stem       "archprelate"     )     "archprel"    )
  (assert (stem       "generalizations" )     "gener"       )
  (assert (stem       "oscillators"     )     "oscil"       )
)

(test-stem) ; no news is good news
