;;; -*- Mode: LISP; Syntax: Common-lisp; Base: 10; Package: DICT -*-

; Must discard alternative pronunciations (after , )
; discard optional chars (inside parens)
; parse overstrikes, and convert to Klattese.

; There is not a clear mapping to Klattese because Webster phonetic alphabet
; includes some non-English phonemes.

(defun make-extensible-string (&key (initial-length 64))
	(make-array initial-length :element-type 'character :adjustable T :fill-pointer 0))

(defun parse-webster-pron-alist (list)
  (sort
    (mapcar
      (lambda (pair)
	(let ((source (butlast pair))
	      (klattese (first (last pair))))
	  (check-type klattese (or character null) "a character")
	  (check-type source list "a list")
	  (cons
	    (let ((string (make-extensible-string :initial-length 6)))
	      (dolist (ele source)
		(cond
		  ((characterp ele)
		   (vector-push-extend ele string))
		  ((and (stringp ele)
			(= (length ele) 1))
		   (vector-push-extend (elt ele 0) string))
		  ((and (stringp ele)
			(= (length ele) 2))
		   (vector-push-extend (elt ele 0) string)
		   (vector-push-extend #\| string)
		   (vector-push-extend #\back-space string)
		   (vector-push-extend #\B string)
		   (vector-push-extend (elt ele 1) string))
		  ((and (listp ele)
			(= (length ele) 2)
			(every 'characterp ele))
		   (vector-push-extend (elt ele 0) string)
		   (vector-push-extend #\back-space string)
		   (vector-push-extend (elt ele 1) string))
		  (t
		   (error "Invalid Webster Pron: ~A in ~A."
			  ele pair))))
	      string)
	    klattese)))
      list)
    #'> :key (lambda (pair) (length (car pair)))
    ))

(defvar webster-pron-to-klattese-alist
	(parse-webster-pron-alist
	  '(
	    (#\' #\')				;primary stress
	    (#\. #\`)				;secondary
	    ("n_" #\G)				;the velar nasal
	    (#\* #\x)				;schwa
	    ("o-" "e-" #\e)			;non English
	    ("u-" "e-" #\i)			;non English
	    (#\- NIL)				;syllable separator
	    ((#\( #\A) #\* (#\) #\A) #\l #\L)	;syllabic L
	    ((#\( #\A) #\* (#\) #\A) #\n #\N)	;syllabic N
	    ((#\( #\A) #\* (#\) #\A) #\m #\M)	;syllabic M
	    ((#\( #\A) #\n (#\) #\A) #\N)	;French Nasalization.
	    (#\a #\@)				;map
	    ("a-" #\e)				;day
	    ("a:" #\c)				;bother, cot
	    ("a." #\@)				;(?) aunt, ask
	    ("a" "u." #\W)			;now, out
	    (#\b #\b)
	    ("c" "h" #\C)
	    (#\d #\d)
	    (#\e #\E)				;set
	    ("e-" #\i)
	    (#\f #\f)
	    (#\g #\g)
	    (#\h #\h)
	    (#\h #\w #\w)			; maybe wrong...voiced w
	    (#\i #\I)				;tip
	    ("i-" #\A)				;in our Klattese.. Someplaces may be Y
	    (#\j #\J)
	    (#\k #\k)
	    ("k_" #\k)
	    (#\l #\l)
	    (#\m #\m)
	    (#\n #\n)
						; superscript N ignored
	    ("o-" #\o)
	    ("o." #\c)				;???
						; oe ignored
						; oe overbar ignored
	    ("o." "i" #\O)
	    (#\p #\p)
	    (#\r #\r)
	    (#\s #\s)
	    (#\s #\h #\S)
	    (#\t #\t)
	    (#\t #\h #\T)
	    ("t_" "h_" #\D)
	    ("u:" #\u)
	    ("u." #\U)
						; ue ignored
						; ue overbar ignored
	    (#\v #\v)
	    (#\w #\w)
	    (#\y #\y)
	    (#\z #\z)
	    (#\z #\h #\Z)
	    (#\( #\()
	    (#\) #\))
	    ))
  "alist from string in Webster to klattese character")

(defun remove-optional-pronunciation-characters (string)
  "if there are parens in the string remove them and anything else in between"
  (if (not (position #\( string :test #'char=))
      string					;no problem
      (do* ((out "")
	    (paren-end 0 (+ (position #\) string :test #'char= :start paren-start) 1))
	    (paren-start (position #\( string :test #'char= :start paren-end)
			 (position #\( string :test #'char= :start paren-end))
	    )
	   (nil)
	(setf out (concatenate 'string out (subseq string paren-end paren-start)))
	(if (null paren-start) (return out)))))

(defun webster-pron-to-klattese* (string)
    (do* ((out (make-extensible-string :initial-length 16))
	  (i 0)
	  match)
	 ((= i (length string)) out)
      (setf match  (find nil webster-pron-to-klattese-alist
			 :test (lambda (ignore x) (string-equal x string :start2 i
								:end2 (+ i (length x))))
			 :key #'car))
      (if (null match)
	  (error "Couldn't translate Webster pronunciation because of the character ~C in ~A"
		 (elt string i) string)
	  (incf i (length (car match)))
	  (when (cdr match)
	    (vector-push-extend (cdr match) out)))))

(defun strip-secondary-pronunciation (string)
  (if (search ", " string)
	    (subseq string 0 (search ", " string))
	    string))

(defun webster-pron-to-klattese (webster-pron)
  "Translate a Webster pronunciation to Klattese"
  (remove-optional-pronunciation-characters
    (webster-pron-to-klattese*
      (strip-secondary-pronunciation webster-pron))))


#|| ;Testing

(defun www (word)
  "Get Klattese pronunciation of word"
  (let ((info (get-webster-info word)))
    (when info
      (let* ((pron  (webster-entry-pronunciation info))
	     (klat (webster-pron-to-klattese pron)))
	(format t "~&~A -> ~A~%" pron klat)
	(dectalk:kspeak (zl:string klat))
	klat))))


||#