;;; -*- Mode: LISP; Syntax: Common-Lisp; Base: 10; Package: DICT -*-

; Fix the pronunciations according to the changes in ALL.BADPRON

(defvar corrections-pathname (pathname "DICTIONARY:INFO;ALL.BADPRON"))

(defun fix-prons ()
  (with-open-file (stream corrections-pathname)
    (fix-pron-loop stream)))

(defun fix-pron-loop (stream)
  (do ((name (read stream NIL NIL) (read stream NIL NIL)))
      ((null name))
    (read-line stream)			;Eat blank line
    (format t "~&~A" name)
    (let ((fixes  (collect-group-pron-corrections stream)))
      (cond
	((null fixes)
	 (format t "  No corrections.~%")
	 )
	(t
	 (format t "  ~D correction~:P" (length fixes))
	 (rewrite-pron-file (pathname-for-word (string name)) fixes))))))

; ( WORD . SENSE) . FIX
(defun collect-group-pron-corrections (stream)
  "Return list of corrections for one file"
  (let ((list))
    (do ((line (read-line stream NIL "") (read-line stream NIL "")))
	((zerop (length line)) (reverse list))
      (when (not (char= #\* (aref line 0)))
	(push (parse-correction-line line) list)))))

; This is what a line in ALL.BADPRON looks like

#||
(setq l1 "chill 1 :  'chil-in|B_-le|B")
(setq l2 "Chinatown  :  'chi|B--n*-.tau.n")
||#


(defun parse-correction-line (string)
  (let* ((colon-pos (position #\: string))
	 (digit-pos (position "123456789" string
			      :test (lambda (digits character)
				      (find character digits))
			      :end colon-pos)))
    ;; The Homonym number, if it appears, must be surrounded by space
    (unless (and digit-pos
		 (> digit-pos 0)
		 (char-equal #\space (aref string (1- digit-pos)))
		 (char-equal #\space (aref string (1+ digit-pos))))
      (setf digit-pos nil))
    (cons
      (cons
	(subseq string 0
		(+ 1
		   (position #\space string :from-end t :end (or digit-pos colon-pos)
			     :test-not #'char-equal)))
	(and digit-pos
	     (cons (read-from-string string T nil :start digit-pos :end colon-pos) nil)))
      (subseq string (position #\space string :test-not #'char= :start (+ colon-pos 1))))))


(defun rewrite-pron-file (pathname fixes)
  (let ((fix-file (make-pathname :type "E" :defaults pathname))
	next-word
	next-sense
	next-pron
	(errors? nil)
	(next-pron! nil))
    (labels
      ((advance ()
	 (let ((next-fix (pop fixes)))
	   (setf next-word (caar next-fix))
	   (setf next-sense (cadar next-fix))
	   (setf next-pron (cdr next-fix)))))
      (advance)
      (with-open-file (in pathname :direction :input)
	(with-open-file (out fix-file :direction :output)
	  (do ((line (read-line in NIL NIL) (read-line in NIL NIL)))
	      ((null line))
	    (cond
	      ((and (string-equal line "F:" :end1 2)
		    (string-equal line next-word :start1 2 :end1 (+ 2 (length next-word)))
		    (or (null next-sense)
			(char-equal (int-char (+ (char-int #\0) next-sense))
				    (aref line (+ (length next-word) 3)))))
	       (setf next-pron! T)
	       (write-line line out))
	      ((and (string-equal line "P:" :end1 2)
		    next-pron!)
	       (write-string "P:" out)
	       (write-line next-pron out)
	       (setf next-pron! nil)
	       (advance))
	      (t
	       (write-line line out))))))
      (when (not (null fixes))
	(format *error-output*
		"Some fixes didn't apply for ~A: ~A~%" pathname fixes)
	(setf errors? t))
      (unless errors?
	(rename-file fix-file pathname)))))


; Dif version, because TCP drops connection too often.  Store all fixes at once,
; and be able to re-start after error.

(defvar last-full-file "CLAYMORE")

(defvar fixup-list (get-pron-fixing-list last-full-file))

(defun fix-prons (&aux name)
  (do ((fix (car fixup-list) (car fixup-list)))
      ((null fixup-list))
    (setf name (car fix))
    (rewrite-pron-file (pathname-for-word name) (cdr fix))
    (format t "~A: ~D change~:P~%" name (length  (cdr fix)))
    (setf last-full-file name)
    (pop fixup-list)))

(defun get-pron-fixing-list (last)
  "Return list of all fixes for files after LAST"
  (let (list)
    (with-open-file (stream corrections-pathname)
      (do ((name (read stream NIL NIL) (read stream NIL NIL)))
	  ((null name) (reverse list))
	(read-line stream)			;Eat blank line
	(format t "~&~A" name)
	(let ((fixes  (collect-group-pron-corrections stream)))
	  (if (string> name last)
	      (cond
		((null fixes)
		 (format t "  No corrections.~%"))
		(t
		 (format t "  ~D correction~:P" (length fixes))
		 (push (cons (string name) fixes) list)))
	      (format t " - Ignored~%")))))))
