;;; -*- Mode: LISP; Base: 10; Package: DICT; Syntax: Common-lisp -*-

; Hacking the Webster dictionary.
; James Raymond Davis November 1985

; Other Webster programs have been written:
;  PIG:USRD$:[JCMA]WEBSTER.LSP  (John C.  Mallery)
;  OZ:<PAO.LISPM>WEBSTER.LISP   (Pat A. O'Donnell)
;  OZ:<PAO.LISPM>NWEBSTER.LISP

(defvar *word-file-names*
	(with-open-file (file "MEDIA-LAB://local//lib//dictionary//files.lst")
	  (let ((list))
	    (do ((line (read-line file NIL NIL) (read-line file NIL NIL)))
		((null line) (reverse list))
	      (push line list))))
  "List of names of dictionary files")


; linear search through 220 items (but they're sorted alphabetically, so maybe we should try binary)
(defun filename-for-word (word)
  "Return the name of the file that should contain WORD"
  (do ((subl *word-file-names* (cdr subl)))
      ((or (null (cdr subl))
	   (and (string-not-lessp word (first subl))
		(string-lessp word (first (cdr subl)))))
       (first subl))))
		

(defvar dictionary-raw-pathname (pathname "MEDIA-LAB:/local/lib/dictionary/words/")
  "The directory holding all Webster files")

(defun pathname-for-word (word)
  "Return the pathname for the file that contains WORD"
  (make-pathname :host (pathname-host dictionary-raw-pathname)
		 :directory (pathname-directory dictionary-raw-pathname)
		 :name  (filename-for-word word)
		 :raw-type "d"))		;I really mean lowercase!




; WITH-OPEN-FILE gets an error while closing.
(defmacro with-open-TCP-file ((file pathname) &body body)
  `(let ((,file (open ,pathname)))
     (unwind-protect
	,@body
       (zl:ignore-errors
	 (close ,file)))))

(defun get-word-information (word)
  "Return Webster-entry for a word"
  ; Linear Search through a file.  Yuck
  (with-open-TCP-file (stream (pathname-for-word word))
    (let ((first-line 
	    (do ((line (read-line stream NIL NIL) (read-line stream NIL NIL)))
		((or (null line)
		     (and (string-equal line "F:" :end1 2)
			  (string-equal line word :start1 2 :end1 (+ 2 (length word)))))
		 line))))
      (when first-line
	(parse-word-entry
	  (make-initial-word-entry first-line) stream)))))

(export 'get-word-information)

;; Memoized version

(setf (get 'get-webster-info 'memoized-table) (make-hash-table :test #'equal))

(defun get-webster-info (word)
  (setf word (string-upcase word))
  (let ((info (gethash word (get 'get-webster-info 'memoized-table))))
    (when (null info)
      (setf info (get-word-information word))
      (setf (gethash word (get 'get-webster-info 'memoized-table)) info))
    info))

(export 'get-webster-info)

;;; Parsing the info in the file

; "Note that, as the  records are V format,  a nominal 80 character
;  record  ending  in a  blank  will, in fact  be  written as  a  79
;  character record.  Thus any routines  which read these files must
;  provide a blank when  a C: record follows  a 79 character record."
;    - the documentation


(defun make-initial-word-entry (line)
  (let ((entry  (make-webster-entry)))
    (multiple-value-bind (entname homono prefsuf dotss accmarks pos posjoin pos2)
	(extract-fields  line :start 2)
      (setf (webster-entry-name entry) entname)
      (when (and pos (not (zerop (length pos))))
	(setf (webster-entry-part-of-speech entry) (read-from-string pos)))
      ; not used
      homono
      prefsuf
      dotss
      accmarks
      posjoin
      pos2
      )
    entry))




(defun isolate-words (string &key (delims '(#\space #\, #\. #\;)) (start 0))
  "Return list of words from string"
  (let (end list)
    (do ((i start (and end (position delims string :start end
				     :test-not (lambda (list character)
						 (find character list :test #'char=))))))
	((null i) (reverse list))
      (setf end (position delims string :start i :test (lambda (list character)
							 (find character list :test #'char=))))
      (push (subseq string i end) list))))



(defun parse-word-entry (info stream)
  "Parse STREAM, extracting information, placing results into WEBSTER-ENTRY info"
  ; Since the first line has already been read we lose that info.
  (setf (webster-entry-definition info) "")
  (labels
    (
     (discard-line ()
       "Discard all chars on this line"
       (do ((char (read-char stream) (read-char stream)))
	   ((char-equal char #\Return))))
     (append-definition (string)
       (setf (webster-entry-definition info)
	     (concatenate 'string (webster-entry-definition info) string)))
     )
    (do ((char (read-char stream NIL NIL)  (read-char stream NIL NIL))
	 ignore?				;reading uninteresting line?
	 v-hack?				; must append space?
	 )
	((null char) info)
      (read-char stream)			;discard colon that always follows
      (zl:selector char char-equal
	(#\F (return info))			;found next def, we're through
	(#\R (return info))			;Runon, remainder uninteresting
	(#\P
	 (setf (webster-entry-pronunciation info) (parse-webster-pronunciation stream)))
	(#\E					;Etymology
	 (discard-line)   (setf ignore? T))
	(#\L					; "Label"  (??)
	 (discard-line)   (setf ignore? T))
	(#\V					;Variant spelling
	 (discard-line)   (setf ignore? T))
	(#\S					;Synonyms
	 (discard-line) (setf ignore? T))
	(#\X
	 (discard-line) (setf ignore? T))
	(#\D
	 (let ((line (read-line stream)))
	   (setf v-hack? (= (length line) 77.))
	   (setf ignore? nil)
	   (when (> (length (webster-entry-definition info)) 0)
	     (append-definition ".  "))
	   (append-definition
	     (subseq line
		     ;; skip the first four fields
		     (do ((pos 0 (position #\; line :test #'char= :start (+ pos 1)))
			  (i 0 (1+ i)))
			 ((= i 4) (+ pos 1)))))))
	(#\C
	 (if ignore?
	     (discard-line)
	     (if v-hack?
		 (append-definition " "))
	     (let ((line  (read-line stream)))
	       (setf v-hack? (= (length line) 77.))	;+ 2 for C: = 79.
	       (append-definition line))))
	(otherwise
	  (error "Couldn't parse line ~C:~A~%" char (read-line stream)))))))


; To be improved.  Must discard alternative pronunciations (after , )
; discard optional chars (inside parens)
; parse overstrikes, and convert to Klattese.
(defun parse-webster-pronunciation (stream)
  (read-line stream))



