;;; -*- Mode: LISP; Syntax: Common-Lisp; Base: 10; Package: DICT -*-

;;; Use the TCP DICTIONARY service on AMT

(neti:define-protocol :DICTIONARY (:DICTIONARY :byte-stream)
    (:invoke-with-stream-and-close
      (stream word &optional pronunciation-only?)

      ;; Ask for a definition
      (write-string "DEFINE " stream)
	(write-string (string-upcase word) stream)
	(write-byte #o15 stream)		;TERPRI is not good enough
	(write-byte #o12 stream)
	(zl:send stream :force-output)		;this is crucial

	(if pronunciation-only?
	    (parse-dictionary-reply-for-pronunciation stream)
	    (parse-dictionary-reply stream))))



(tcp:add-tcp-port-for-protocol :DICTIONARY 3012)

(defun parse-dictionary-reply (stream)
  (declare (values list-of-entries see-also-list))
  (labels
    ((line ()
       (do ((char (read-char stream) (read-char stream))
	    (string (make-array 10 :element-type 'string-char :fill-pointer 0)))
	   ((or (char= char #\null)
		(char= char (int-char #o15)))
	    (if (char= char #\null)
		NIL
		(read-char stream) string))
	 (vector-push-extend char string)))
     (find-definition-start (line pos)
       ;; find a colon preceeded by either a space, a digit, or a letter preceeded by a digit
       (do ((start (position #\: line :start pos)
		   (position #\: line :start (+ 1 start)))
	    win)
	   ((or win (null start)) win)
	 (setf win (cond
		     ((char= (aref line (- start 1)) #\space)
		      (- start 1))
		     ((find (aref line (- start 1)) '(#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0))
		      (- start 1))
		     ((and (char= (aref line (- start 1)) #\a)
			   (find (aref line (- start 2)) '(#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)))
		      (- start 2))))))
     )
    
    (let ((reply (line)))
      (when (zerop (search "DEFINITION" reply :test #'string-equal))
	(let ((list)
	      (see-also)
	      (cref-count (read-from-string reply T nil :start (length "DEFINITION"))))
	  
	  ;; Collect the cross references, if any
	  (dotimes (i cref-count)
	    (let ((line (line)))
	      (push (subseq line (+ (position #\space line) 1)) see-also)))
	  
	  ;; read all the entry's that follow
	  (do ((line (line) (line))
	       pronunciation			;store first pronunciation we find
	       start end			;handy for searches
	       line-has-etymology?		;if so ignore part of it
	       entry				;current entry
	       )
	      ((null line) (when entry (push entry list)))
	    ;; expect new entry speech to have no leading blank space
	    (if (not (char= (aref line 0) #\space))
		(progn				;begin a new entry
		  (when entry			;save old one if any
		    (push entry list))
		  ;; expect just one blank space after the entry number, and then the word
		  ;; sometimes there is no entry number.  the word appears right away
		  (setf start (if (find (aref line 0)  '(#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0))
				  (+ 1 (position #\space line))
				  0))
		  (setf end (position #\space line :start start))
		  (setf entry (make-webster-entry :name (subseq line start end)))
		  
		  ;; Expect that pronunciation only appears in the first word of the group
		  (when (null pronunciation)
		    ;; Expect it delimited by backslashes
		    (setf start (+ (position #\\ line) 1))
		    (setf end (position #\\ line :start (+ start 1)))
		    (setf pronunciation (subseq line start end)))
		  (setf (webster-entry-pronunciation entry) pronunciation)
		  
		  ;; expect part of speech next
		  (setf start (position #\space line :start (+ end 1) :test-not #'char=))
		  (setf end (position #\space line :start start))
		  (setf (webster-entry-part-of-speech entry) (subseq line start end))
		  
		  ;; Etymology is next.  It begins with an open square bracket but has no reliable end
		  ;; (or that may be a bug in Server).  The only way we know the end is to find the beginning
		  ;; of the first definition, which will begin with a colon.
		  
		  (setf start (find-definition-start line end))
		  (if (null start)
		      (setf line-has-etymology? T)	;continued on next line
		      (setf (webster-entry-definition entry) (subseq line start))
		      (setf line-has-etymology? nil))
		  )
		(progn
		  (if line-has-etymology?
		      (progn
			(setf start (find-definition-start line 0))
			(if (null start)
			    (setf line-has-etymology? T)
			    (setf (webster-entry-definition entry) (subseq line start))
			    (setf line-has-etymology? nil)))
		      (progn
			(setf (webster-entry-definition entry)
			      (concatenate 'string (webster-entry-definition entry)
					   (subseq line (position #\space line :test-not #'char=)))))))))
	  (values (reverse list) (reverse see-also))
	  )))))

(defun parse-dictionary-reply-for-pronunciation (stream)
  (declare (values pronunciation))
  (labels
    ((line ()
       (do ((char (read-char stream) (read-char stream))
	    (string (make-array 10 :element-type 'string-char :fill-pointer 0)))
	   ((or (char= char #\null)
		(char= char (int-char #o15)))
	    (if (char= char #\null)
		NIL
		(read-char stream) string))
	 (vector-push-extend char string))))
    
    (let ((reply (line)))
      (when (zerop (search "DEFINITION" reply :test #'string-equal))
	(dotimes (i (read-from-string reply T nil :start (length "DEFINITION")))
	  (line))
	(setf reply (line))
	(let* (( start (+ (position #\\ reply) 1))
	       ( end (position #\\ reply :start (+ start 1))))
	  (subseq reply start end))))))

(defun get-word-information-tcp (word)
  (neti:invoke-service-on-host :DICTIONARY (si:parse-host "AMT") word))

(defun get-word-pronunciation (word)
  (neti:invoke-service-on-host :DICTIONARY (si:parse-host "AMT") word T))

#||


(mapc 'describe (get-word-information-tcp "ball"))

(mapc 'describe (get-word-information-tcp "ingenious"))

(get-word-pronunciation "ball")

(get-word-pronunciation "ingenious")

||#

