;;; Copyright (c) 1991--2004 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen; ;;; see `licence.txt' for conditions. ;;; modifications for YADU - April 1997 ;;; bug fixes etc 1995 ;;; July 1996 - cacheing glbs ;;; structure mod to allow glbs to be calculated (in-package :lkb) ;;; ;;; generic assoc() and member() are mightier than is typically needed; supply ;;; simplified versions instead. (27-sep-99 - oe) ;;; (defmacro sassoc (element list) `(loop for foo in (the list ,list) when (eq (first (the cons foo)) ,element) return foo)) (defmacro smember (element list) `(loop for foo in (the list ,list) thereis (eq ,element foo))) ;;; ;;; For each type we need: ;;; ;;; name ;;; parents - ie immediate supertypes - a set of types ;;; constraint - a feature structure stored either in a fully ;;; expanded form or just as the feature structure ;;; specific to the type ;;; tdfs - the full typed default feature structure constraint ;;; ;;; For implementation purposes we also have: ;;; ;;; constraint-mark - unification generation last time the constraint ;;; was returned not completely-copied ;;; daughters - immediate subtypes - a set of types. ;;; appfeats - appropriate features - a set of features which can be ;;; derived from the constraint (top-level-features-of constraint) ;;; but cached in order to type untyped feature structures efficiently. ;;; ancestors - all the supertypes of a type - immediate or otherwise ;;; marks - see marks.lsp ;;; constraint-spec - the user specified unifications ;;; default-spec - the user specified default unifications ;;; local constraint - the fs derived from the user specified ;;; unifications ;;; inherited constraint - the fs after inheritance but before ;;; type inference - for debugging - zeroed ;;; after expanding all constraints ;;; atomic-p - t if the type has no appropriate features and none of ;;; its subtypes have any appropriate features ;;; July 1996 ;;; glbp - t if type was automatically created ;;; May 1997 ;;; descendants - for glb stuff (defstruct ltype name parents constraint (constraint-mark nil) tdfs comment daughters appfeats enumerated-p ancestors marks constraint-spec default-spec local-constraint inherited-constraint atomic-p glbp descendants shrunk-p visible-p ; for display in type hierarchy bit-code ; for glb computation ) (defmethod common-lisp:print-object ((instance ltype) stream) (if *print-readably* ;; print so object can be read back into lisp (call-next-method) ;; usual case (progn (write-string "#Type<" stream) (write-string (string (ltype-name instance)) stream) (write-char #\> stream)))) (defstruct (leaf-type (:include ltype)) (expanded-p nil)) (defvar *types* (make-hash-table :test #'eq)) (defparameter *ordered-type-list* nil) (defparameter *ordered-glbtype-list* nil) (defparameter *default-abbreviations* nil) (defvar *types-changed* nil) (defvar *lexicon-changed* nil) (defvar *type-reload-p* nil) (defun clear-types nil (clear-type-cache) ; must be done before types table is cleared (disable-type-interactions) (clrhash *types*) (setf *ordered-type-list* nil) (setf *ordered-glbtype-list* nil) ; (clear-leaf-types *leaf-types*) ;; no longer needed here (clear-feature-table) (clear-expanded-lex) #+:allegro (when (and *gc-before-reload* *type-reload-p*) (excl:gc t)) (setf *type-reload-p* t)) (defun clear-types-for-patching-constraints nil (clear-type-cache) (clear-feature-table) (clear-expanded-lex)) (defun clear-type-visibility nil (maphash #'(lambda (key entry) (declare (ignore key)) (setf (ltype-visible-p entry) nil)) *types*)) (defun collect-type-names nil (let ((type-names nil)) (maphash #'(lambda (name entry) (declare (ignore entry)) (push name type-names)) *types*) type-names)) (defmacro get-type-entry (name) `(gethash ,name *types*)) (defun is-valid-type (name) (or (get-type-entry name) (stringp name))) (defun string-type-p (type-name) ;; AAC 30/12/94 ;; allow for no string type (and *string-type* (or (eq type-name *string-type*) (member (get-type-entry type-name) (retrieve-ancestors *string-type*) :test #'eq)))) (defun get-known-type-entry (name) (or (gethash name *types*) (error "~%Unknown type ~A" name))) (defun set-type-entry (name new-entry) (setf (gethash name *types*) new-entry)) (defun remove-type-entry (name) ;;; effectively invalidates type but ;;; assumes all the tricky stuff is taken care of elsewhere ... (setf (gethash name *types*) nil)) (defun constraint-spec-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-constraint-spec type-record) (error "~%~A is not a valid type" type-name)))) (defun constraint-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-constraint type-record) (error "~%~A is not a valid type" type-name)))) (defun tdfs-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-tdfs type-record) (error "~%~A is not a valid type" type-name)))) (defun default-spec-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-default-spec type-record) (error "~%~A is not a valid type" type-name)))) (defun appropriate-features-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-appfeats type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-ancestors (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-ancestors type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-descendants (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-descendants type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-parents (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-parents type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-daughters (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-daughters type-record) (error "~%~A is not a valid type" type-name)))) (defun subtype-p (type1 type2) (if (stringp type1) (string-type-p type2) (member (get-type-entry type2) (retrieve-ancestors type1) :test #'eq))) (defun safe-subtype-p (type1 type2) ;; ;; be robust on invalid type names and fail gracefully (i.e. no error()) ;; (ignore-errors (subtype-p type1 type2))) (defun atomic-type-p (type-name) (or (stringp type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-atomic-p type-record) (error "~%~A is not a valid type" type-name))))) ;;; glb computation - entry point is greatest-common-subtype ;;; Type unification performed by lookup in a global vector. Index is ;;; numeric combination of sxhash values of the two type names ;;; (symbols): (sxhash(t1)&&1023)<<10 + sxhash(t2)&&1023 where the two ;;; types are ordered on their sxhash values - so that either order results ;;; in the same table entry being retrieved. Must also test the types ;;; themselves in case another pair of types has the same key. So a vector ;;; entry is (type1 type2 . (subype . constraintp)) (defparameter *type-cache* (make-array (* 1024 1024) :initial-element nil)) (defun clear-type-cache nil ;; it's probably best to clear this out occasionally - definitely after ;; loading a grammar and before parsing since different pairs of types will ;; be exercised (let ((arr *type-cache*)) (declare (simple-vector arr)) (dotimes (x (* 1024 1024)) (setf (svref arr x) nil)))) (defmacro sxhash-one-symbol (x) ;; Hash values may change after a GC in MCL, but this is safe -- just ;; results in cache misses `(#+mcl ccl::%%eqhash #-mcl sxhash (the symbol ,x))) (defmacro type-cache-index (x) ;; least significant 10 bits of sxhash(x) `(logand (the fixnum (sxhash-one-symbol ,x)) 1023)) (defmacro type-cache-entry (x y) `(svref *type-cache* (the fixnum (+ (the fixnum (ash (the fixnum ,x) 10)) (the fixnum ,y))))) (defmacro cached-greatest-common-subtype (type1 type2) `(let ((t1 ,type1) (t2 ,type2)) (if (eq t1 *toptype*) ;; avoid caching on top type - result will always be other type t2 (if (eq t2 *toptype*) t1 (let* ((i1 (type-cache-index t1)) (i2 (type-cache-index t2))) (when (> (the fixnum i2) (the fixnum i1)) (rotatef i1 i2) (rotatef t1 t2)) (let* ((entry (type-cache-entry i1 i2)) (found (dolist (e entry) (when (and (eq (car e) t1) (eq (cadr e) t2)) (return (cddr e)))))) (if found (values (car found) (cdr found)) (multiple-value-bind (subtype constraintp) (full-greatest-common-subtype t1 t2) (setf (type-cache-entry i1 i2) (nconc entry (list (list* t1 t2 (cons subtype constraintp))))) (values subtype constraintp))))))))) #| ;;; investigate effectiveness of greatest common subtype cache (let ((max 0) (longest nil)) (dotimes (x (* 1024 1024)) (let ((entry (svref *type-cache* x))) (when (> (length entry) max) (setq max (length entry) longest entry)))) (values max longest)) |# (defun greatest-common-subtype (type1 type2) ;; implemented as a memo function. In practice we won't see anything ;; like all possible combinations of arguments so best not to ;; attempt to pre-compute the cache contents ;; ;; we expect both args to be lisp atoms, but either or both could be ;; strings/string type. String types are not cached (cond ((eq type1 type2) type1) ((arrayp type1) ; a string? (when (or (equal type1 type2) (string-type-p type2)) type1)) ((arrayp type2) (when (string-type-p type1) type2)) (t (cached-greatest-common-subtype type1 type2)))) (defun full-greatest-common-subtype (type1 type2) (let ((t1 (get-type-entry type1)) (t2 (get-type-entry type2))) (cond ((eq type1 type2) type1) ((member t2 (ltype-ancestors t1) :test #'eq) type1) ((member t1 (ltype-ancestors t2) :test #'eq) type2) (t (let* ((type1-desc (ltype-descendants t1)) (type2-desc (ltype-descendants t2)) (common-subtypes (intersection type1-desc type2-desc :test #'eq))) (when common-subtypes (let ((greatest-common-subtype-list (intersection common-subtypes (reduce #'intersection (mapcar #'(lambda (subtype) (cons subtype (ltype-ancestors subtype))) common-subtypes)) :test #'eq))) (cond ((not (cdr greatest-common-subtype-list)) (let ((gcsubtype-entry (car greatest-common-subtype-list))) (values (ltype-name gcsubtype-entry) (if (extra-constraint-p gcsubtype-entry t1 t2) t)))) ;; return true as the second value if there is a ;; constraint that may have to be unified in (greatest-common-subtype-list (error "~%~A and ~A have multiple common subtypes ~A" type1 type2 (mapcar #'(lambda (x) (ltype-name x)) greatest-common-subtype-list))) (t (error "~%Error found in type hierarchy")))))))))) (defun extra-constraint-p (gcsubtype t1 t2) ;;; test is whether any ancestor of the gcsubtype which ;;; isn't also an ancestor of the types being unified ;;; or the gcsubtype itself introduce any extra information ;;; on the constraint. (or (ltype-local-constraint gcsubtype) (let ((t1ancs (ltype-ancestors t1)) (t2ancs (ltype-ancestors t2))) (dolist (type (ltype-ancestors gcsubtype)) (when (and (not (eq type t1)) (not (eq type t2)) (ltype-local-constraint type) (not (member type t1ancs :test #'eq)) (not (member type t2ancs :test #'eq))) (return t)))))) ;;; called from generalisation (defun least-common-supertype (x y) (cond ((equal x y) x) ((stringp x) (if (stringp y) *string-type* (least-common-supertype *string-type* y))) ((stringp y) (least-common-supertype *string-type* x)) ((subtype-p x y) y) ((subtype-p y x) x) (t (let ((z (intersection (cons x (mapcar #'ltype-name (retrieve-ancestors x))) (cons y (mapcar #'ltype-name (retrieve-ancestors y)))))) (cond ((null z) (error "~%Types ~A and ~A have no common ancestor" x y)) ((= (length z) 1) (car z)) ((member x z) x) ((member y z) y) (t (let ((lcs-list (remove-ancestors z))) (cond ((null lcs-list) (error "~%Types ~A and ~A have no common ancestor" x y)) ((= (length lcs-list) 1) (car lcs-list)) (t (error "~%Types ~A and ~A have multiple common ancestors ~A" x y lcs-list)))))))))) (defun remove-ancestors (int-list) (do* ((done nil (cons initial done)) (initial (car int-list) (car (set-difference new-int-list done))) (new-int-list (set-difference int-list (mapcar #'ltype-name (retrieve-ancestors initial))) (set-difference new-int-list (mapcar #'ltype-name (retrieve-ancestors initial))))) ((null (set-difference new-int-list (cons initial done))) new-int-list))) ;;; The following utility functions assume that no cycles are present (defun get-real-types (type) (let ((type-entry (get-type-entry type))) (if (ltype-glbp type-entry) (loop for parent in (ltype-parents type-entry) append (get-real-types parent)) (list type)))) ;;; We need a record of the maximal type at which a particular ;;; feature is introduced. The following are called from functions ;;; in checktypes.lsp (defvar *feature-list* (make-hash-table :test #'eq)) (defvar *feature-minimal-type* (make-hash-table :test #'eq)) (defun clear-feature-table nil (clrhash *feature-minimal-type*) (clrhash *feature-list*)) (defun maximal-type-of (feature) (gethash feature *feature-list*)) (defun set-feature-entry (feature type) (setf (gethash feature *feature-list*) type)) (defun check-feature-table nil (let ((ok t)) (maphash #'(lambda (feature type-list) (cond ((> (length type-list) 1) (format t "~%Feature ~A is introduced at multiple types ~A" feature type-list) (setf ok nil)) (t (set-feature-entry feature (car type-list))))) *feature-list*) ok)) (defun maximal-type-of-list (features) (when features (reduce #'(lambda (x y) (unless (or (null x) (null y)) (greatest-common-subtype x y))) (mapcar #'maximal-type-of features)))) ;; Remove obsolete pointers from type constraints so that the garbage ;; collector can purge the structures they point to. (defun gc-types nil (maphash #'(lambda (name type) (declare (ignore name)) (when (ltype-tdfs type) (compress-dag (tdfs-indef (ltype-tdfs type)))) (compress-dag (ltype-constraint type)) (compress-dag (ltype-local-constraint type))) *types*)) ;;; Try to reduce the amount of space used by the expanded type hierarchy (defun clear-glbs nil (gc-types) (maphash #'(lambda (name type) (when (search "GLBTYPE" (symbol-name name)) (setf (ltype-constraint type) nil) (setf (ltype-tdfs type) nil))) *types*)) (defun used-types (type) (let ((used (mapcar #'(lambda (x) (u-value-type (unification-rhs x))) (ltype-constraint-spec type)))) (when used (remove-duplicates used)))) (defun purge-constraints nil (gc-types) (let* ((leaves (mapcar #'(lambda (x) (gethash x *types*)) (slot-value *leaf-types* 'leaf-types))) (parents (reduce #'union (mapcar #'ltype-parents leaves))) (referred (reduce #'union (mapcar #'used-types leaves))) (save (union parents referred))) (maphash #'(lambda (name type) (unless (member (symbol-name name) save) ;; (setf (ltype-constraint type) nil) (setf (ltype-tdfs type) nil))) *types*))) (defun types-to-xml (&key (stream t) file) (loop with stream = (if file (open file :direction :output :if-exists :supersede :if-does-not-exist :create) stream) for type being each hash-value in *types* for name = (ltype-name type) for parents = (ltype-parents type) for daughters = (ltype-daughters type) do (format stream "~% ~%" name) (loop for parent in parents do (format stream " ~%" parent)) (format stream " ~% ~%") (loop for daughter in daughters do (format stream " ~%" daughter)) (format stream " ~%~%") finally (when file (close stream))))