;;; Copyright (c) 1992-2001 John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen ;;; see LICENSE for conditions ;;; Rewritten for new unifier (in-package :lkb) ;;; generalisation, equality and subsumption of fs ;;; Jan 1995 - made equal-wffs-p and subsumes-wffs-p work ;;; on non well typed fs (defun mark-dag-with-backwards-paths (dag firstp backwards-path) ;; mark nodes with lists of paths - generalisation will be reentrant ;; in those paths that both the originals were reentrant in ;; Unmarking afterwards is done implicitly at next invalidation of visit ;; marks, before the visit mark fields are used next time ;; We may be inside a unification context, so we have to deref. The dag ;; may possibly be cyclic, so we check for that ;; !!! note that paths put on nodes are backwards, so they have to be ;; reversed if they are actually used as paths rather than just for ;; comparison (macrolet ((mark-subdags (arcs) `(dolist (arc ,arcs) (mark-dag-with-backwards-paths (dag-arc-value arc) firstp (cons (dag-arc-attribute arc) backwards-path))))) (setq dag (deref-dag dag)) (cond ((eq (dag-copy dag) :inside) (when (or *unify-debug* *unify-debug-cycles*) (format t "~%Generalisation failed: cycle found at < ~{~A ~^: ~}>" (reverse backwards-path))) (throw '*fail* nil)) (t ;; take into account that a subdag may be shared between the two ;; input dags, possibly in different respective places - deal with ;; this by storing separate sets of paths for each input dag in the ;; two halves of a cons cell (let* ((visit-cell (or (dag-visit dag) (setf (dag-visit dag) (cons nil nil)))) (already (if firstp (car visit-cell) (cdr visit-cell)))) (if firstp (setf (car visit-cell) (cons backwards-path already)) (setf (cdr visit-cell) (cons backwards-path already))) (unless already (setf (dag-copy dag) :inside) (mark-subdags (dag-arcs dag)) (mark-subdags (dag-comp-arcs dag)) (setf (dag-copy dag) nil))))))) ;;; generalisation - a new dag is returned and neither dag1 nor dag2 is ;;; modified ;;; Could return nil in case where a circularity is detected in one of the ;;; input dags. This is checked when marking nodes with paths (defvar *reentrant-sets* nil) (defun generalise-dags (dag1 dag2) (if *within-unification-context-p* (catch '*fail* (invalidate-visit-marks) (mark-dag-with-backwards-paths dag1 t nil) (mark-dag-with-backwards-paths dag2 nil nil) (let ((result-dag (create-dag)) (*reentrant-sets* nil)) (generalise-dags-1 dag1 dag2 result-dag nil) ;; (mapc #'print *reentrant-sets*) (loop for reentrant-set in *reentrant-sets* do (let ((first-path (create-path-from-feature-list (reverse (car reentrant-set))))) (loop for other-path in (cdr reentrant-set) do (unify-paths first-path result-dag (create-path-from-feature-list (reverse other-path)) result-dag)))) (copy-dag result-dag))) (with-unification-context (dag1) (generalise-dags dag1 dag2)))) (defun generalise-dags-1 (dag1 dag2 result-dag path) ;; new dag is created by side effects - a list of reentrancy specs is ;; created but not used until the end ;; only the result-dag is modified destructively - neither input dag is ;; changed (apart from working data put in the visit fields) (setq dag1 (deref-dag dag1)) (setq dag2 (deref-dag dag2)) (setq result-dag (deref-dag result-dag)) ;; we can't take a short-cut here if the two dags are eq, and just insert ;; the contents of one of them into the result, since we may already ;; have a partial structure for the result (built by the unification in of ;; type constraints) (generalise-dags-2 dag1 dag2 result-dag path)) (defun generalise-dags-2 (dag1 dag2 result-dag path) (let* ((dag-type1 (unify-get-type dag1)) (dag-type2 (unify-get-type dag2)) (reentrant-labels ;; intersect dag1 paths in dag1 with dag2 paths in dag2 (generalise-path-intersection (car (dag-visit dag1)) (cdr (dag-visit dag2))))) (when (cdr reentrant-labels) (pushnew reentrant-labels *reentrant-sets* :test #'equal)) (let* ((lcsupertype (least-common-supertype dag-type1 dag-type2)) (constraint (if (symbolp lcsupertype) (may-copy-constraint-of lcsupertype)))) (setf (dag-new-type result-dag) lcsupertype) (when constraint (let ((res (catch '*fail* (progn (unify1 result-dag constraint path) t)))) (unless res (error "Unification with constraint of type ~A (lcsupertype ~ of ~A and ~A) failed at path < ~{~A ~^: ~}>" lcsupertype dag-type1 dag-type2 (reverse path)))) ;; result-dag might just have been forwarded so dereference it again (setq result-dag (deref-dag result-dag))) (when (and (dag-arcs dag1) (dag-arcs dag2)) (generalise-subparts dag1 dag2 result-dag path))))) (defun generalise-path-intersection (paths1 paths2) (let ((res nil)) (dolist (p1 paths1) (dolist (p2 paths2) (when (equal p1 p2) (push p1 res) (return)))) res)) (defun generalise-subparts (dag1 dag2 real-result-dag path) (macrolet ((generalise-arcs (arcs) `(dolist (arc ,arcs) (let* ((label (dag-arc-attribute arc)) (v1 (unify-get-dag-value dag1 label)) (v2 (unify-get-dag-value dag2 label)) (new-path (cons label path))) (declare (dynamic-extent new-path)) (if (and v1 v2) (generalise-dags-1 v1 v2 (dag-arc-value arc) new-path) (format t "~&Attribute ~A missing in one or both inputs to ~A" label 'generalise-dags)))))) (generalise-arcs (dag-arcs real-result-dag)) (generalise-arcs (dag-comp-arcs real-result-dag)))) ;;; Subsumption test that's optionally bidirectional. The forwardp and backwardp ;;; arguments encode which subsumption direction(s) to test. Forwardp is "does dag1 ;;; subsume dag2?" and backwardp vice versa. Returns three values: whether dag1 subsumes ;;; dag2, whether dag2 subsumes dag1, and a generalising dag if appropriate (see below). ;;; ;;; The basic algorithm is one-pass, simultaneously traversing the two dags: at each ;;; node visited in dag1 insert pointer to corresponding dag2 node. If we reach a dag1 ;;; node that already has a pointer this corresponds to a reentrancy in dag1 - if ;;; the pointer isn't eq to the current dag2 node then this is a reentrancy in dag1 ;;; that isn't present in dag2, so dag1 can't subsume dag2. Also of course check for ;;; type subsumption on each node. ;;; ;;; This algorithm is described more formally by Malouf, Carroll & Copestake (2000) ;;; ;;; The basic algorithm is extended with two further options to support local ambiguity ;;; packing by creating a new 'representative' dag for a packed node in cases when ;;; straightforward forward/backward subsumption fails. If genp is true then we ;;; attempt to produce a dag that is a generalisation of the input dags, but which is ;;; close in specificity to them to keep the the search space constrained. In this ;;; generalisation approach we select the more general of the types at each ;;; corresponding node; if lcsp is true then in addition we may generalise to least ;;; common supertypes. Restrictions to maintain sufficient specificity are described ;;; under subsume-wffs-p. (defvar *subsume-debug* nil) ; c.f. *unify-debug* #+:sbcl (declaim (sb-ext:always-bound *subsume-debug*)) (defun dag-subsumes-p (dag1 dag2 &optional (forwardp t) backwardp genp lcsp) ;; NB may not be called within a unification context; therefore since it's outside, ;; we know the dags can't be cyclic (so no need to check) nor can there be any ;; temporary dag structure that we need to take account of - moreover this ;; allows subsumption itself to use the temp slots to record intermediate results (incf (statistics-subsumptions *statistics*)) (invalidate-visit-marks) (with-unification-context (dag1) (let (f b fg bg) (flet ((dag-subsumes-p1 (dag1 dag2) ;; need m-v-setq to get return values out correctly if recording fail paths (multiple-value-setq (f b fg bg) (subsume-wffs-p dag1 dag2 forwardp backwardp genp genp lcsp)))) (declare (inline dag-subsumes-p1) (dynamic-extent #'dag-subsumes-p1)) (if *recording-fail-paths-p* (call-with-fail-paths-recording #'dag-subsumes-p1 dag1 dag2) (dag-subsumes-p1 dag1 dag2))) (when *subsume-debug* ;; the result message needs to be a bit nuanced since we might have been called to ;; check subsumption in one direction but not the other (format t "~&Subsumption check ~A~%" (cond ((and f b) "succeeded in both directions") (f "succeeded in forward direction") (b "succeeded in backward direction") ((and forwardp backwardp) "failed in both directions") (forwardp "failed in forward direction") (backwardp "failed in backward direction") (t "failed")))) (values f b ;; only return a generalisation if there's neither forward nor backward subsumption (cond ((or f b) nil) (fg (copy-out-generalisation dag1 nil)) (bg (copy-out-generalisation dag2 t))))))) (defconstant +generalise-mark+ 41) (defun copy-out-generalisation (dag bg) (labels ((fix-up-temp-slots (d) (unless (eql (dag-copy d) +generalise-mark+) (setf (dag-copy d) +generalise-mark+) (setf (dag-comp-arcs d) nil) (when bg (setf (dag-new-type d) (dag-visit d))) ; NB new-type -> NIL if dag-visit empty (dolist (a (dag-arcs d)) (fix-up-temp-slots (dag-arc-value a)))))) (fix-up-temp-slots dag) (copy-dag dag))) (defun dag-equal-p (dag1 dag2) ;; as outlined by Malouf, Carroll & Copestake - but sub-optimal when dags are not equal, ;; since it does not return immediately when one of forwardp or backwardp becomes nil; ;; not called extensively so in practice doesn't really matter (multiple-value-bind (forwardp backwardp) (dag-subsumes-p dag1 dag2 t t) (and forwardp backwardp))) (defun subsume-wffs-p (dag1 dag2 forwardp backwardp fgenp bgenp lcsp) ;; forwardp, backwardp are true when it's possible that dag1 subsumes dag2, and ;; vice-versa respectively - when the possibility has been ruled out the relevant ;; variable is set to false; we return false as soon as all possibilities have been ;; ruled out ;; beyond the standard subsumption test, we may attempt to produce a generalisation ;; of the two dags; at present, this generalisation is only done if the set of ;; reentrancies in one of the dags is equal to or subsumes the set in the other dag ;; * fgenp, bgenp are true if we can produce such a generalisation from dag1 or dag2 ;; respectively; in addition to the reentrancy restriction, each pair of corresponding ;; types in the two dags must be in a subsumption relationship, and we may replace the ;; more specific type in one of the dags with the more general type in the other if ;; the 2 types have the same set of appropriate features ;; * lcsp is true if in addition to the above we can produce a generalisation in which ;; corresponding types in dag1 and dag2 that are not in a subsumption relationship are ;; generalised to their least common supertype; we also require all 3 types involved to ;; have the same set of appropriate features (labels ((print-reentrancy-fail (direction path which) (format t "~&Subsumption ~A due to reentrancy at < ~{~A ~^: ~}> in ~A FS~%" direction (reverse path) which)) (print-type-fail (direction type1 type2 path) (format t "~&Subsumption ~A between ~A and ~A at < ~{~A ~^: ~}>~%" direction type1 type2 (reverse path))) (length= (l1 l2) (cond ((endp l1) (endp l2)) ((endp l2) nil) (t (length= (cdr l1) (cdr l2))))) (subsume-nodes (dag1 dag2 path &aux (donep nil)) (declare (type dag dag1 dag2)) (when (or forwardp fgenp) (let ((c1 (dag-copy dag1))) (cond ((null c1) (setf (dag-copy dag1) dag2)) ((eq c1 dag2) (setq donep t)) (t (when *subsume-debug* (print-reentrancy-fail "not forward" path "first")) (unless (or backwardp bgenp) ;; even when recording fail paths, don't continue beyond this point since ;; any failures below here are already recorded (albeit via a different path) (return-from subsume-wffs-p nil)) (setq forwardp nil fgenp nil))))) (when (or backwardp bgenp) (let ((c2 (dag-comp-arcs dag2))) ; not copy slot in case dags share nodes (cond ((null c2) (setf (dag-comp-arcs dag2) dag1)) ((eq c2 dag1) (setq donep t)) (t (when *subsume-debug* (print-reentrancy-fail "not backward" path "second")) (unless (or forwardp fgenp) (return-from subsume-wffs-p nil)) (setq backwardp nil bgenp nil))))) (cond ;; the flag donep improves on the published algorithm, avoiding repeated processing ;; below a pair of nodes we've visited previously due to reentrancies (donep) ((eq dag1 dag2) ;; when the dags are eq we still need to traverse them to record reentrancies, ;; but other processing can be bypassed (and we don't need to update path) (dolist (arc (dag-arcs dag1)) (subsume-nodes (dag-arc-value arc) (dag-arc-value arc) path))) (t (subsume-types dag1 dag2 path)))) (subsume-types (dag1 dag2 path) (let ((t1 (dag-type dag1)) ; can't take account of new-type since would make non-gen wrong (t2 (dag-type dag2))) (unless (or (eq t1 t2) (and (stringp t1) (stringp t2) (string= t1 t2))) (let ((gcs (greatest-common-subtype t1 t2)) lcs) (cond ((eq gcs t1) ; so t2 is a supertype of t1 (when *subsume-debug* (print-type-fail "not forward" t1 t2 path)) (setq forwardp nil) (cond ((and fgenp (not (dag-new-type dag1)) ; didn't take account of this so give up (length= (dag-arcs dag1) (dag-arcs dag2))) (setf (dag-new-type dag1) t2)) ((or backwardp bgenp *recording-fail-paths-p*) (setq fgenp nil)) ; continue, and don't record this as a failure (t (return-from subsume-wffs-p nil)))) ((eq gcs t2) (when *subsume-debug* (print-type-fail "not backward" t1 t2 path)) (setq backwardp nil) (cond ((and bgenp (not (dag-visit dag2)) ; not new-type slot in case dags share nodes (length= (dag-arcs dag2) (dag-arcs dag1))) (setf (dag-visit dag2) t1)) ((or forwardp fgenp *recording-fail-paths-p*) (setq bgenp nil)) (t (return-from subsume-wffs-p nil)))) (t (when *subsume-debug* (print-type-fail "relationship absent" t1 t2 path)) (setq forwardp nil backwardp nil) (cond ((and lcsp (or (and fgenp (not (dag-new-type dag1))) (and bgenp (not (dag-visit dag2)))) (length= (dag-arcs dag1) (dag-arcs dag2)) (length= (dag-arcs dag1) (appropriate-features-of (setq lcs (least-common-supertype t1 t2))))) (setf (dag-new-type dag1) lcs (dag-visit dag2) lcs)) (*recording-fail-paths-p* (record-fail-path path) ; unequivocal failure, so record (setq fgenp nil bgenp nil) (return-from subsume-types)) ; don't recurse into arcs (t (return-from subsume-wffs-p nil)))))))) (subsume-arcs dag1 dag2 path)) (subsume-arcs (dag1 dag2 path) ;; recurse into arcs; we get to this point if either (1) forwards and/or backwards ;; subsumption or generalisation is still possible, or (2) none of these are possible ;; but we're recording fail paths and t1/t2 are equal or one subsumes the other (let* ((arcs2 (dag-arcs dag2)) (arcs2-tail arcs2)) (dolist (arc1 (dag-arcs dag1)) (let ((f1 (dag-arc-attribute arc1))) (block subsume-arc (do ((tail arcs2-tail (cdr tail))) ; start just beyond previous match ((atom tail)) #1=(when (eq (dag-arc-attribute (car tail)) f1) (let ((new-path (cons f1 path))) (declare (dynamic-extent new-path)) (subsume-nodes (dag-arc-value arc1) (dag-arc-value (car tail)) new-path)) (setq arcs2-tail (cdr tail)) (return-from subsume-arc))) (do ((tail arcs2 (cdr tail))) ((eq tail arcs2-tail)) #1#))))))) (declare (notinline print-reentrancy-fail print-type-fail subsume-types subsume-arcs)) (subsume-nodes dag1 dag2 nil) (values forwardp backwardp fgenp bgenp))) #| ;;; For LDB indexing need to create minimal path value equations (defvar *canonical-paths* nil) (defun canonicalise-fs (fs) (setf *canonical-paths* nil) (canonicalise-fs-aux fs nil nil) (nreverse *canonical-paths*)) (defun canonicalise-fs-aux (fs predictions path) ;; predictions is a list of items of the form ;; (list-of-features type) ;; e.g. ((NIL NE-ORTH) ((HD-ORTH) WORD-ORTH) ((TL-ORTH) E-ORTH)) ;; where these are the values for features predicted by the types ;; met on this path so far ;; ;; path is the path so far, in reverse order (let* ((real-dag (follow-pointers fs)) (type (type-of-fs real-dag))) (unless (member type (loop for pred in predictions when (null (car pred)) collect (cdr pred))) (push (make-unification :lhs (create-path-from-feature-list (reverse path)) :rhs (make-u-value :type type)) *canonical-paths*)) (unless (is-atomic real-dag) (let ((new-predictions (append (extract-paths-for-canonical-rep (constraint-of type) nil) predictions))) (loop for label in (top-level-features-of real-dag) do (let ((new-dag (get-dag-value real-dag label))) (canonicalise-fs-aux new-dag (update-predictions new-predictions label) (cons label path)))))))) (defun update-predictions (predictions feature) (loop for pred in predictions when (eql (caar pred) feature) collect (cons (cdar pred) (cdr pred)))) (defun extract-paths-for-canonical-rep (fs path) ;;; given a FS extracts a list of all paths in the form ;;; required by the predictions in canonicalise-fs-aux (let* ((real-dag (follow-pointers fs)) (type (type-of-fs real-dag))) (cons (cons (reverse path) type (unless (is-atomic real-dag) (loop for label in (top-level-features-of real-dag) append (let ((new-dag (get-dag-value real-dag label))) (extract-paths-for-canonical-rep new-dag (cons label path)))))))) (defun query-canonical-rep (path value path-so-far) ;;; returns a list of path value pairs which can be treated as disjuncts ;;; to query the LDB ;;; Currently this will overgenerate queries (if path (let ((max-type (maximal-type-of (car path)))) (unless max-type (error "~%Unknown feature ~A~%" (car path))) (append (loop for type in (cons max-type (mapcar #'ltype-name (retrieve-descendants max-type))) nconc (let ((type-rest (extract-paths-for-canonical-rep (constraint-of type) nil))) (if (member (cons path value) type-rest :test #'equal) (list (cons (reverse path-so-far) (list type)))))) (query-canonical-rep (cdr path) value (cons (car path) path-so-far)))))) |#