;;; Copyright (c) 1997--2022 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen; ;;; see `LICENSE' for conditions. ;;; LKB ;;; ;;; Base level unification of dags ;;; ;;; Copyright Rob Malouf, John Carroll 1997-1998 All Rights Reserved. ;;; CSLI, Stanford University (in-package :lkb) (eval-when (:compile-toplevel :load-toplevel :execute) (export '(with-unification-context))) ;;; Each temporary value set by the unifier is marked with a unification ;;; generation counter. That way, we can invalidate all temporary values ;;; simply by incrementing the counter. A couple of TFS operations involve ;;; saving then restoring the counter; counter increments therefore have ;;; to be based off the previous maximum otherwise we might mistakenly ;;; repeat an old value. (#+:sbcl sb-ext:defglobal #-:sbcl defvar *unify-generation* 1) (#+:sbcl sb-ext:defglobal #-:sbcl defvar *unify-generation-max* 1) (defvar *visit-generation* 1) (defvar *visit-generation-max* 1) #+:sbcl (declaim (sb-ext:always-bound *visit-generation* *visit-generation-max*)) (declaim (fixnum *unify-generation* *unify-generation-max* *visit-generation* *visit-generation-max*)) (defun invalidate-marks () (setq *unify-generation* (1+ (max *unify-generation-max* *unify-generation*))) (setq *unify-generation-max* *unify-generation*)) (defun invalidate-visit-marks () (setq *visit-generation* (1+ (max *visit-generation-max* *visit-generation*))) (setq *visit-generation-max* *visit-generation*)) ;;; ;;; establish way to return information about failure (rather than printing ;;; the nature of the failure right away). (2-mar-99 - oe@csli) ;;; (defparameter %failure% nil) (defvar *unify-wffs* nil) (defvar *expanding-types* nil "used to indicate that we're within a unification which may try to access a constraint we haven't calculated yet") (defvar *within-unification-context-p* nil) (defvar *unify-debug* nil) (defvar *unify-debug-cycles* nil) ; report when cycle found? #+:sbcl (declaim (sb-ext:always-bound *within-unification-context-p* *unify-debug* *unify-debug-cycles*)) (defvar *safe-not-to-copy-p* nil) #+:sbcl (declaim (sb-ext:always-bound *safe-not-to-copy-p*)) (defvar *recording-fail-paths-p* nil) #+:sbcl (declaim (sb-ext:always-bound *recording-fail-paths-p*)) (defconstant +inside-mark+ 10) ; a fixnum, arbitrarily chosen (defconstant +done-mark+ 11) ; ditto (defmacro node-mark-p (x) `(typep ,x 'fixnum)) ;;; The structure for representing a feature structure node. Structure instances must ;;; only be created via make-dag (or its callers) below, and slots prefixed ;;; with x- must be accessed only via the macros dag-forward etc below, not directly. ;;; Seven slots is a happy number in x86-64 SBCL (1.3.11 onwards), since the resulting ;;; structure object takes up 64 bytes - as reported by sb-ext:primitive-object-size. ;;; Not such a happy number in ARM64 SBCL: 80 bytes. 
(declaim (inline make-dag-x make-safe-dag-x))

(eval-when (:compile-toplevel :load-toplevel :execute) ; for create-pool call below

(defstruct (dag
             (:constructor make-dag-x (type arcs))
             (:copier copy-dag-x))
  type
  arcs
  ;; generation counter for the subsequent 3 slots holding temporary structure
  (x-generation 0 :type fixnum)
  ;; new type computed during unification, or representative for fs's equivalence class
  (x-new-type/forward nil)
  ;; new arcs computed during unification, or counterpart dag in subsume-wffs-p
  (x-comp-arcs nil)
  ;; copy of this fs, or a mark (an integer) used to direct processing
  (x-copy nil)
  ;; independent slot to allow other fs operations to be interleaved with unification
  (x-visit-slot nil))

(defstruct (safe-dag
             (:include dag)
             (:constructor make-safe-dag-x (type arcs))
             (:predicate dag-safe-p))
  ;; instead of adding a boolean slot 'safe-p' to dag structure, save a slot by
  ;; making safe dags a subtype of dags
  ))

#+:sbcl (declaim (sb-ext:freeze-type dag safe-dag))

;;; first attempt at dag recycling. to reduce creation of garbage, keep a pool
;;; of (safe) dag instances. while parsing (i.e. when creating temporary data)
;;; use dags from the pool rather than allocating new ones. the parser has to
;;; record the initial pool pointer (on entry) and can then reset the pointer
;;; once the parse has completed. this should reduce dynamic allocation quite
;;; significantly (at the minor cost of slightly increased initial image size);
;;; garbage collection time should drop accordingly. (17-jul-99 - oe)
;;;
;;; some general thoughts on dag recycling:
;;;
;;; processing typically is organized in discrete intervals, e.g. parsing one
;;; sentence or generating from one input. within one interval, large amounts
;;; of temporary data are created that mostly become garbage as the interval
;;; is completed. a big chunk of garbage results from dag nodes allocated in
;;; the unifier. after one parse is completed, say, and the chart is emptied,
;;; all references (pointers) to edges created during that parse are dropped;
;;; accordingly, references to feature structures associated with those edges
;;; disappear; accordingly, references to dag nodes within those structures.
;;;
;;; although the processor after completion of a parse knows explicitly that
;;; all the temporary data will no longer be used, there is no direct way to
;;; take advantage of this knowledge. instead, the application has to wait for
;;; the garbage collector to eventually infer that knowledge (viz. from lack
;;; of references to those objects) from the overall state of the Lisp system,
;;; and only then dispose of the garbage; call this indirect memory management.
;;;
;;; the dag pool is an attempt to do direct memory management and allow the
;;; processor to use knowledge about temporary data that is no longer used.
;;; the pool is a generic data structure that has the following slots:
;;;
;;; - size: overall size (in number of objects) of this pool;
;;; - position: current fill pointer;
;;; - data: vector of objects held in this pool;
;;; - constructor: function to allocate new objects when the pool is exhausted;
;;; - garbage: counter for pool accesses after exhaustion.
;;;
;;; currently, there is one pool for dag nodes that is initialized (to a size
;;; of *dag-pool-size*) in `dag.lsp'; i.e. *dag-pool-size* many dag nodes are
;;; allocated at load() time and stored in the .data. vector.
;;; the dag node constructor make-dag() on each call tries to retrieve a dag
;;; object from the pool; if the pool is not exhausted it returns a pointer to
;;; the dag node at the current pool position and increments the fill pointer;
;;; if allocation from the pool fails, it falls back to calling the actual
;;; constructor; it will allocate a new object which will ultimately become
;;; garbage (in this case the .garbage. counter is incremented to record that
;;; we overshot the pool size).
;;;
;;; the tricky thing is to decide on the interval boundaries and seal the pool
;;; from leakage. leakage here means that after the pool position was reset,
;;; someone outside of the interval can (will) still make reference to a dag
;;; from the pool. suppose that dag recycling was on in interactive mode: the
;;; pool pointer is reset (to 0) on entry into the parser. since people may
;;; have feature structure windows open browsing results from a previous parse,
;;; dag recycling could mean that parts of those feature structures suddenly
;;; are changed, because dag nodes have been reused (nb: it is not clear that
;;; the problem cannot be solved; it seems most of the display of previous
;;; results is closed or frozen anyway when new input is parsed; in Allegro at
;;; least).
;;;
;;; in batch parsing, in contrast, it can be guaranteed that once the next
;;; parse is started there will be no reference to edges created earlier. the
;;; global *dag-recycling-p* is used to signal whether dag recycling shall be
;;; used (i.e. whether dag node allocation requests try the pool first).
;;;
;;; (7-sep-99 - oe)
;;;
;;; JAC 10-Apr-2021 - Unfortunately, pooling makes it more difficult to release
;;; temporary dag structure for GC after each parse, and also hinders attempts
;;; to keep dags created during parsing in the youngest GC generation (for the
;;; same reason). Pooling should therefore be disabled on platforms on which we
;;; do these things.
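;;;
;;; For reference, the interval bracketing described above would look roughly
;;; like this in a batch-style caller (a sketch only - the real entry points
;;; live in the parser/generator, and parse-one-input below is a hypothetical
;;; stand-in): recycling is enabled and the pool is reset at the start of each
;;; interval, on the assumption that no dag from the previous interval can
;;; still be referenced.

#|
(defun parse-with-dag-recycling (input)
  #+:pooling
  (let ((*dag-recycling-p* t))
    (reset-pools)            ; reclaim every pooled dag node from the last interval
    (parse-one-input input)) ; hypothetical processing function
  #-:pooling
  (parse-one-input input))
|#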
#+:pooling (eval-when (:compile-toplevel :load-toplevel :execute) (defstruct pool (size 0 :type fixnum) (position 0 :type fixnum) (constructor #'(lambda ()) :type function) (data (vector nil) :type simple-vector) (garbage 0 :type fixnum))) #-:pooling (defun pool-p (x) (declare (ignore x)) nil) #+:pooling (defun create-pool (size constructor) (#+:allegro excl:tenuring #-:allegro progn (let ((pool (make-pool :size size :constructor constructor)) (data (make-array (1+ size)))) (loop for i from 0 to (max 0 (- size 1)) do (setf (svref data i) (funcall constructor))) (setf (svref data size) nil) ; must explicitly initialise as nil (setf (pool-data pool) data) pool))) #+:pooling (defvar *dag-pool* (create-pool *dag-pool-size* #'(lambda () (make-safe-dag-x nil nil)))) #-:pooling (defvar *dag-pool* nil) #+:pooling (defun reset-pool (pool &key forcep compressp) (when (or forcep compressp) (loop with data = (pool-data pool) for i from 0 to (pool-position pool) for dag = (svref data i) when dag do (setf (dag-type dag) nil) (setf (dag-arcs dag) nil) (when compressp (compress-dag dag :recursivep nil)))) (setf (pool-position pool) 0) (setf (pool-garbage pool) 0)) #+:pooling (defun reset-pools (&key forcep compressp) (loop for pool in (list *dag-pool*) do (reset-pool pool :forcep forcep :compressp compressp))) #+:pooling (defmacro pool-next (pool) `(let* ((pool ,pool) (position (pool-position pool)) (next (svref (pool-data pool) position))) (cond (next (setf (pool-position pool) (1+ position)) next) (t (incf (pool-garbage pool)) (funcall (pool-constructor pool)))))) ;;; Dag creation (defvar *dag-recycling-p* nil) (declaim (inline make-dag)) (defun make-dag (&key type arcs) ;; Make a dag node, recording in it whether it is safe not to be copied. ;; False for nodes in rules, lexical entries, and type constraints - whenever ;; we create a new FS, we must create a new copy of such nodes otherwise multiple ;; uses of the same rule etc in the same FS could incorrectly become reentrant. ;; True for nodes in edges created during parsing and generation - because the same ;; edge can't be used more than once in a single analysis. (if *safe-not-to-copy-p* (progn #+:pooling (if *dag-recycling-p* (make-safe-recycled-dag-x type arcs) (make-safe-dag-x type arcs)) #-:pooling (make-safe-dag-x type arcs)) (make-dag-x type arcs))) #+:pooling (defun make-safe-recycled-dag-x (type arcs) (let ((new (pool-next *dag-pool*))) (setf (dag-type new) type) (setf (dag-arcs new) arcs) ;; in case this dag is actually from the pool, clear out its scratch ;; slots - cheap and potentially reduces dynamic memory footprint (setf (dag-x-generation new) 0) (setf (dag-x-new-type/forward new) nil) (setf (dag-x-comp-arcs new) nil) (setf (dag-x-copy new) nil) new)) ;;; Dag slot access. In an ideal world these macros would be functions declaimed ;;; inline. However, a compiler is free to ignore inline declarations; if it ignored ;;; them in this case then performance would degrade to an unacceptable extent. 
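;;; The following REPL-style sketch (kept in a comment so nothing is executed on
;;; load) illustrates the generation-counter discipline these macros implement:
;;; a temporary value is visible only while the node's recorded generation
;;; matches *unify-generation*, so invalidate-marks "clears" every temporary
;;; slot of every dag in constant time. The type names are arbitrary.

#|
(let ((dag (make-dag-x 'top nil)))
  (setf (dag-new-type dag) 'agr)
  (list (dag-new-type dag)            ; => AGR - written in the current generation
        (progn
          (invalidate-marks)          ; bump *unify-generation*
          (dag-new-type dag))))       ; => NIL - the temporary value has vanished
|#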
(defmacro current-generation-p (dag) `(= (dag-x-generation ,dag) *unify-generation*)) (defmacro ensure-current-generation ((dag) &body slot-clearers) (let ((gen (gensym)) (current (gensym))) `(let ((,gen (dag-x-generation ,dag)) (,current *unify-generation*)) (unless (= ,gen ,current) (setf (dag-x-generation ,dag) ,current) ,@slot-clearers)))) (defmacro dag-forward (dag) ;; what would be separate structure slots for the forwarding pointer and new-type share ;; a single slot, since in 'Tomabechi World' a new type value is irrelevant if the dag ;; node forwards to another - although when we're wanting a forward value we need to ;; check there's actually a dag occupying this slot rather than an LKB type `(let ((d ,dag)) (when (current-generation-p d) (let ((v (dag-x-new-type/forward d))) (if (dag-p v) v nil))))) (defsetf dag-forward (dag) (new) ;; it's perfectly OK if there's an LKB type already here in the shared slot since the ;; new forwarding pointer makes it redundant `(progn (ensure-current-generation (,dag) (setf (dag-x-comp-arcs ,dag) nil) (setf (dag-x-copy ,dag) nil)) (setf (dag-x-new-type/forward ,dag) ,new))) (defmacro dag-new-type (dag) ;; the caller is expecting the shared new-type/forward slot to contain either nil or ;; an LKB type (a symbol or string) - but if instead there's a valid (this-generation) ;; forwarding pointer then the dag hasn't been dereferenced properly; in that case all ;; bets are off `(let ((d ,dag)) (when (current-generation-p d) (dag-x-new-type/forward d)))) (defsetf dag-new-type (dag) (new) `(progn (ensure-current-generation (,dag) (setf (dag-x-comp-arcs ,dag) nil) (setf (dag-x-copy ,dag) nil)) (setf (dag-x-new-type/forward ,dag) ,new))) (defmacro dag-comp-arcs (dag) `(let ((d ,dag)) (when (current-generation-p d) (dag-x-comp-arcs d)))) (defsetf dag-comp-arcs (dag) (new) `(progn (ensure-current-generation (,dag) (setf (dag-x-new-type/forward ,dag) nil) (setf (dag-x-copy ,dag) nil)) (setf (dag-x-comp-arcs ,dag) ,new))) (defmacro dag-copy (dag) `(let ((d ,dag)) (when (current-generation-p d) (dag-x-copy d)))) (defsetf dag-copy (dag) (new) `(progn (ensure-current-generation (,dag) (setf (dag-x-new-type/forward ,dag) nil) (setf (dag-x-comp-arcs ,dag) nil)) (setf (dag-x-copy ,dag) ,new))) (defmacro dag-visit (dag) `(let ((v (dag-x-visit-slot ,dag))) (when (and (consp v) (eql (car v) *visit-generation*)) (cdr v)))) (defsetf dag-visit (dag) (new) `(let ((v (dag-x-visit-slot ,dag))) (if (consp v) (setf (car v) *visit-generation* (cdr v) ,new) (progn (setf (dag-x-visit-slot ,dag) (cons *visit-generation* ,new)) ,new)))) (defun clone-dag (dag) ;; make a top-level copy of dag or safe-dag - to permit permanent slots (type ;; or arcs) to be modified in-place (copy-dag-x dag)) ;;; Arc creation and access (defmacro make-dag-arc (&key attribute value) ;; a cons not a structure, which would increase memory requirements unacceptably `(cons ,attribute ,value)) (defmacro dag-arc-attribute (arc) `(car ,arc)) (defmacro dag-arc-value (arc) `(cdr ,arc)) ;;; Deref-dag follows forwarding pointers during a unification operation. ;;; Outside unification, the follow-pointers function should be called instead ;;; (it could still be called inside a unification context e.g. when printing ;;; out dags during debugging). It can afford to do some useful consistency ;;; checks. 
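;;; A small commented sketch of the forwarding discipline (the type names are
;;; arbitrary): inside a unification context a node may be forwarded to another
;;; and deref-dag follows the chain; once the context is left, the forwarding
;;; pointer is invalidated along with all other temporary structure, so
;;; follow-pointers returns the node itself.

#|
(let ((d1 (make-dag-x 'cat nil))
      (d2 (make-dag-x 'noun nil)))
  (with-unification-context (d1)
    (setf (dag-forward d1) d2)
    (assert (eq (deref-dag d1) d2)))     ; forwarding followed inside the context
  (assert (eq (follow-pointers d1) d1))) ; outside, the forwarding has disappeared
|#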
(defmacro deref-dag (dag)
  `(the dag
     (let ((d ,dag))
       ;; equivalent to
       ;; (loop for f = (dag-forward d) if f do (setq d f) else return d)
       ;; but restructured to ensure there's no superfluous dag type check
       (let ((gen (dag-x-generation d)) (current *unify-generation*))
         (locally (declare (type dag d)) ; holds throughout the loop
           (loop
             (if (= gen current)
               (let ((f (dag-x-new-type/forward d)))
                 (if (dag-p f)
                   (setq d f gen (dag-x-generation f))
                   (return d)))
               (return d))))))))

(defun follow-pointers (dag)
  (cond
    ((not (dag-p dag))
     (error "~A inconsistency: called with non-dag argument" 'follow-pointers))
    (*within-unification-context-p* (deref-dag dag))
    ((or (dag-new-type dag) (dag-comp-arcs dag) (dag-forward dag))
     (error "~A inconsistency: dag contains temporary structure outside ~
             context of a set of unification operations"
            'follow-pointers))
    (t dag)))

;;; Convenient macros for dag creation

(defmacro create-typed-dag (type)
  `(make-dag :type ,type :arcs nil))

(defmacro create-dag ()
  `(make-dag :type *toptype* :arcs nil))

;;; Some basic properties of dags
;;; NB take care inside unification to account for any temporary structure
;;; (deref pointer, new-type, comp-arcs)

(defmacro has-features (dag)
  `(consp (dag-arcs ,dag)))

(defmacro type-of-fs (dag)
  `(dag-type ,dag))

(defun top-level-features-of (dag)
  (mapcar #'(lambda (arc) (dag-arc-attribute arc)) (dag-arcs dag)))

(declaim (inline get-dag-value))

(defun get-dag-value (dag attribute)
  (dolist (arc (dag-arcs dag) nil)
    (when (eq attribute (dag-arc-attribute arc))
      (return-from get-dag-value (dag-arc-value arc)))))

(declaim (notinline get-dag-value)) ; so only inline when declared locally

(declaim (inline unify-get-dag-value))

(defun unify-get-dag-value (dag attribute)
  (dolist (arc (dag-arcs dag))
    (when (eq attribute (dag-arc-attribute arc))
      (return-from unify-get-dag-value (dag-arc-value arc))))
  (dolist (arc (dag-comp-arcs dag))
    (when (eq attribute (dag-arc-attribute arc))
      (return-from unify-get-dag-value (dag-arc-value arc))))
  nil)

(declaim (notinline unify-get-dag-value))

;;; Unify dags
;;; Interface functions for the unifier. Unify-dags returns the
;;; result of unifying the second argument with the first argument.
;;; Unifiable-dags-p returns T if that unification would be
;;; successful without actually building the result. Neither damages the input
;;; feature structures.
;;;
;;; Marks work as follows:
;;; - the unify mark is updated AFTER every series of unifications (with or
;;;   without a final copy operation) so that any temporary changes to
;;;   feature structures disappear
;;; - the visit mark is updated BEFORE any new series of visits so code
;;;   does not see any old marks

(defmacro with-unification-context ((dag) &body body)
  ;; caller must call copy-dag explicitly at end - before any other
  ;; unification attempt - if result is needed
  ;; NB unwind-protect is needed to deal properly with local exits (e.g.
;; return-from) out of body, as well as throws and errors (declare (ignore dag)) `(if *within-unification-context-p* (error "Entered a nested unification context - should not happen") (let ((*within-unification-context-p* t)) (unwind-protect (progn ,@body) (invalidate-marks))))) (defun unify-dags (dag1 dag2) (if *within-unification-context-p* (when (if *recording-fail-paths-p* (call-with-fail-paths-recording #'(lambda (dag1 dag2) (catch '*fail* (unify1 dag1 dag2 nil))) dag1 dag2) (catch '*fail* (unify1 dag1 dag2 nil))) (if (or *unify-debug* *unify-debug-cycles*) (if (cyclic-dag-p dag1) ;; for the :return variant of *unify-debug* don't print anything since the ;; caller is expected to manage all output nil (progn (when (and *unify-debug* (not (eq *unify-debug* :return))) (format t "~%Unification succeeded")) dag1)) dag1)) (with-unification-context (dag1) (when (unify-dags dag1 dag2) (copy-dag dag1))))) (defun unifiable-dags-p (dag1 dag2) (if *within-unification-context-p* (when (if *recording-fail-paths-p* (call-with-fail-paths-recording #'(lambda (dag1 dag2) (catch '*fail* (unify1 dag1 dag2 nil))) dag1 dag2) (catch '*fail* (unify1 dag1 dag2 nil))) (if (cyclic-dag-p dag1) nil (progn (when (and *unify-debug* (not (eq *unify-debug* :return))) (format t "~%Unification succeeded")) t))) (with-unification-context (dag1) (unifiable-dags-p dag1 dag2)))) ;;; This is the heart of the unification algorithm, which is based on Hideto ;;; Tomabechi's paper in ACL 1991. We implement a version of the algorithm ;;; that is arguably easier to understand and reason about than Tomabechi's; ;;; it's presented and explained by Carroll & Malouf (1999) 'Efficient graph ;;; unification for parsing feature-based grammars'. MS, Stanford University. ;;; ;;; We walk through the two feature structures checking for compatibility and ;;; setting 'forward' pointers. As soon as we find a problem, we stop. If we ;;; get through the whole structure without finding a problem, then we can ;;; copy out the result by following the forward pointers in the first unifact. ;;; ;;; On each individual unification attempt we mark the dag on entry and unmark ;;; it on exit, so we can detect if we have ended up inside cyclic structure. ;;; This is a variant of the three colour algorithm for detecting a cycle in ;;; a directed graph: we're using depth first search, +inside-mark+ as the ;;; 'grey' entry mark (indicating that the node is on the stack), and other ;;; means (forwarding pointers) to avoid revisiting nodes. 
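;;;
;;; A typical multi-step use of this interface (a sketch - the helper below and
;;; its arguments are hypothetical): perform a series of unifications inside a
;;; single context and copy the result out exactly once at the end, which is the
;;; same protocol unify-dags itself follows for the one-step case:
;;;
;;; (defun unify-all (base others)
;;;   ;; unify each dag in OTHERS into BASE; return a fresh copy of the result on
;;;   ;; success or nil on failure - no input dag is damaged either way
;;;   (with-unification-context (base)
;;;     (when (loop for other in others always (unify-dags base other))
;;;       (copy-dag base))))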
;;; ;;; To store cases where a new constraint is unified in, evaluate ;;; ;;; (defparameter *recording-constraints-p* nil ;;; "needed for LilFes conversion") ;;; (defvar *type-constraint-list* nil) ;;; ;;; and in unify2, insert ;;; (when *recording-constraints-p* ;;; (pushnew new-type *type-constraint-list* :test #'eq)) (defmacro unify-get-type (dag) `(let ((d ,dag)) (or (dag-new-type d) (dag-type d)))) (defun unify1 (dag1 dag2 path) (labels ((unify2 (dag1 dag2 path) (declare (type dag dag1 dag2)) ; guaranteed by callers, holds throughout unify2 (let ((t1 (unify-get-type dag1)) (t2 (unify-get-type dag2))) (multiple-value-bind (new-type constraintp) (if (eq t1 t2) t1 (greatest-common-subtype t1 t2)) (if new-type (progn (unless (eq new-type t1) (setf (dag-new-type dag1) new-type)) (when (and constraintp *unify-wffs*) (setq dag1 (unify-in-constraint dag1 new-type path))) ;; as well as testing for no arcs we also have to test for no comp-arcs; ;; in Tomabechi's original algorithm a node without arcs would get ;; forwarded and so would have no chance to acquire any comp-arcs - BUT our ;; unify-paths etc. functions could previously have added to comp-arcs so ;; we do need the additional comp-arcs tests (cond ((and (null (dag-arcs dag1)) (null (dag-comp-arcs dag1))) (unless (eq new-type t2) (setf (dag-new-type dag2) new-type)) (setf (dag-forward dag1) dag2)) ((and (null (dag-arcs dag2)) (null (dag-comp-arcs dag2))) (setf (dag-forward dag2) dag1)) ((eql (dag-copy dag1) +inside-mark+) (when (or *unify-debug* *unify-debug-cycles*) (unify1-failure :cycle path)) (throw '*fail* nil)) (t (setf (dag-forward dag2) dag1) (setf (dag-copy dag1) +inside-mark+) (unify-arcs dag1 dag2 path) (setf (dag-copy dag1) nil) dag1))) (progn (when *unify-debug* (unify1-failure :clash path t1 t2)) (if *recording-fail-paths-p* (progn (record-fail-path path) dag1) (throw '*fail* nil))))))) ;; (unify-in-constraint (dag1 new-type path) ;; might have to copy constraint to prevent separate uses in this unification ;; becoming reentrant (let ((constraint (if *expanding-types* (possibly-new-constraint-of new-type) ; throws *fail* if faulty (may-copy-constraint-of new-type)))) (if *unify-debug* (or (catch '*fail* (unify2 dag1 constraint path)) (progn (unify1-failure :constraints path new-type) (throw '*fail* nil))) (unify2 dag1 constraint path)))) ; not unify1 since deref unnecessary ;; (unify-arcs (dag1 dag2 path) (let* ((arcs1 (dag-arcs dag1)) (arcs1-tail arcs1) (comp-arcs1 (dag-comp-arcs dag1)) (new-arcs1 comp-arcs1)) (flet ((process-arcs (arcs2) (dolist (arc2 arcs2) (let* ((f2 (dag-arc-attribute arc2)) (arc1 (block find-attribute ;; arcs1 'almost-sorted' so start traversal just beyond previous match (do ((tail arcs1-tail (cdr tail))) ((atom tail)) #1=(when (eq (dag-arc-attribute (car tail)) f2) (setq arcs1-tail (cdr tail)) (return-from find-attribute (car tail)))) (do ((tail arcs1 (cdr tail))) ((eq tail arcs1-tail)) #1#) ;; comp-arcs1 in reverse order and usually short so do normal traversal (dolist (a comp-arcs1) (when (eq (dag-arc-attribute a) f2) (return-from find-attribute a))) nil))) (if arc1 (let ((new-path (cons f2 path))) (declare (dynamic-extent new-path)) (unify1 (dag-arc-value arc1) (dag-arc-value arc2) new-path)) (push arc2 new-arcs1)))))) (process-arcs (dag-arcs dag2)) (let ((comp-arcs2 (dag-comp-arcs dag2))) (when comp-arcs2 (process-arcs (reverse comp-arcs2)))) ; processing a reversed copy restores order (unless (eq new-arcs1 comp-arcs1) (setf (dag-comp-arcs dag1) new-arcs1)))))) ;; (declare (notinline 
unify-in-constraint)) ; since comparatively rarely called (let ((dag1 (deref-dag dag1)) (dag2 (deref-dag dag2))) (if (eq dag1 dag2) dag1 (unify2 dag1 dag2 path))))) (defun unify1-failure (kind path &optional t1 t2) (let ((path (reverse path))) ; NB path has been stack-allocated (if (eq *unify-debug* :return) (setq %failure% (list kind path t1 t2)) (let ((msg (ecase kind (:constraints (when *expanding-types* (format t "~%Problem in ~A" *expanding-types*)) (format nil "Unification with constraint of type ~A failed at path < ~{~A ~^: ~}>" t1 path)) (:cycle (format nil "Unification failed due to encountering a cycle at path < ~{~A ~^: ~}>" path)) (:clash (format nil "Unification of ~A and ~A failed at path < ~{~A ~^: ~}>" t1 t2 path))))) ;; :window pops up a message dialog - but even in this case also print it in ;; Lkb Top as a more permanent record (when (eq *unify-debug* :window) (show-message-window msg)) (format t "~%~A" msg))))) (defun unify-arcs (dag1 dag2 path) (declare (type dag dag1 dag2) (list path)) ; guaranteed by unify2 (let* ((arcs1 (dag-arcs dag1)) (arcs1-tail arcs1) (comp-arcs1 (dag-comp-arcs dag1)) (new-arcs1 comp-arcs1)) (flet ((process-arcs (arcs2) (dolist (arc2 arcs2) (let* ((f2 (dag-arc-attribute arc2)) (arc1 (block find-attribute ;; due to type inheritance, attributes in arcs lists are usually in ;; similar order, so start arcs1 traversal just beyond previous match (do ((tail arcs1-tail (cdr tail))) ((atom tail)) #1=(when (eq (dag-arc-attribute (car tail)) f2) (setq arcs1-tail (cdr tail)) (return-from find-attribute (car tail)))) (do ((tail arcs1 (cdr tail))) ((eq tail arcs1-tail)) #1#) ;; comp-arcs usually short so just do a straightforward traversal (dolist (a comp-arcs1) (when (eq (dag-arc-attribute a) f2) (return-from find-attribute a))) nil))) (if arc1 (let ((new-path (cons f2 path))) (declare (dynamic-extent new-path)) (unify1 (dag-arc-value arc1) (dag-arc-value arc2) new-path)) (push arc2 new-arcs1)))))) (let ((arcs2 (dag-arcs dag2)) (comp-arcs2 (dag-comp-arcs dag2))) (when arcs2 (process-arcs arcs2)) (when comp-arcs2 ;; comp-arcs is built backwards, so process a reversed copy (process-arcs (reverse comp-arcs2)))) (unless (eq new-arcs1 comp-arcs1) (setf (dag-comp-arcs dag1) new-arcs1))))) ;;; Only called when type hierarchy is being expanded. Therefore need to check ;;; that type-name argument actually has a well-formed constraint, and fail ;;; informatively if not. (defun possibly-new-constraint-of (type-name) (let ((new-constraint (wf-constraint-of type-name))) (if new-constraint (copy-dag-completely new-constraint) (progn (when *unify-debug* (if (eq *unify-debug* :return) (setf %failure% (list :illformed-constraint nil type-name nil nil)) (format t "Problem in ~A due to ~A" *expanding-types* type-name))) (throw '*fail* nil))))) ;;; Similar to possibly-new-constraint-of, but for use after type hierarchy ;;; expansion, and also implements a cache for each type. The return value is a ;;; 'fresh' copy if we've previously returned the constraint during this unification ;;; attempt, otherwise the constraint itself. Copies are cached and we cycle ;;; through them, creating fresh ones if needed. Cached constraints are not ;;; 'safe' wrt copying, i.e. the dags in them must not be created as safe ;;; dags, otherwise they could incorrectly become reentrant. 
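;;;
;;; Concretely, the cache stored in the type record's constraint-mark field is a
;;; list whose car is the generation mark, whose cadr is the list of copies ready
;;; for re-use, and whose cddr is the list of copies already handed out in the
;;; current generation. A sketch of the resulting behaviour (assuming a grammar
;;; is loaded and that SYNSEM names a type whose constraint has been expanded):
;;;
;;; (with-unification-context (scratch) ; the argument symbol is ignored by the macro
;;;   (let ((c1 (may-copy-constraint-of 'synsem))
;;;         (c2 (may-copy-constraint-of 'synsem)))
;;;     (list (eq c1 (ltype-constraint (get-type-entry 'synsem))) ; => T
;;;           (eq c1 c2))))                                       ; => NIL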
(defun may-copy-constraint-of (type-name) (let* ((type-record (get-type-entry type-name)) (constraint (ltype-constraint type-record)) (cache (ltype-constraint-mark type-record)) (*safe-not-to-copy-p* nil) (*dag-recycling-p* nil)) (unless (consp cache) (setq cache (list* 0 nil nil)) ; mark, unused, used (setf (ltype-constraint-mark type-record) cache)) (cond ((not (eql (car cache) *unify-generation*)) #+:cdebug (format t "~&may-copy-constraint-of(): `~(~a~)' cache hit; new cycle;~%" type-name) (setf (car cache) *unify-generation*) (when (cddr cache) (setf (cadr cache) ; old used copies, if any, become ready for re-use (nconc (cddr cache) (cadr cache)))) (setf (cddr cache) nil) constraint) ; first return constraint itself ((cadr cache) #+:cdebug (format t "~&may-copy-constraint-of(): `~(~a~)' cache hit;~%" type-name) (let ((pre (pop (cadr cache)))) (push pre (cddr cache)) ; previously computed copy becomes used pre)) (t #+:cdebug (format t "~&may-copy-constraint-of(): `~(~a~)' cache miss (~d);~%" type-name (+ (length (cadr cache)) (length (cddr cache)))) (let ((new (copy-dag-completely constraint))) (push new (cddr cache)) ; new copy becomes used new))))) ;;; Copy feature structure after a successful unification, respecting forward ;;; pointers set by the unifier. ;;; ;;; In the copied feature structure, subgraphs are shared with the input dags ;;; where possible, as described by Malouf, Carroll & Copestake (2000). A new ;;; dag node is created only if one of the following 4 criteria holds: there are ;;; comp-arcs, the dag's type has changed, a descendant node has been copied, or ;;; the dag is not 'safe' (i.e. it is part of the grammar or lexicon). ;;; ;;; For the cyclic dag failure diagnostics, the feature path is allocated on the stack ;;; and passed down the call chain. An alternative would be to accumulate the path ;;; while returning _up_ the call chain after a cycle is detected; not as good, ;;; since it would require an extra test and return route after each call. (defun copy-dag (dag) (if *within-unification-context-p* (catch '*cyclic* (copy-dag1 dag nil)) (error "~A called outside a unification context - should not happen" 'copy-dag))) (defun copy-dag1 (dag path &aux copy) (declare (type dag dag)) (setq dag (deref-dag dag)) (setq copy (dag-copy dag)) (cond ((eql copy +inside-mark+) (when (or *unify-debug* *unify-debug-cycles*) (copy-dag1-failure path)) (throw '*cyclic* nil)) ((and copy (not (node-mark-p copy))) ; already copied? copy) (t (labels ((copy-dag1-arcs (arcs) ;; in arcs, we can share each arc and the top-level list structure but we must ;; not modify anything ;; share the list tail beyond the last arc that is copied; do this by recursing ;; down the arcs list - rather than iterating - in order to build the new list ;; starting at the end. 
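;;; A commented sketch of the sharing criteria above (the type and feature names
;;; are arbitrary, and it assumes a loaded grammar in which WORD is a subtype of
;;; SIGN): the root node is copied because its type changes during unification,
;;; while the untouched subdag under AGR is shared with the input rather than
;;; copied.

#|
(let* ((*safe-not-to-copy-p* t)          ; inputs count as 'safe', as in parsing
       (agr (make-dag :type 'agreement :arcs nil))
       (dag1 (make-dag :type 'sign
                       :arcs (list (make-dag-arc :attribute 'agr :value agr))))
       (dag2 (make-dag :type 'word :arcs nil))
       (result (unify-dags dag1 dag2)))
  (and result
       (not (eq result dag1))                 ; root copied - its type was specialised
       (eq (get-dag-value result 'agr) agr))) ; => T: subdag shared, not copied
|#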
Approach is inspired by Strandh & Durand (2015) ;; 'Processing List Elements in Reverse Order', 8th European Lisp Symposium (when arcs (let* ((arc (car arcs)) (new-path (cons (dag-arc-attribute arc) path)) (tail (copy-dag1-arcs (cdr arcs))) ; recurse to end before starting to copy (v (copy-dag1 (dag-arc-value arc) new-path))) (declare (dynamic-extent new-path)) (cond ((not (eq v (dag-arc-value arc))) (cons (make-dag-arc :attribute (dag-arc-attribute arc) :value v) tail)) ((not (eq tail (cdr arcs))) (cons arc tail)) ; share arc (t arcs))))) ; share top-level list structure (copy-dag1-comp-arcs (comp-arcs arcs) ;; in comp-arcs, we can share each arc but not modify them; we could also modify ;; the top-level list structure, but no benefit in doing that (dolist (arc comp-arcs arcs) (let* ((new-path (cons (dag-arc-attribute arc) path)) (v (copy-dag1 (dag-arc-value arc) new-path))) (declare (dynamic-extent new-path)) (push ; unify-arcs built comp-arcs in reversed order - push rectifies this (if (eq v (dag-arc-value arc)) arc (make-dag-arc :attribute (dag-arc-attribute arc) :value v)) arcs))))) (declare (notinline copy-dag1-arcs copy-dag1-comp-arcs)) (let ((arcs (dag-arcs dag)) (comp-arcs (dag-comp-arcs dag)) (new-type (dag-new-type dag))) (setf (dag-copy dag) +inside-mark+) (when arcs (setq arcs (copy-dag1-arcs arcs))) (when comp-arcs (setq arcs (copy-dag1-comp-arcs comp-arcs arcs))) (setf (dag-copy dag) (if (or (not (eq arcs (dag-arcs dag))) ; comp-arcs non-empty or a descendant copied? new-type (not (dag-safe-p dag))) (make-dag :type (or new-type (dag-type dag)) :arcs arcs) dag))))))) (defun copy-dag1-failure (path) (let ((path (reverse path))) ; NB path has been stack-allocated (if (eq *unify-debug* :return) (setq %failure% (list :cycle path)) (let ((msg (format nil "~%Copy failed due to encountering a cycle at path < ~{~A ~^: ~}>" path))) (when (eq *unify-debug* :window) (show-message-window msg)) (format t "~%~A" msg))))) #| ;;; Simpler version, which shares individual arcs where possible but does not share any ;;; top level structure in arcs lists. With it, parsing allocates 10% more memory and ;;; is about the same factor slower. (defun copy-dag1 (dag path &aux copy) (setq dag (deref-dag dag)) (setq copy (dag-copy dag)) (cond ((eql copy +inside-mark+) (when (or *unify-debug* *unify-debug-cycles*) (copy-dag1-failure path)) (throw '*cyclic* nil)) ((and copy (not (node-mark-p copy))) ; already copied? copy) (t (let ((new-arcs nil) (descendant-copied-p nil)) (setf (dag-copy dag) +inside-mark+) (flet ((copy-dag1-arcs (arcs) (dolist (arc arcs) (let* ((new-path (cons (dag-arc-attribute arc) path)) (v (copy-dag1 (dag-arc-value arc) new-path))) (declare (dynamic-extent new-path)) (push (if (eq v (dag-arc-value arc)) arc (progn (setq descendant-copied-p t) (make-dag-arc :attribute (dag-arc-attribute arc) :value v))) new-arcs))))) (copy-dag1-arcs (dag-comp-arcs dag)) (copy-dag1-arcs (dag-arcs dag))) (setf (dag-copy dag) (let ((new-type (dag-new-type dag))) (if (or (dag-comp-arcs dag) ; test criteria 1-4 in Malouf et al. new-type descendant-copied-p (not (dag-safe-p dag))) (make-dag :type (or new-type (dag-type dag)) :arcs (nreverse new-arcs)) dag))))))) |# ;;; Physically copy a dag - leaves the source dag untouched. ;;; Assumes all forwarding pointers, comp-arcs, and new-type have been dealt ;;; with already (i.e. 
by copy-dag) ;;; Use -visit field not -copy since may be called from within unify (defun copy-dag-completely (dag) (let ((toptype-dag (create-dag))) (labels ((copy-dag-completely1 (dag) (or (dag-visit dag) (progn (setf (dag-visit dag) toptype-dag) ; 3/98 - avoid non-termination if dag cyclic (let ((arcs (loop for arc in (dag-arcs dag) collect (make-dag-arc :attribute (dag-arc-attribute arc) :value (copy-dag-completely1 (dag-arc-value arc)))))) (setf (dag-visit dag) (make-dag :type (dag-type dag) :arcs arcs))))))) (invalidate-visit-marks) (copy-dag-completely1 dag)))) ;;; facilitate ambiguity packing: the parameter *packing-restrictor* contains ;;; a list of features to be erased wherever they occur. still, the ;;; PAGE style restrictor (allowing full path specifications and re-entrancies ;;; in the restrictor by using a restrictor dag as a `mask') would be a good ;;; thing to have. berthold, for the german grammar, reports the cannot get ;;; full ROI from packing, since he would require a more powerful restrictor ;;; to eliminate a feature in some context but not in others, since otherwise ;;; there is too much inconsistency in the forest. something to do with ;;; partial VP fronting, and he wants it for PET anyway, but still worth for us ;;; to keep in mind. (30-oct-04; oe) ;;; ;;; We also need to know about minimal types for feature values for packing. ;;; The following code caches the values as accessed (defun copy-dag-partially (dag) (labels ((copy-dag-partially1 (dag path) (or (dag-visit dag) (let* ((restrictp (and (consp path) (find (first path) (the list *packing-restrictor*) :test #'eq))) (type (if restrictp (minimal-type-for (first path)) (dag-type dag))) (arcs (dag-arcs dag)) (arcs-restricted (if (or restrictp (null arcs)) nil (loop for feat in (ltype-appfeats (get-type-entry type)) for arc = (find feat arcs :key #'(lambda (a) (dag-arc-attribute a)) :test #'eq) when arc collect (make-dag-arc :attribute feat :value (let ((new-path (cons feat path))) (declare (dynamic-extent new-path)) (copy-dag-partially1 (dag-arc-value arc) new-path))))))) (declare (list arcs)) (setf (dag-visit dag) (make-dag :type type :arcs arcs-restricted)))))) (invalidate-visit-marks) (copy-dag-partially1 dag nil))) (defun minimal-type-for (feature) (or (gethash feature *feature-minimal-type*) (let* ((introduction (maximal-type-of feature)) (constraint (and introduction (constraint-of introduction))) (type (or (and constraint (type-of-fs (get-dag-value constraint feature))) *toptype*))) (setf (gethash feature *feature-minimal-type*) type)))) ;;; Test for cycles without constructing a permanent result - in contrast to copy-dag. ;;; Only makes sense for this test to be run inside a unification context, after ;;; unify and possibly copying operations. Would be pointless as a standalone test ;;; because there's no way to actually construct a permanent cyclic dag (except by ;;; writing code that misuses the dag machinery in some way). ;;; ;;; The algorithm uses the 'copy' slot in dag nodes to mark the path of arcs it's ;;; currently going down, and also to avoid re-checking parts of the dag that have ;;; previously been copied, since copying does its own cyclic check. (defun cyclic-dag-p (dag) (labels ((cyclic-dag-p1 (dag &aux copy) (declare (type dag dag)) (setq dag (deref-dag dag)) (setq copy (dag-copy dag)) (cond ((eql copy +done-mark+) nil) ((eql copy +inside-mark+) '(t)) ; cycle detected ((and copy (not (node-mark-p copy))) ; has copy-dag already been here and checked? 
nil) ((or (dag-arcs dag) (dag-comp-arcs dag)) (setf (dag-copy dag) +inside-mark+) (macrolet ((check-arcs (arcs) `(dolist (arc ,arcs) (let ((c (cyclic-dag-p1 (dag-arc-value arc)))) (when c (return-from cyclic-dag-p1 (cons (dag-arc-attribute arc) c))))))) (check-arcs (dag-arcs dag)) (check-arcs (dag-comp-arcs dag))) (setf (dag-copy dag) +done-mark+) nil)))) (if *within-unification-context-p* (let ((c (cyclic-dag-p1 dag))) (when c (when (or *unify-debug* *unify-debug-cycles*) (setq c (butlast c)) ; remove final t flag to leave path (if (eq *unify-debug* :return) (setq %failure% (list :cycle c)) (let ((msg (format nil "Unification failed due to cyclic check finding cycle at path < ~{~A ~^: ~}>" c))) (when (eq *unify-debug* :window) (show-message-window msg)) (format t "~%~A" msg)))) t)) (error "~A called outside a unification context - should not happen" 'cyclic-dag-p)))) ;; Remove the marks left by cyclic-dag-p (defun fix-dag (dag) (setq dag (deref-dag dag)) (when (dag-copy dag) (setf (dag-copy dag) nil) (dolist (arc (dag-arcs dag)) (fix-dag (dag-arc-value arc))) (dolist (arc (dag-comp-arcs dag)) (fix-dag (dag-arc-value arc))) dag)) ;;; Change the top-level type of a dag. Only overwrite new-type if necessary (defun retype-dag (dag new-type) (if *within-unification-context-p* (let ((d (deref-dag dag))) (unless (eq (dag-type d) new-type) (setf (dag-new-type d) new-type)) d) (let ((d (clone-dag dag))) (setf (dag-type d) new-type) d))) (defun destructively-retype-dag (dag new-type) (setf (dag-type dag) new-type) dag) ;;; WFFs ;;; Well-formedness checking ;;; we assume we have a feature structure which we want to convert ;;; to a well-formed fs if possible. ;;; 1. Push down type so all features are appropriate ;;; 2. Recurse on features ;;; 3. Unify with constraint of new type ;;; (given that this is well-formed) ;;; ;;; _fix_me_ ;;; see comments on process-unifications() regarding the need for a separate ;;; unification context here; i should email john and ann about this, i guess. ;;; (7-dec-06; oe) (defun create-wffs (fs &optional (contextp t)) ;; non-destructive! ;; returns nil on failure (cond (contextp (with-unification-context (fs) (create-wffs fs nil))) (t (invalidate-visit-marks) (let ((res (make-well-formed fs nil))) (when res (copy-dag fs)))))) (defun make-well-formed (fs features-so-far &optional type-name) ;; this code could no doubt be sped up quite a lot by being more ;; careful about multiple lookups etc (let ((real-dag (deref-dag fs))) (or (dag-visit real-dag) ; been here before? 
(let ((current-type (unify-get-type real-dag))) (setf (dag-visit real-dag) t) (if (atomic-type-p current-type) (if (not (has-features real-dag)) ;; atomic types are well-formed by definition ;; as long as there aren't any features on the dag t (progn (format t "~%Error in ~A: ~% Atomic type ~A specified to have features at ~:A" type-name current-type (reverse features-so-far)) nil)) (let ((fs-type (find-type-of-fs real-dag current-type type-name features-so-far))) (if fs-type (cond ((and type-name (or (eq fs-type type-name) (subtype-p fs-type type-name))) (format t "~%Error in ~A: ~% Type ~A occurs in constraint ~ for type ~A at ~:A" type-name fs-type type-name (reverse features-so-far)) nil) (t (really-make-well-formed real-dag fs-type features-so-far type-name))) nil))))))) (defun find-type-of-fs (real-dag current-type id path) (let* ((existing-features (top-level-features-of real-dag)) (possible-type (if existing-features (maximal-type-of-list existing-features) *toptype*))) (cond ((null possible-type) (format t "~%Error in ~S:~% No possible type for features ~S at path ~:S" (or id "unknown") existing-features (reverse path)) nil) ((not existing-features) current-type) ((greatest-common-subtype current-type possible-type)) (t (format t "~%Error in ~A:~% Type of fs ~A at path ~:A is incompatible with ~ features ~:A which have maximal type ~A" (or id "unknown") current-type (reverse path) existing-features possible-type) nil)))) (defun really-make-well-formed (real-dag fs-type features-so-far type-name) (when (really-make-features-well-formed real-dag features-so-far type-name) (let ((constraint ;; !!! outside here must stay within current visiting generation (let ((*visit-generation* *visit-generation*)) ;; if we're making a type wf then type-name is non-nil - in this case ;; we must always copy constraints before unifying in to ensure that ;; no types contain structure in common in the end ;; - otherwise we only need a copy in cases where a particular type ;; constraint is being used more than once (if type-name (catch '*fail* (possibly-new-constraint-of fs-type)) (may-copy-constraint-of fs-type))))) (cond ((null constraint) ;; (format t "~%No well-formed constraint for ~A" fs-type) ;; we've already warned about this, and duplication is annoying nil) ((progn (setq real-dag (retype-dag real-dag fs-type)) (unify-wffs real-dag constraint type-name))) (t (format t "~%Error in ~A:~% Unification with constraint of ~A failed at path ~:A" (or type-name "unknown") fs-type (reverse features-so-far)) nil))))) (defun really-make-features-well-formed (real-dag features-so-far type-name) (loop for arc in (dag-arcs real-dag) always (let ((path (cons (dag-arc-attribute arc) features-so-far))) (declare (dynamic-extent path)) (make-well-formed (dag-arc-value arc) path type-name)))) ;;; It is possible for two wffs to be unified and the result to need ;;; the constraint of the resulting type to be unified in - ;;; indicated by *unify-wffs* being t (defun unify-wffs (dag1 dag2 &optional expanding-types) (let ((*unify-wffs* t) (*expanding-types* expanding-types)) (unify-dags dag1 dag2))) (defun unifiable-wffs-p (dag1 dag2) (let ((*unify-wffs* t)) (unifiable-dags-p dag1 dag2))) (defun unify-wffs-with-fail-messages (dag1 dag2 path &optional window-p) ;; support interactive unification checking, which informs the user where ;; unification failed (declare (ignore path)) (let ((*unify-debug* (if window-p :window t))) (unify-wffs dag1 dag2))) (defun find-substructures-subsumed-by (dag type &optional path) (let 
((dag (deref-dag dag))) (append (when (subtype-or-equal (type-of-fs dag) type) (list (cons path dag))) (loop for arc in (dag-arcs dag) for next = (cons (dag-arc-attribute arc) path) nconc (find-substructures-subsumed-by (dag-arc-value arc) type next))))) (defun dag-to-list (dag &key end key) (labels ((collect (d end) (let ((head (existing-dag-at-end-of d *list-head*)) (tail (existing-dag-at-end-of d *list-tail*))) (when (and head tail (not (eq d end))) (let ((value (if key (funcall key head) head))) (cons value (collect tail end))))))) (let ((list (get-dag-value dag *diff-list-list*)) (last (get-dag-value dag *diff-list-last*))) (if (and list last) (collect list (or end last)) (collect dag end))))) ;;; Get rid of pointers to any temporary dag structure (defun compress-dag (dag &key (recursivep t)) ;; In this function we're concerned with the raw dag structure, not the FS it ;; represents, so we don't even consider dereferencing the dag. ;; We use the generation counter to check whether this dag structure has already ;; been compressed (when (and (dag-p dag) (if *within-unification-context-p* (/= (dag-x-generation dag) *unify-generation*) (or (/= (dag-x-generation dag) 0) (dag-x-visit-slot dag)))) (setf (dag-x-generation dag) (if *within-unification-context-p* *unify-generation* 0)) (setf (dag-x-new-type/forward dag) nil) (setf (dag-x-comp-arcs dag) nil) (setf (dag-x-copy dag) nil) (setf (dag-x-visit-slot dag) nil) (when recursivep (dolist (arc (dag-arcs dag)) (compress-dag (dag-arc-value arc) :recursivep recursivep))))) ;;; End of file