;;; Copyright (c) 2003--2004 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen; ;;; see `LICENSE' for conditions. ;;; Code for RMRS composition ;;; Building semantic structures via an algebra ;;; FIX - not dealing with optional elements in rules ;;; FIX - share-var-info (in-package :mrs) ;;; final output structure in basermrs.lisp ;;; **************************************************** ;;; Structures (defstruct (semstruct (:include rmrs)) features hook slots ) ;;; hook has an indices structure as a value ;;; ;;; a full version of slots would be a list of indices, ;;; tagged by some sort of name ;;; corresponding to the syntax ;;; however, for the robust semantic composition, ;;; we don't know anything about argument-hood lexically, ;;; and slots is generally unset, except (perhaps) for prepositions ;;; where the class may have known properties (this assumes the ;;; preposition/particle distinction is made) ;;; ;;; doing without slots completely doesn't work as soon as scopal ;;; modification (including negation) is treated properly, ;;; because the modifier `hides' the verb etc's ltop. ;;; But may be able to ;;; get away with an `anchor' - a single label - instead of the full thing (defstruct indices index label default ;; don't want to output hooks ;; which have been created by defaul ;;; extarg ) ;;; Rules (defstruct rmrs-rule-set name alternatives) ;;; alternatives are a list of rules (mostly singletons) (defstruct rmrs-rule name condition dtrs arity head semstruct eqs) (defstruct equality ;;; just used in rules - eqs in rule is a list ;;; of these eq-els) (defstruct pointer ;;; just used in rule equalities - a pointer points ;;; to a variable in a dtr dtrnum hook-el) ;;; Tag information (defstruct rmrs-tag-template name semstruct) ;;; information from a parse tree ;;; constructing this will be formalism specific (defstruct word-info lemma ;; downcase this systematically original ;; original string pos from to) ;;;; *********************************** ;;;; Utility functions (defun find-var-type (str) ;;; this is only used in grammar/lextag rmrs files ;;; to abbreviate variables ;;; It should never be used when reading in an MRS/RMRS file (string-downcase (subseq str 0 1))) (defun construct-grammar-var (var-string &optional extras) (make-grammar-var :type (find-var-type var-string) :id var-string :extra extras)) (defparameter *DEFAULT-TEMPLATE* (MAKE-RMRS-TAG-TEMPLATE :NAME "DEFAULT" :SEMSTRUCT (MAKE-SEMSTRUCT :HOOK (make-indices :index (construct-grammar-var "U") :label (construct-grammar-var "H")) :LISZT (LIST (MAKE-REL :pred "DUMMY-PRED" :handel (construct-grammar-var "H") :flist (LIST (construct-grammar-var "U"))))))) (defun dummy-pred-p (str) (equal str "DUMMY-PRED")) (defun make-dummy-pred nil "DUMMY-PRED") (defun dummy-constant-p (str) (equal str "DUMMY-CONSTANT")) (defun make-dummy-constant nil "DUMMY-CONSTANT") (defun make-default-hook nil ;;; while reading in (make-indices :index (construct-grammar-var "U") :label (construct-grammar-var "H") :default t)) ;;; *************************** ;;; Main entry point (defparameter *rmrs-output-type* 'xml) (defparameter *anchor-rmrs-p* t) ;;; if t, uses new style composition ;;; set to nil for old style ;;; FIX - to be removed once the new code is working ;;; and we can delete the old version (defun construct-sem-for-tree (tree origin ostream &optional original) ;;; takes a tree and returns a semstruct - guaranteed ;;; - unless there's a syntax error in input data ;;; ;;; origin is a record of where the rmrs comes from ;;; - currently this will always be :rasp ;;; original is the original string (in some form) (initialize-rmrs-variables) (let* ((semstruct (construct-sem-for-tree-aux tree original)) (canonical-bindings (close-bindings (semstruct-bindings semstruct))) (rmrs (make-rmrs :top-h (indices-label (semstruct-hook semstruct)) :liszt (semstruct-liszt semstruct) :rmrs-args (semstruct-rmrs-args semstruct) :in-groups (semstruct-in-groups semstruct) :h-cons (semstruct-h-cons semstruct) :cfrom (semstruct-cfrom semstruct) :cto (semstruct-cto semstruct) :bindings canonical-bindings :origin origin))) (canonicalise-rmrs rmrs) ;; destructively modifies rmrs ;; so variables are replaced by the canonical variable (if (eql ostream :quiet) rmrs (output-rmrs1 rmrs *rmrs-output-type* ostream)))) ;;; ********************************************************** ;;; Code for computing transitive closure of variable equalities ;;; ;;; the bindings are a list of binding structures, ;;; each of which relates a variable with a set of equivalents ;;; (as the bindings are being constructed), and ultimately ;;; with a canonical variable. The key is the id (defstruct binding var id equivs canonical) (defun lookup-canonical-var (var binding-list) ;;; returns the canonical var (let* ((id (var-id var)) (var-entry (find id binding-list :key #'binding-id))) (if var-entry (binding-canonical var-entry) var))) ;;; revised code to avoid ridiculously slow version. This version could ;;; be further improved, but may not be the rate limiting step any more. ;;; Very occasional differences in results compared to prior version ;;; current version appears correct. ;;; AAC Jan 2010 (defun close-bindings (bindings) (let ((id-bindings nil) ;;; main work just done with ids to keep data structures smaller (canonical-bindings nil)) (dolist (pair bindings) (dolist (el pair) (unless (member (var-id el) canonical-bindings :key #'binding-id) (push (make-binding :var el :id (var-id el)) canonical-bindings)))) (dolist (pair bindings) (when (cddr pair) (error "Non binary binding ~A unexpected" pair)) (let ((a (if (car pair) (var-id (car pair)))) (b (if (cadr pair) (var-id (cadr pair))))) (when (and a b (not (eql a b))) (push (list a b) id-bindings)))) (let* ((equivalence-sets-unsorted (close-bindings-rec (list (car id-bindings)) (cdr id-bindings))) (equivalence-sets-sorted (loop for equiv-set in equivalence-sets-unsorted collect (sort equiv-set #'<)))) (dolist (binding canonical-bindings) (let* ((id (binding-id binding)) (equivs (dolist (equiv-set equivalence-sets-sorted) (when (member id equiv-set) (return equiv-set)))) (canon-id (car equivs))) (setf (binding-equivs binding) (remove id equivs)) (setf (binding-canonical binding) (binding-var (find canon-id canonical-bindings :key #'binding-id))))) canonical-bindings))) (defun close-bindings-rec (equivalence-sets to-do) (if to-do (let* ((next-pair (car to-do)) (a-member (find-equivalence-set (car next-pair) equivalence-sets)) (b-member (find-equivalence-set (cadr next-pair) equivalence-sets))) (cond ((and (not a-member) (not b-member)) (close-bindings-rec (cons next-pair equivalence-sets) (cdr to-do))) ((not b-member) (close-bindings-rec (add-el-to-equivalence-sets (cadr next-pair) a-member equivalence-sets) (cdr to-do))) ((not a-member) (close-bindings-rec (add-el-to-equivalence-sets (car next-pair) b-member equivalence-sets) (cdr to-do))) ((eql a-member b-member) (close-bindings-rec equivalence-sets (cdr to-do))) (t (close-bindings-rec (merge-equivalence-sets a-member b-member equivalence-sets) (cdr to-do))))) equivalence-sets)) (defun find-equivalence-set (element equiv-sets) (dolist (eqset equiv-sets) (when (member element eqset) (return eqset)))) (defun add-el-to-equivalence-sets (element set-toupdate equivalence-sets) (push element (car (member set-toupdate equivalence-sets))) equivalence-sets) (defun merge-equivalence-sets (a-member b-member equivalence-sets) (let ((reduced-sets (delete a-member (delete b-member equivalence-sets)))) (push (nconc a-member b-member) reduced-sets) reduced-sets)) ;;; end transitive closure code ;;; ******** Code to reset variables to canonical ids ********* (defun canonicalise-rmrs (rmrs) ;;; destructively modifies an rmrs by replacing the ;;; bindings with the canonical binding ;;; Must be called after bindings have been closed (when (semstruct-p rmrs) (error "Not intended for semstructs")) (let ((top-h (rmrs-top-h rmrs)) (eps (rmrs-liszt rmrs)) (rmrs-args (rmrs-rmrs-args rmrs)) (rmrs-h-cons (rmrs-h-cons rmrs)) (rmrs-in-groups (rmrs-in-groups rmrs)) (bindings (rmrs-bindings rmrs))) (when top-h (canonicalise-rmrs-variable top-h bindings)) (loop for ep in eps do (canonicalise-rmrs-ep ep bindings)) (loop for arg in rmrs-args do (canonicalise-rmrs-arg arg bindings)) (loop for ing in rmrs-in-groups do (canonicalise-rmrs-in-group ing bindings)) (loop for hcons in rmrs-h-cons do (canonicalise-rmrs-hcons hcons bindings)) (setf (rmrs-bindings rmrs) nil) rmrs)) (defun canonicalise-rmrs-variable (var bindings) (let ((id (find-rmrs-var-id var bindings))) (setf (var-id var) id))) (defun canonicalise-rmrs-ep (ep bindings) (canonicalise-rmrs-variable (rel-handel ep) bindings) (when *anchor-rmrs-p* (canonicalise-rmrs-variable (rel-anchor ep) bindings)) ;;; actually anchors should probably never change (let ((value (car (rel-flist ep)))) (unless (var-p value) (error "Unexpected value ~A" value)) (canonicalise-rmrs-variable value bindings))) (defun canonicalise-rmrs-arg (arg bindings) (let ((label (rmrs-arg-label arg)) (value (rmrs-arg-val arg))) (canonicalise-rmrs-variable label bindings) (if (var-p value) (canonicalise-rmrs-variable value bindings)))) (defun canonicalise-rmrs-in-group (ing bindings) (canonicalise-rmrs-variable (in-group-label-a ing) bindings) (canonicalise-rmrs-variable (in-group-label-b ing) bindings)) (defun canonicalise-rmrs-hcons (hcons bindings) (canonicalise-rmrs-variable (hcons-scarg hcons) bindings) (canonicalise-rmrs-variable (hcons-outscpd hcons) bindings)) ;;; ******** Main composition code ************ ;;; the code assumes a tree structure, where there's some ;;; notion of a node, which either contains a base tag/lexeme ;;; specification or a rule-name plus dtrs. The functions ;;; daughter-nodes-p, get-rule-name, get-dtr-nodes, get-lexical-tag ;;; and get-lexeme are all defined for the particular format ;;; they all take a node. The calling function is also ;;; format specific, because there may be other stuff in the file ;;; there's no particular reason why we need an overt rule name ;;; on the node - we just need some form of identification of ;;; a recipe for composition (defun construct-sem-for-tree-aux (tree-node original) (if (daughter-nodes-p tree-node) (let* ((rule-name (get-rule-name tree-node)) (dtr-nodes (get-dtr-nodes tree-node)) (dtr-structures (loop for dtr in dtr-nodes collect (construct-sem-for-tree-aux dtr original)))) (if *anchor-rmrs-p* (algebra-compose rule-name dtr-structures) (compose rule-name dtr-structures))) (let ((base-tag (get-lexical-tag tree-node)) (lexeme (get-lexeme tree-node original))) (if *anchor-rmrs-p* (algebra-create-base-struct base-tag lexeme) (create-base-struct base-tag lexeme))))) (defvar *local-var-context* nil) ;;; for equalities (defvar *trace-rmrs-composition-p* nil) ;;; for debugging (defun compose (rule-name dtrs) ;;; compose takes a rule name and a list of daughters ;;; the rule name is looked up ;;; this may give full instructions, or just mark the ;;; head. If there's no instruction, default composition ;;; alone operates. (unless dtrs ;;; really there always ought to be dtrs ;;; but this prevents errors on trees with just a rule name (error "~% defective tree")) (let* ((dtr-features (loop for dtr in dtrs when (semstruct-features dtr) collect (semstruct-features dtr))) (rule-instruction (lookup-instruction rule-name dtr-features)) (dtr-hooks (loop for dtr in dtrs collect (semstruct-hook dtr))) (dtr-slots (loop for dtr in dtrs collect (semstruct-slots dtr))) (dtr-eps (loop for dtr in dtrs append (semstruct-liszt dtr))) (dtr-rargs (loop for dtr in dtrs append (semstruct-rmrs-args dtr))) (dtr-ings (loop for dtr in dtrs append (semstruct-in-groups dtr))) (dtr-hcons (loop for dtr in dtrs append (semstruct-h-cons dtr))) (dtr-binding-list (loop for dtr in dtrs append (semstruct-bindings dtr))) (cfrom (calculate-cfrom-from-daughters dtrs)) (cto (calculate-cto-from-daughters dtrs)) (semhead nil) (semstruct nil) (equalities nil)) (when (and rule-instruction (eql (rmrs-rule-arity rule-instruction) (length dtrs))) ;;; fixed arity assumption may cause probs, ;;; but assume for now (setf semhead (rmrs-rule-head rule-instruction)) (setf *local-var-context* nil) (setf semstruct (when (rmrs-rule-semstruct rule-instruction) (construct-new-semstruct (rmrs-rule-semstruct rule-instruction) cfrom cto nil))) ;;; if semstruct has been computed - this affects the ;;; equality computation via *local-var-context* (setf equalities (compute-equalities dtr-hooks dtr-slots (rmrs-rule-eqs rule-instruction)))) (let ((semstruct-out (make-semstruct :hook (if semhead ; a number indicating the dtr ; -1 means the rule itself is the head (if (eql semhead -1) (if semstruct (semstruct-hook semstruct) (make-default-running-hook)) (semstruct-hook (elt dtrs semhead))) (if (not (cdr dtrs)) (semstruct-hook (car dtrs)) ;;; assume semhead is single dtr ;;; if unary rule (make-default-running-hook))) :slots (cond (semstruct (semstruct-slots semstruct)) ((eql semhead -1) :none) (semhead (semstruct-slots (elt dtrs semhead))) ((not (cdr dtrs)) (semstruct-slots (car dtrs))) (t nil)) :liszt (if semstruct (append (semstruct-liszt semstruct) dtr-eps) dtr-eps) :rmrs-args (if semstruct (append (semstruct-rmrs-args semstruct) dtr-rargs) dtr-rargs) :in-groups (if semstruct (append (semstruct-in-groups semstruct) dtr-ings) dtr-ings) :h-cons (if semstruct (append (semstruct-h-cons semstruct) dtr-hcons) dtr-hcons) :bindings (append equalities dtr-binding-list) :cfrom cfrom :cto cto))) (when *trace-rmrs-composition-p* (format t "~%Applying rule ~A" rule-name) (unless rule-instruction (format t " (not found)")) (dolist (dtr dtrs) (internal-output-rmrs dtr 'vcompact t)) (internal-output-rmrs semstruct-out 'vcompact t)) semstruct-out))) ;;; cfrom and cto utility fns (defun calculate-cfrom-from-daughters (dtrs) (let ((current-min most-positive-fixnum)) (dolist (dtr dtrs) (let ((dtr-cfrom (semstruct-cfrom dtr))) (when (and dtr-cfrom (< dtr-cfrom current-min)) (setf current-min dtr-cfrom)))) (if (eql current-min most-positive-fixnum) NIL current-min))) (defun calculate-cto-from-daughters (dtrs) (let ((current-max -1)) (dolist (dtr dtrs) (let ((dtr-cto (semstruct-cto dtr))) (when (and dtr-cto (> dtr-cto current-max)) (setf current-max dtr-cto)))) (if (< current-max 0) NIL current-max))) ;;; ;;; Tag lookup ;;; (defun create-base-struct (tag lexeme) (let* ((tag-template (get-tag-template tag)) (tag-semstruct (rmrs-tag-template-semstruct (or tag-template *default-template*))) (from (word-info-from lexeme)) (to (word-info-to lexeme))) (construct-new-semstruct tag-semstruct from to lexeme))) ;;; A new semstruct may be created either for a lexical tag or for ;;; a semstruct contributed by a grammar rule. In either case, ;;; a new set of variables has to be created, which are unique ;;; for this semstruct in this derivation, and a base binding list ;;; is also created ;;; In the case of tag lookup, the pred corresponding to ;;; the lexeme will be substituted. #| In the case of a rule, when a new semstruct is created, the variables also affect the equalities in the rule. For instance: e p-agt e x2 VP.index e NP.index x goes to e101 p-agt e101 x201 VP.index e101 NP.index x101 |# ;;; *********************************************** ;;; Variable handling during composition (defvar *rmrs-variable-generator* nil) (defun init-rmrs-variable-generator () (setf *rmrs-variable-generator* (create-variable-generator))) (defun initialize-rmrs-variables nil (if *restart-variable-generator* (init-rmrs-variable-generator))) (defun initialize-rmrs-variables-plus nil (setf *rmrs-variable-generator* (create-variable-generator 10000))) (init-rmrs-variable-generator) (defun create-new-rmrs-var (type gen extras) ;;; constructs a new variable of a given type (let* ((idnumber (funcall gen))) (make-var :type type :id idnumber :extra extras))) (defun make-default-running-hook nil (make-indices :index (create-new-rmrs-var :other *rmrs-variable-generator* nil) :label (create-new-rmrs-var :handle *rmrs-variable-generator* nil))) (defun generate-new-var (old-var-struct) ;;; called from construct-new-semstruct ;;; takes a var-struct with a dummy id ;;; and creates a new variable of the same type (or (let ((existing-var (rest (assoc old-var-struct *local-var-context* :test #'eql-var-id)))) (when (and existing-var (var-extra old-var-struct) (not (var-extra existing-var))) (setf (var-extra existing-var) (var-extra old-var-struct))) existing-var) (let* ((var-type (var-type old-var-struct)) (varstruct (create-new-rmrs-var var-type *rmrs-variable-generator* (var-extra old-var-struct)))) (push (cons old-var-struct varstruct) *local-var-context*) varstruct))) ;;; end variable handling code (defun construct-new-semstruct (semstruct cfrom cto lex) ;;; this is only called when we have a structure ;;; corresponding to a read-in rule or tag ;;; *local-var-context* gets used when interpreting the eqs ;;; in the case of a rule ;;; lex should only be set in the case of a new tag ;;; when it will be a word-info structure (setf *local-var-context* nil) (let* ((new-hook (generate-new-hook (semstruct-hook semstruct)))) (make-semstruct :hook new-hook :slots (let ((slots-spec (semstruct-slots semstruct))) (cond ((eql slots-spec :none) :none) (slots-spec (generate-new-var slots-spec)) (t (indices-label new-hook)))) :liszt (loop for old-ep in (semstruct-liszt semstruct) collect (make-rel :handel (if (rel-handel old-ep) (generate-new-var (rel-handel old-ep)) (create-new-rmrs-var :handle *rmrs-variable-generator* nil)) :pred (let ((old-pred (rel-pred old-ep))) (if (dummy-pred-p old-pred) (make-realpred :lemma (string-downcase (word-info-lemma lex)) :pos (word-info-pos lex)) old-pred)) :flist (loop for old-arg in (rel-flist old-ep) collect (generate-new-var old-arg)) :str (if (word-info-p lex) (word-info-original lex)) :cfrom cfrom :cto cto)) :rmrs-args (loop for old-rarg in (semstruct-rmrs-args semstruct) collect (let ((val (rmrs-arg-val old-rarg))) (make-rmrs-arg :arg-type (rmrs-arg-arg-type old-rarg) :label (generate-new-var (rmrs-arg-label old-rarg)) :val (if (var-p val) (generate-new-var val) (if (dummy-constant-p val) (string-downcase (word-info-lemma lex)) ; for constants in names etc ; downcased, but may revisit ; this val))))) :in-groups (loop for old-ing in (semstruct-in-groups semstruct) collect (make-in-group :label-a (generate-new-var (in-group-label-a old-ing)) :label-b (generate-new-var (in-group-label-b old-ing)))) :h-cons (loop for old-hcons in (semstruct-h-cons semstruct) collect (make-hcons :relation (hcons-relation old-hcons) :scarg (generate-new-var (hcons-scarg old-hcons)) :outscpd (generate-new-var (hcons-outscpd old-hcons)))) :cfrom cfrom :cto cto))) (defun generate-new-hook (old-hook) (make-indices :label (generate-new-var (indices-label old-hook)) :index (generate-new-var (indices-index old-hook)))) (defun compute-equalities (dtr-hooks dtr-slots equalities) ;;; equality components in rules are either ;;; integers plus path (indicating dtr-hook elements or dtr-slots ) or ;;; variables which should correspond to the *local-var-context* ;;; ;;; this function returns a list of lists of variables (let ((real-eqs nil)) (dolist (equality equalities) (let ((els (equality-eq-els equality)) (new-els nil)) (dolist (el els) (let ((new-var (if (pointer-p el) (get-var-for-pointer el dtr-hooks dtr-slots) (rest (assoc el *local-var-context* :test #'eql-var-id))))) (when new-var (dolist (el1 new-els) (share-var-info new-var el1)) (push new-var new-els)))) (when new-els (push new-els real-eqs)))) real-eqs)) (defun share-var-info (v1 v2) ;;; FIX ;;; eventually this should deal with the cases of compatible ;;; extra info, but for now, just assume only one variable has ;;; extra info ;;; should also deal with compatible sorts (if (var-extra v1) (setf (var-extra v2) (var-extra v1)) (setf (var-extra v1) (var-extra v2)))) (defun get-var-for-pointer (pointer dtr-hooks dtr-slots) (let* ((dtr-num (pointer-dtrnum pointer)) (hook (elt dtr-hooks dtr-num)) (slot (elt dtr-slots dtr-num))) (cond ((and (equal (pointer-hook-el pointer) "INDEX") hook (indices-p hook)) (indices-index hook)) ((and (equal (pointer-hook-el pointer) "LABEL") hook (indices-p hook)) (indices-label hook)) ((and (equal (pointer-hook-el pointer) "ANCHOR") slot (not (eql slot :none))) slot) (t nil)))) ;;; ******************************** ;;; Rule lookup (defparameter *rule-instructions* nil) ;;; now a list of rule sets (defun lookup-instruction (rule-name features) ;;; first check for an exact match ;;; failing this, check for a match ignoring the optional spec ;;; If this is found, dtrs may need adjusting (let* ((rule (rule-and-condition-match rule-name features *rule-instructions*))) (if rule (progn (increment-rule-record rule-name nil t) rule) (let ((base-name (remove-optional-spec rule-name))) (if base-name (let ((mrule (rule-and-condition-match base-name features *rule-instructions*)) (opt-dtrs (if base-name (find-opt-dtrs rule-name)))) (if mrule (progn (increment-rule-record base-name opt-dtrs t) (rule-with-adjusted-dtrs mrule opt-dtrs)) (progn (increment-rule-record base-name opt-dtrs nil) nil))) (progn (increment-rule-record rule-name nil nil) nil)))))) (defun rule-and-condition-match (rule-name features rule-list) ;;; a conditional rule comes in two (or more) variants ;;; one may have an empty condition. If we return ;;; multiple rules on the rule-name match, we check the conditions ;;; If features is empty, we match the unconditional rules ;;; If there are features, then we take the conditional version ;;; if they match the condition specification. If no, we take ;;; the unconditional version (let ((rule-set (find rule-name rule-list :test #'equal :key #'rmrs-rule-set-name))) (if rule-set (let ((rule-options (rmrs-rule-set-alternatives rule-set))) (if (or (not (cdr rule-options)) (not features)) (car rule-options) (or (find-if #'(lambda (rule) (member (rmrs-rule-condition rule) features :test #'equal)) rule-options) ;;; null features - default rule has null condition ;;; and will match. If there are ;;; features then (first) matching rule (car rule-options)))) ;;; no feature that matches -> default ;;; first thing on the rule options list is the default nil))) (defun remove-optional-spec (rule-name) ;;; for rules of form N1/ap_n1/- return N1/ap_n1 ;;; if no optional spec, return nil (we should have found it already) (let* ((slash-pos1 (position #\/ rule-name)) (slash-pos2 (if slash-pos1 (position #\/ rule-name :start (+ 1 slash-pos1))))) (if slash-pos1 (if slash-pos2 (subseq rule-name 0 slash-pos2) (let ((postslash (subseq rule-name 0 slash-pos1))) (if (every #'(lambda (x) (member x '(#\+ #\-))) (coerce postslash 'list)) postslash)))))) (defun find-opt-dtrs (rule-name) ;;; given we've got something after the slash, return a list of ts and ;;; nils (let* ((slash-pos-end (position #\/ rule-name :from-end t)) (opt-dtr-str (subseq rule-name (+ 1 slash-pos-end)))) (loop for char in (coerce opt-dtr-str 'list) collect (cond ((eql char #\+) t) ((eql char #\-) nil) (t (error "Unexpected character in optional part of ~A" rule-name)))))) (defun rule-with-adjusted-dtrs (rule opt-dtrs) ;;; opt-dtrs is a list with t and nil - we go through the ;;; OPT things is order, adjusting the rule so that it ;;; behaves as it would if just the OPTs that are actually present were ;;; specified. This involves renumbering the daughter pointers. ;;; e.g. if we have D1 OPT D2 OPT D3 OPT ;;; and - - + ;;; then we end up with D1 D2 D3 OPT (arity 4) ;;; and need to map dtr numbers 0->0, 1->?, 2->1, 3->?, 4->2, 5->3? ;;; though the ? shouldn't actually be used ;;; OPT OPT D1 with -- should give 2->0 ;;; Attempt to make this robust to screwups ! (let ((new-rule (copy-rmrs-rule rule)) (rule-dtrs (rmrs-rule-dtrs rule)) (number-map nil) (real-count 0)) (dotimes (n (length rule-dtrs)) (push (cons n n) number-map)) (setf number-map (nreverse number-map)) (setf (rmrs-rule-dtrs new-rule) (loop for dtr in rule-dtrs and mapping in number-map nconc (if (not (member dtr '("OPT" "OPT*") :test #'string-equal)) (progn (setf (cdr mapping) real-count) (incf real-count) (list dtr)) (let ((next-opt (car opt-dtrs))) (setf (cdr mapping) nil) (setf opt-dtrs (cdr opt-dtrs)) (if (null next-opt) nil (progn (incf real-count) (list dtr))))))) ;;; (pprint number-map) (setf (rmrs-rule-arity new-rule) (length (rmrs-rule-dtrs new-rule))) (unless (eql (rmrs-rule-head new-rule) -1) (setf (rmrs-rule-head new-rule) (cdr (assoc (rmrs-rule-head new-rule) number-map)))) (unless (rmrs-rule-head new-rule) (error "Head missing")) (setf (rmrs-rule-eqs new-rule) (loop for eq in (rmrs-rule-eqs new-rule) collect (make-equality :eq-els (loop for eq-el in (equality-eq-els eq) collect (if (pointer-p eq-el) (let ((new-dtr (cdr (assoc (pointer-dtrnum eq-el) number-map)))) (unless new-dtr (error "Optional daughter not optional")) (make-pointer :dtrnum new-dtr :hook-el (pointer-hook-el eq-el))) eq-el))))) new-rule)) ;;; **************************************** ;;; recording rule use ;;; ;;; This is to aid grammar development, so that ;;; the developer can tell which rules are most important ;;; for a given test suite (defvar *known-rule-record* (make-hash-table :test #'equal)) (defvar *unknown-rule-record* (make-hash-table :test #'equal)) (defun clear-rule-record nil (setf *known-rule-record* (make-hash-table :test #'equal)) (setf *unknown-rule-record* (make-hash-table :test #'equal))) (defun show-rule-record (knownp) (maphash #'(lambda (key val) (format t "~%~A " key) (loop for rec in val do (let ((opt (car rec)) (count (cdr rec))) (if opt (progn (format t "~{~A~}" (loop for el in opt collect (if el "+" "-"))) (format t " ~A; " count)) (format t "/ ~A" count))))) (if knownp *known-rule-record* *unknown-rule-record*))) (defun show-sorted-rule-record (knownp) (let ((rules nil)) (maphash #'(lambda (key val) (let ((max 0)) (loop for rec in val do (let ((count (cdr rec))) (when (> count max) (setf max count)))) (push (list max key val) rules))) (if knownp *known-rule-record* *unknown-rule-record*)) (setf rules (sort rules #'> :key #'car)) (dolist (rule rules) (let ((key (cadr rule)) (val (caddr rule))) (format t "~%~A " key) (loop for rec in val do (let ((opt (car rec)) (count (cdr rec))) (if opt (progn (format t "~{~A~}" (loop for el in opt collect (if el "+" "-"))) (format t " ~A; " count)) (format t "/ ~A" count)))))))) (defun increment-rule-record (rule-name opt-spec knownp) (let* ((rule-table (if knownp *known-rule-record* *unknown-rule-record*)) (record (gethash rule-name rule-table))) (if record (let ((opt-part (assoc opt-spec record :test #'equal))) (if opt-part (incf (cdr opt-part)) (push (cons opt-spec 1) (gethash rule-name rule-table)))) (setf (gethash rule-name rule-table) (list (cons opt-spec 1))))))