;;; Copyright (c) 2009 ;;; Ann Copestake ;;; see `LICENSE' for conditions. (in-package :mrs) ;;; Anaphora on DMRS #| A sequence of MRSs is regarded as a discourse. We add a new structure discourse-item, to add extra information needed when treating a structure as part of a discourse. Anaphora resolution consists of building up a discourse model with possible referents, items assumed to be anaphoric (a subset of the referents) and anaphoric links between an anaphoric item and a referent. The data structures and general idea will work with MRS or RMRS too, but for this code, we assume DMRSs Stages in building the model 0) turn a sequence of DMRSs into a discourse model - i.e., a sequence of discourse items 1) extract the referents 2) extract the anaphoric items 3) add a-links |# (defstruct discourse-item utterance-id mrs ; may be an mrs, rmrs or dmrs referents ; some are marked as anaphoric a-links ;; from anaphora in this discourse-item to referents ;; which may be in other items. Possibly should be at a higher ;; level. ) (defstruct referent d-id anaphoric-p ; t if anaphor node ;; cache features ;; cache features ) (defstruct rfeatures pronoun reflexive definite natural-gender pair animacy main-clause argnum ) (defstruct a-link anaphor ; d-id referent ; d-id score ; integer value in ;; boolean value - t means this link is being counted in this ;; model, nil that it isn't features ;; cache features to simplify reranking ) (defstruct lfeatures sentence-distance ; 0 for same sentence token-distance ) (defstruct d-id ;; dual part id so referents are unique id utterance-id ) #| (defstruct (dmrs) nodes links) (defstruct dmrs-node ;;; the LTOP node is always id 0, others may be anything - but ;;; code for conversion from RMRS uses the anchors. ;;; cfrom and cto as in (R)MRS ;;; charvar and label are for convenience in processing ;;; char var is the variable characteristic of the RMRS predication ;;; (always the ARG0, but not all ARG0s are charvars) ;;; label is the label of the RMRS predication id pred cfrom cto carg charvar ;; for RMRS to DMRS only label ;; for RMRS to DMRS only ) (defstruct dmrs-link ;;; from and to are DMRS node ids, pre and post make up the arc ;;; label from to pre post) |# (defun dmrs-list-to-discourse (dmrs-list) (let ((discourse-base (convert-dmrs-list dmrs-list))) (extract-dmrs-referents discourse-base) ;; sets referents (extract-dmrs-anaphora discourse-base) ;; sets anaphora (add-a-links discourse-base) discourse-base)) ;;; Step 0 - take a sequence of DMRSs and convert it to a series ;;; of discourse items (defun convert-dmrs-list (dmrs-list) (loop for item in dmrs-list and item-number from 1 collect (make-discourse-item :utterance-id item-number :mrs item ))) ;;; Step 1 - extract referents (defun extract-dmrs-referents (discourse-items) (dolist (discourse-item discourse-items) (let ((utterance-id (discourse-item-utterance-id discourse-item))) (setf (discourse-item-referents discourse-item) (loop for node in (dmrs-nodes (discourse-item-mrs discourse-item)) when (potential-referent-p node (dmrs-nodes (discourse-item-mrs discourse-item)) (dmrs-links (discourse-item-mrs discourse-item))) collect (make-referent :d-id (make-d-id :id (dmrs-node-id node) :utterance-id utterance-id) :node node)))))) (defun potential-referent-p (node dmrs-nodes dmrs-links) ;;; type is individual ;;; FIX (and (equal (dmrs-node-cvtype node) "x") (let ((id (dmrs-node-id node))) (not (match-dmrs-link-config (list "compound_name_rel" "ARG2" id) dmrs-nodes dmrs-links))))) (defun match-dmrs-link-config (config nodes links) ;;; make more general (let* ((id (third config)) (rel (first config)) (arg (second config)) (rel-set (loop for node in nodes when (let ((pred (dmrs-node-pred node))) (dmrs-pred-match pred rel)) collect (dmrs-node-id node)))) (member-if #'(lambda (link) (and (eql (dmrs-link-to link) id) (equal (dmrs-link-pre link) arg) (member (dmrs-link-from link) rel-set))) links))) (defun dmrs-pred-match (pred rel) (if (realpred-p pred) nil (string-equal pred rel))) ;;; Step 2 - extract anaphoric items (defun extract-dmrs-anaphora (discourse-items) (dolist (discourse-item discourse-items) (let* ((referents (discourse-item-referents discourse-item))) (loop for referent in referents when (anaphoric-p (referent-node referent)) collect (setf (referent-anaphoric-p referent) t))))) (defun anaphoric-p (node) ;;; pronoun, definite etc ;;; FIX (let ((pred (dmrs-node-pred node))) (and (stringp pred) (string-equal (dmrs-node-pred node) "pron_rel")))) ;;; Step 3 - add a-links (defun add-a-links (discourse-items) (let ((past-items nil)) (dolist (discourse-item discourse-items) (dolist (referent (discourse-item-referents discourse-item)) (when (referent-anaphoric-p referent) (let ((new-links (find-a-links referent discourse-item past-items))) ;; forget cataphora in future sentences! (setf (discourse-item-a-links discourse-item) (append new-links (discourse-item-a-links discourse-item)))))) (push discourse-item past-items)))) (defun find-a-links (anaphor discourse-item past-items) ;;; FIX ;;; past-items are in reverse order, most recent first (append (loop for referent in (discourse-item-referents discourse-item) unless (or (eql referent anaphor) (violates-binding-conditions anaphor referent discourse-item) (referent-property-mismatch anaphor referent)) collect (make-a-link :anaphor (referent-d-id anaphor) :referent (referent-d-id referent))) ;;; don't look back if prontype='refl' (if (node-properties-p (referent-node anaphor) "prontype" "refl") nil (loop for past-item in past-items append (loop for referent in (discourse-item-referents past-item) unless (referent-property-mismatch anaphor referent) collect (make-a-link :anaphor (referent-d-id anaphor) :referent (referent-d-id referent))))))) (defun node-properties-p (node feature value) ;;; feature and value are specified as strings ;;; this function takes care of nastinesses like packages ;;; though not very efficiently (let ((properties (dmrs-node-cvextra node))) (dolist (property properties) (let ((f (extrapair-feature property)) (v (extrapair-value property))) (when (and (string-equal (string f) feature) (string-equal (string v) value)) (return t)))))) (defun referent-property-mismatch (anaphor referent) ;;; FIX (declare (ignore anaphor referent)) nil) (defun violates-binding-conditions (anaphor referent discourse-item) ;;; this of course is indefinitely complex ;;; in the medium term, allow patterns to be specified in ;;; DMRS such that if the configuration matches the pattern ;;; we have a violation (if (node-properties-p (referent-node anaphor) "prontype" "refl") nil ;;; FIX! ;;; non-reflexive ;;; crudely block all cases where the elements are ;;; linked from the same node (let* ((dmrs (discourse-item-mrs discourse-item)) (alinked (collect-linked-from anaphor dmrs)) (rlinked (collect-linked-from referent dmrs))) (intersection alinked rlinked)))) (defun collect-linked-from (ref dmrs) (let ((ref-id (d-id-id (referent-d-id ref))) (links (dmrs-links dmrs))) (loop for link in links when (eql (dmrs-link-to link) ref-id) collect (dmrs-link-from link)))) ;;; ************************************************* ;;; ;;; Output (as XML for now) ;;; ;;; ************************************************* #| |# (defun output-discourse (discourse ostream) (output-discourse-start discourse ostream) (dolist (discourse-item discourse) (output-discourse-item-start discourse-item ostream) (output-dmrs1 (discourse-item-mrs discourse-item) 'dxml ostream) (dolist (referent (discourse-item-referents discourse-item)) (output-referent referent ostream)) (dolist (alink (discourse-item-a-links discourse-item)) (output-alink alink ostream)) (output-discourse-item-end discourse-item ostream)) (output-discourse-end discourse ostream)) (defun output-discourse-start (discourse ostream) (declare (ignore discourse)) (write-string " " ostream) (terpri ostream) (write-string "" ostream) ) (defun output-discourse-end (discourse ostream) (declare (ignore discourse)) (terpri ostream) (write-string "" ostream) (terpri ostream)) (defun output-discourse-item-start (discourse-item ostream) (terpri ostream) (write-string "" ostream) ) (defun output-discourse-item-end (discourse-item ostream) (declare (ignore discourse-item)) (write-string "" ostream) ) #| |# (defun output-referent (referent ostream) (let ((d-id (referent-d-id referent))) (terpri ostream) (write-string "" ostream))) #| |# (defun output-alink (alink ostream) (let ((anaphor (a-link-anaphor alink)) (referent (a-link-referent alink))) (terpri ostream) (write-string "" ostream))) ;;; ************************************************* ;;; ;;; Utility functions ;;; ;;; ************************************************* ;;; extract-fine-system fns are copied from qa2008.lisp (defparameter *discourse-sets* '( ;;; 1 ;;; (21.34) Victoria Chen, Chief Financial Officer of Megabucks Banking Corp since 2004, saw her pay jump 20%, to $1.3 million, as the 37-year-old also became the Denver-based financial-services company's president. ;;; It has been ten years since she came to Megabucks from rival Lotsabucks. (1 2) ;;; (21.36) According to Doug, Sue just bought a 1961 Ford Falcon. ;;; a. But that turned out to be a lie. ;;; b. But that was false. ;;; c. That struck me as a funny way to describe the situation. ;;; 2 (3 4) ;;; 3 (3 5) ;;; 4 (3 6) ;;; (a) Mrs. Martin was so very kind as to send Mrs. Goddard a beautiful goose. ; 7 fails ;;; 5 ;;; (b) He had gone round one day to bring her some walnuts. (8) ;;; 6 ;;; (c) I saw this beautiful Ford Falcon today. (9) ;;; 7 ;;; 21.38) I am going to the butcher's to buy a goose. (10) ;;; 8 ;;; (21.39) It concerns a white stallion which I have sold to an officer. ;;; But the pedigree of the white stallion was not fully established. (11 12) ;;; 9 ;;; (21.40) I read about it in The New York Times. (13) ;;; 10 ;;; (21.41) Emma smiled and chatted as cheerfully as she could. (14) ;;; (21.42) ;;;a. John went to Bob's party, and parked next to a classic Ford Falcon. ;;;b. He went inside and talked to Bob for more than an hour. ;;;c. Bob told him that he recently got engaged. ;;;d. ?? He also said that he bought it yesterday. ;;;d. He also said that he bought the Falcon yesterday. ;;; 11 (15 16 17 18) ;;; 12 (15 16 17 19) ;;; 13 ;;;(21.43) Even before she saw it, Dorothy had been thinking about the Emerald City every day. (20) ;;; 14 ;;;(21.44) Every dancer brought her left arm forward. (21) ;;; 15 ;;;(21.45) I just bought a copy of Thoreau's Walden. I had bought one five years ago. That one had been very tattered; this one was in much better condition. (22 23 24) ;;; 16 ;;; (21.46) I almost bought a 1961 Ford Falcon today, but a door had a dent and the engine seemed noisy. (25) ;;; 17 ;;;(21.47) I'm interested in buying a Mac laptop. ;;;They are very stylish. (26 27) ;;; 18 ;;; (21.48) In March in Boulder you have to wear a jacket. (28) ;;; 19 ;;; (a) It was Frodo who carried the ring. (29) ;;; (b) It was good that Frodo carried the ring ; 30 fails ;;; 20 ;;; John has a Ford Falcon. ;;; It is red. (31 32) ;;; 21 ;;; John has a Ford Falcon. ;;; They are red. (33 34) ;;; 22 ;;; John has three Ford Falcons. ;;; It is red. (35 36) ;;; 23 ;;; John has three Ford Falcons. ;;; They are red. (37 38) ;;; 24 ;;; (21.50) IBM announced a new machine translation product yesterday. ;;; They have been working on it for 20 years. (39 40) ;;; 25 ;;; (21.51) John has a Ford. ;;; He is attractive. (41 42) ;;; 26 ;;; (21.52) John has a Ford. ;;; It is attractive. (43 44) ;;; 27 ;;; (21.53) John bought himself a new Ford. (45) ;;; 28 ;;;(21.54) John bought him a new Ford. (46) ;;; 29 ;;; (21.55) John said that Bill bought him a new Ford. (47) ;;; 30 ;;; (21.56) John said that Bill bought himself a new Ford. (48) ;;; 31 ;;; (21.57) He said that he bought John a new Ford. (49) ;;; 32 ;;; (21.58) John parked his car in the garage after driving it around for hours. (50) ;;; 33 ;;; (21.59) The doctor found an old map in the captain's chest. ;;; Jim found an even older map hidden on the shelf. ;;; It described an island. (51 52 53) ;;; 34 ;;; (21.60) Billy Bones went to the bar with Jim Hawkins. ;;; He called for a glass of rum. (54 55) ;;; 35 ;;; (21.61) Jim Hawkins went to the bar with Billy Bones. ;;; He called for a glass of rum. (56 57) ;;; 36 ;;; (21.62) Billy Bones had been thinking about a glass of rum ever since the pirate ship docked. ;;; He hobbled over to the Old Parrot bar. ;;; Jim Hawkins went with him. ;;; He called for a glass of rum. (58 59 60 61) ;;; 37 ;;; (21.63) Long John Silver went with Jim to the Old Parrot. ;;; Billy Bones went with him to the Old Anchor Inn. (62 63) ;;; 38 ;;; (21.64) John telephoned Bill. ;;; He lost the laptop. (64 65) ;;; 39 ;;; (21.65) John criticized Bill. ;;; He lost the laptop. (66 67) ;;; 40 ;;; (21.66) John saw a beautiful 1961 Ford Falcon at the used car dealership. ;;; He showed it to Bob. ;;; He bought it. (68 69 70) )) ;;; ignore the rest for now #| (convert-fine-system-output-to-discourses "~/lingo/lkb/src/rmrs/anaphora-test/ana-exs" "~/lingo/lkb/src/rmrs/anaphora-test/ana.dmrs") (convert-fine-system-output-to-discourses "~/lingo/lkb/src/rmrs/anaphora-test/ana-exs" "~/lingo/lkb/src/rmrs/anaphora-test/ana.dmrs" '(30)) |# (defun convert-fine-system-output-to-discourses (ifile odir &optional dnums) (let ((*anchor-rmrs-p* t) (drecord nil)) ;;; read in all the discourses and store them in drecord (with-open-file (istream ifile :direction :input) (loop (let ((fsout (read-line istream nil nil))) (unless fsout (return)) (let* ((scount (extract-fine-system-number fsout)) (dcount (if (integerp scount) (loop for disc in *discourse-sets* and temp-count from 1 when (member scount disc) return temp-count))) (mrs-string (extract-fine-system-mrs fsout))) ;;; (when (and dcount (stringp mrs-string) (not (equal mrs-string ""))) (with-input-from-string (mstream mrs-string) (let* ((mrs (read-mrs mstream)) (rmrs (mrs-to-rmrs mrs)) (dmrs (rmrs-to-dmrs rmrs))) (when dmrs (let ((drec (assoc dcount drecord))) (if drec (push dmrs (cdr drec)) (push (cons dcount (list dmrs)) drecord))))))))))) ;;; convert each item in drecord to a discourse (dolist (ditem drecord) (let* ((dcount (car ditem)) (dmrss (reverse (cdr ditem))) (discourse (dmrs-list-to-discourse dmrss))) (unless (and dnums (not (member dcount dnums))) (let ((ofile (format nil "~Ad~A.dmrs" odir dcount))) (with-open-file (ostream ofile :direction :output :if-does-not-exist :create :if-exists :supersede) (output-discourse discourse ostream) (finish-output ostream)))))))) (defun extract-fine-system-mrs (str) ;;; compare extract-fine-system-sentence (if (find #\@ str) (let ((ampcount 0) (sstart nil) (send nil)) (dotimes (n (length str)) (let ((char (elt str n))) (when (eql char #\@) (setf ampcount (+ 1 ampcount)) (when (eql ampcount 13) (setf sstart (+ 1 n))))) (when (eql ampcount 14) (setf send n) (return))) (if (and sstart send) (subseq str sstart send) str)) str)) (defun extract-fine-system-number (str) ;;; compare extract-fine-system-sentence (let ((apos (position #\@ str))) (if apos (parse-integer (subseq str 0 apos) :junk-allowed t)))) ;;; Sciborg data ;;; (convert-sciborg-data-to-discourse "~/select-papers" "~/sciborg-select.dmrs") (defun convert-sciborg-data-to-discourse (ifile ofile) (let ((*anchor-rmrs-p* t) (*robust-dmrs-p* t) (dmrss nil) (prior-sentence nil)) ;;; read in the discourse (with-open-file (istream ifile :direction :input) (loop (let ((rmrs-record (read-line istream nil nil))) (unless rmrs-record (return)) (let* ((item-count (extract-sciborg-system-number rmrs-record)) (sentence-count (extract-sciborg-system-sentence-number rmrs-record)) (rmrs-string (extract-sciborg-system-rmrs rmrs-record))) ;;; just take the first result from any sentence (when (and item-count sentence-count (stringp rmrs-string) (not (equal rmrs-string ""))) (unless (eql sentence-count prior-sentence) (let* ((rmrs (read-single-rmrs-from-string rmrs-string)) (dmrs (rmrs-to-dmrs rmrs))) (unless dmrs (format t "~%dmrs conversion failure on ~A" sentence-count)) (when dmrs (push dmrs dmrss))) (setf prior-sentence sentence-count))))))) (let ((discourse (dmrs-list-to-discourse dmrss))) (with-open-file (ostream ofile :direction :output :if-does-not-exist :create :if-exists :supersede) (output-discourse discourse ostream) (finish-output ostream))))) (defun extract-sciborg-system-number (str) (parse-integer str :junk-allowed t)) (defun extract-sciborg-system-sentence-number (str) (let ((apos (position #\tab str))) (when apos (parse-integer str :start (+ 1 apos) :junk-allowed t)))) (defun extract-sciborg-system-rmrs (str) (let ((apos (position #\< str))) (if apos (let ((rmrs-with-ns (subseq str apos)) (chars nil) (seen-slash nil)) (dolist (char (coerce rmrs-with-ns 'list)) (if (and (char= char #\n) seen-slash) (pop chars) (push char chars)) (if (char= char #\\) (setf seen-slash t) (setf seen-slash nil))) (coerce (nreverse chars) 'string))))) ;;;; scratch code #| (with-open-file (istream "~/papers.dump" :direction :input) (let ((count nil) (rmrs nil)) (loop (let ((mrs-string (read-line istream nil nil))) (unless mrs-string (return)) (when (and count (> count 1000)) (return)) (when (and (> (length mrs-string) 9) (eql (elt mrs-string 0) #\C) (eql (elt mrs-string 1) #\O) (eql (elt mrs-string 2) #\P) (eql (elt mrs-string 3) #\Y) (eql (elt mrs-string 4) #\space) (eql (elt mrs-string 5) #\r) (eql (elt mrs-string 6) #\m) (eql (elt mrs-string 7) #\r) (eql (elt mrs-string 8) #\s)) (setf count 0)) (when count (push mrs-string rmrs) (incf count)))) (with-open-file (ostream "~/select-papers" :direction :output) (dolist (str (nreverse rmrs)) (write-string str ostream) (terpri ostream))))) (with-open-file (istream "~/tst.raw" :direction :input) (let ((mrs-string (read-line istream nil nil))) (with-input-from-string (mstream mrs-string) (let ((rmrs (mrs-to-rmrs (read-mrs mstream)))) (rmrs-to-dmrs rmrs))))) ;;; (output-dmrs1 (rmrs-to-dmrs rmrs) 'dxml t))))) ;;; (with-open-file (ostream file-name :direction :output ;;; :if-exists :supersede ;;; :if-does-not-exist :create) ;;; (layout-dmrs dmrsstruct :svg ostream)) ;;; testing well-formedness of dmrs (let ((*anchor-rmrs-p* t)) (with-open-file (istream "~/anaphora.1" :direction :input) (loop (let ((fsout (read-line istream nil nil))) (unless fsout (return)) (let* ((scount (extract-fine-system-number fsout)) (mrs-string (extract-fine-system-mrs fsout))) ;;; (when (and (stringp mrs-string) (not (equal mrs-string ""))) (with-input-from-string (mstream mrs-string) (let* ((mrs (read-mrs mstream)) (rmrs (mrs-to-rmrs mrs)) (dmrs (rmrs-to-dmrs rmrs))) (when dmrs (with-open-file (ostream "~/dmrs-test.dmrs" :direction :output :if-does-not-exist :create :if-exists :supersede) (write-string " " ostream) (output-dmrs1 dmrs 'dxml ostream) (finish-output ostream) (excl::shell (concatenate 'string "xmlnorm -s ~/dmrs-test.dmrs 2>| ~/dmrs-tmp/" (format nil "~A" scount) ".errs")))))))))))) |#