;;;-*- Mode: Lisp; Package: (PARSER) -*- ;;; ;;; Chart parser for context free grammars. ;;; (C) Paul Meurer 16.12.1997 ;;; ;;; extended-context-free-grammar uses extended lists in rule definitions ;;; ;;; If reverse-p is true, the rhs of the rules are used in reversed order. ;;; This is useful for parsing regular expressions for example. ;;; Can't be used with extended lists. ; the toplevel function is cfg-chart-parse ;(require :tree/dg-grapher) #+ignore (eval-when (load compile eval) (um:use-module :extended-lists "projects:parser;extended-lists")) (in-package :parser) (eval-when (load compile eval) (export '(get-top-nodes initialize-chart))) (defstruct parse-edge start end found to-find rule ; label of a first constituent in the to-find part of the rule-instance next-label) (defmacro active-p (edge) `(parse-edge-to-find ,edge)) (defstruct rule flat-rule ps ; phrase structure part of rule minimal-length ; minimal length of all expansions of ps first-rewrites ; possibly first rhs constituents applicable-rules ; list of rules having a rhs constituent which can possibly function ; function which is executed when rule gets applied type ) (defclass context-free-grammar () ((grammar :initform nil :initarg :grammar :accessor grammar) (reverse-p :initform nil :initarg :reverse-p :accessor reverse-p))) (defclass extended-context-free-grammar (context-free-grammar) ()) ; contains everything the parser needs (defclass cfg-chart (context-free-grammar) ((templates :initform (make-hash-table) :reader templates) (active-edges :initform (make-array 16 :adjustable t :initial-element '()) ; 16 is arbitrary :accessor active-edges) (inactive-edges :initform (make-array 16 :adjustable t :initial-element '()) :accessor inactive-edges) (sentence-length :accessor sentence-length) (lexical-cat-fn :initarg :lexical-cat-fn :reader lexical-cat-fn))) (defclass extended-cfg-chart (cfg-chart extended-context-free-grammar) ()) (defparameter *cfg-chart* (make-instance 'cfg-chart)) ;;; initialize the chart (bottom-up version) (defmethod tokenize-string (sentence (chart cfg-chart) &key) "ad hoc function transforming a string of substrings divided by space into a list of those substrings" (let (word-list) (labels ((tokenize (str end) (let ((ws-pos (position #\Space sentence :end end :from-end t))) (cond (ws-pos (push (subseq str (1+ ws-pos) end) word-list) (tokenize str ws-pos)) (t (push (subseq str 0 end) word-list)))))) (tokenize sentence nil) word-list))) (defmethod initialize-chart ((chart cfg-chart) (sentence list)) ; reset and adjust the edge arrays (let ((array-length (array-dimension (active-edges chart) 0)) (sentence-length (length sentence))) (when (>= sentence-length array-length) (setf (active-edges chart) (adjust-array (active-edges chart) (1+ sentence-length))) (setf (inactive-edges chart) (adjust-array (inactive-edges chart) (1+ sentence-length)))) (loop for i from 0 to (max sentence-length (1- array-length)) do (setf (aref (active-edges chart) i) nil (aref (inactive-edges chart) i) nil))) ; initialize with lexical edges (setf (sentence-length chart) (length sentence)) (let ((vertex 0)) (dolist (word sentence) (add-lexical-edges chart word ;(print (gethash word (lexicon chart))) (funcall (lexical-cat-fn chart) word) vertex) (incf vertex)))) (defmethod add-lexical-edges ((chart cfg-chart) word categories vertex) (dolist (cat categories) (add-edge chart (make-parse-edge :start vertex :end (1+ vertex) :found word :rule cat)))) #||;;; top level function (defmethod cfg-chart-parse ((sentence string) &key (goal 'S) (chart *cfg-chart*) (display t)) "parses a sentence given as a string using the grammar, templates and lexicon in chart" (let ((word-list (tokenize-string sentence chart :reverse-p (reverse-p chart)))) (time (initialize-chart chart word-list)) ;(parse chart word-list) ;(display-parse chart :display display :goal goal :title sentence) )) (defmethod display-parse ((chart cfg-chart) &key (goal 'S) display (title "Parse tree")) "displays the phrase structure trees of a parse" (let ((tree (build-tree chart goal))) (if (and display tree) (dolist (one-tree tree) (when (eq (car one-tree) goal) (make-instance 'parse-grapher-window :view-font '("MonacoGe" 9) :view-position #@(4 300) :parse-tree one-tree :window-title title)))) (values))) ||# ;;; this and the next three methods are the recursive core of the parser ;;; search is depth first bottom up (defmethod add-edge ((chart cfg-chart) edge) "adds an edge to the chart, recording any new edges that may need to be added as a consequence" (with-slots (active-edges inactive-edges) chart (cond ((active-p edge) ; added edge is active (push edge (aref active-edges (parse-edge-end edge))) (check-inactive-edges chart (aref inactive-edges (parse-edge-end edge)) edge) ;(check-rules-for-active chart edge) ) (t ; otherwise added edge is inactive (push edge (aref inactive-edges (parse-edge-start edge))) (check-active-edges chart (aref active-edges (parse-edge-start edge)) edge) (check-rules-for-inactive chart edge))))) (defmethod check-active-edges ((chart cfg-chart) edges inactive-edge) (mapc #'(lambda (edge) (when (active-p edge) ; what about edges with optionals? (check-and-combine chart edge inactive-edge))) edges)) (defmethod check-inactive-edges ((chart cfg-chart) edges active-edge) (mapc #'(lambda (edge) (unless (active-p edge) (check-and-combine chart active-edge edge))) edges)) (defmethod check-and-combine ((chart cfg-chart) active-edge inactive-edge) "Tries to combine an active and inactive edge using the fundamental rule. Adds a new edge if these can combine." (let* ((active-label (car (parse-edge-to-find active-edge))) (inactive-label (if (rule-p #|| #+mcl ccl::structurep #+pcl pcl::structurep #+allegro excl::structurep #+lispworks structurep #+sbcl structurep ||# (parse-edge-rule inactive-edge)) ; change! (car (rule-flat-rule (parse-edge-rule inactive-edge))) (parse-edge-rule inactive-edge)))) (when (eq active-label inactive-label) (let ((new-to-find (cdr (parse-edge-to-find active-edge))) (right-words (- (sentence-length chart) (parse-edge-end inactive-edge)))) (when (>= right-words (length new-to-find)) (let ((new-found (cons inactive-edge (parse-edge-found active-edge)))) (add-edge chart (make-parse-edge ; new edge :start (parse-edge-start active-edge) :end (parse-edge-end inactive-edge) :found new-found :to-find new-to-find :rule (parse-edge-rule active-edge))))))))) (defmethod check-and-combine ((chart extended-cfg-chart) active-edge inactive-edge) "Tries to combine an active and inactive edge using the fundamental rule. Adds a new edge if these can combine." (let* ((next-label (parse-edge-next-label active-edge)) (active-label (nth next-label (rule-flat-rule (parse-edge-rule active-edge)))) (inactive-label (if (rule-p #|| #+mcl ccl::structurep #+pcl pcl::structurep #+allegro excl::structurep #+lispworks structurep #+sbcl structurep ||# (parse-edge-rule inactive-edge)) (car (rule-flat-rule (parse-edge-rule inactive-edge))) (parse-edge-rule inactive-edge)))) (when (eq active-label inactive-label) (let* ((new-to-find (remove-first-elt next-label (parse-edge-to-find active-edge) #'=)) (new-found (cons inactive-edge (parse-edge-found active-edge))) (right-words (- (sentence-length chart) (parse-edge-end inactive-edge))) (min-length (extended-list-min-length new-to-find))) (when (>= right-words min-length) (dolist (label (or (first-elements new-to-find) (list nil))) (add-edge chart (make-parse-edge ; new edge :start (parse-edge-start active-edge) :end (parse-edge-end inactive-edge) :found new-found :to-find new-to-find :rule (parse-edge-rule active-edge) :next-label label))) ; edges with optional to-find are added as inactive edges, too (when (and new-to-find (zerop min-length)) (add-edge chart (make-parse-edge :start (parse-edge-start active-edge) :end (parse-edge-end inactive-edge) :found new-found :rule (parse-edge-rule active-edge))))))))) ;;; bottom up parsing functions ;;; add new edges for any rules which can use inactive edges ;; ;; grammar is a list of rules; ;; each rule is: ;; 1. the dg which contains the whole rule, whose body's arc's ;; attrs are the numbers 0..n ;; 2. an extended list with numbers from 1 to the number of ;; rhs dgs, eg. ;; {[1] [2] 3} if the rule is S -> {[NP] [NP] VP}; (defmethod cat-rules ((chart cfg-chart) rule-instance) (let ((cat-rules ())) (dolist (rule (grammar chart)) (when (eq (cadr (rule-flat-rule rule)) rule-instance) ;(push (cons rule (car (rule-flat-rule rule))) cat-rules))) (push rule cat-rules))) cat-rules)) ; improve this! (defmethod cat-rules ((chart extended-cfg-chart) rule-instance) (let ((cat-rules ())) (dolist (rule (grammar chart)) (dolist (rewrite-label (rule-first-rewrites rule)) (when (eq (nth rewrite-label (rule-flat-rule rule)) rule-instance) ;(format t "label: ~a, rule: ~a~%" rewrite-label (rule-flat-rule rule)) (push (cons rule rewrite-label) cat-rules)))) cat-rules)) ; edge is inactive (defmethod check-rules-for-inactive ((chart cfg-chart) edge) ;(declare (optimize (speed 3) (safety 0) (compilation-speed 0) (space 0))) (with-slots (grammar sentence-length) chart (let ((right-words (- sentence-length (parse-edge-end edge) -1))) (dolist (rule (if (rule-p #|| #+mcl ccl::structurep #+pcl pcl::structurep #+allegro excl::structurep #+lispworks structurep #+sbcl structurep ||# (parse-edge-rule edge)) ; change this! (rule-applicable-rules (parse-edge-rule edge)) (cat-rules chart (parse-edge-rule edge)))) (when (and ; add only rules which haven't been added before (not (some #'(lambda (active-edge) (and (eq rule (parse-edge-rule active-edge)) (= (parse-edge-start active-edge) ; rule is fresh (parse-edge-start edge)) )) (aref (active-edges chart) (parse-edge-start edge)))) ; add only rules which have a chance to fully expand (>= right-words (1- (length (rule-flat-rule rule)))) ) ;; rule is applicable if the first dg in the rhs of the rule ;; unifies with the lhs of the rule of the edge. We don't need to ;; test here because we precollected the unifying rules in ;; the rule-applicable-rules slot of rule (add-edge chart (make-parse-edge :start (parse-edge-start edge) :end (parse-edge-start edge) :to-find (cdr (rule-flat-rule rule)) :rule rule))))))) (defmethod check-rules-for-inactive ((chart extended-cfg-chart) edge) (with-slots (grammar sentence-length) chart (let ((right-words (- sentence-length (parse-edge-end edge) -1))) (dolist (rule+label (if (rule-p #|| #+mcl ccl::structurep #+pcl pcl::structurep #+allegro excl::structurep #+lispworks structurep #+sbcl structurep ||# (parse-edge-rule edge)) ; change this! (rule-applicable-rules (parse-edge-rule edge)) (cat-rules chart (parse-edge-rule edge)))) (destructuring-bind (rule . label) rule+label (when (and ; add only rules which haven't been added before (not (some #'(lambda (active-edge) (and (eq rule (parse-edge-rule active-edge)) (= (parse-edge-start active-edge) ; rule is fresh (parse-edge-start edge)) (eq label (parse-edge-next-label active-edge)))) ;?? (aref (active-edges chart) (parse-edge-start edge)))) ; add only rules which have a chance to fully expand (>= right-words (rule-minimal-length rule))) ;; rule is applicable if the first dg in the rhs of the rule ;; unifies with the lhs of the rule of the edge. We don't need to ;; test here because we precollected the unifying rules in ;; the rule-applicable-rules slot of rule (add-edge chart (make-parse-edge :start (parse-edge-start edge) :end (parse-edge-start edge) :to-find (rule-ps rule) :rule rule :next-label label)))))))) ;;; building parse trees (defmethod build-tree ((chart cfg-chart) goal) (labels ((build (edge) (let ((subnodes (parse-edge-found edge))) (if (listp subnodes) (cons (or (rule-type (parse-edge-rule edge)) (car (rule-flat-rule (parse-edge-rule edge)))) (nreverse (mapcar #'build subnodes))) (cons (parse-edge-rule edge) (parse-edge-found edge)))))) (mapcar #'build (get-top-nodes chart goal)))) (defmethod build-tree ((chart extended-cfg-chart) goal) (labels ((build (edge) (let ((subnodes (parse-edge-found edge))) (if (listp subnodes) (cons (car (rule-flat-rule (parse-edge-rule edge))) (nreverse (mapcar #'build subnodes))) (cons (parse-edge-rule edge) (parse-edge-found edge)))))) (mapcar #'build (get-top-nodes chart goal)))) (defmethod get-top-nodes ((chart cfg-chart) goal) (with-slots (sentence-length) chart (delete-if #'null (mapcar (lambda (edge) (when (and (= (parse-edge-start edge) 0) (or (not sentence-length) ; used for a generating parser (= (parse-edge-end edge) sentence-length)) (eq goal (car (rule-flat-rule (parse-edge-rule edge))))) edge)) (aref (inactive-edges chart) 0))))) (defun build-cfg-lexicon (lexicon-list &optional (chart *cfg-chart*)) (with-slots (lexicon) chart (clrhash lexicon) (mapc #'(lambda (pair) (push (cadr pair) (gethash (car pair) lexicon))) lexicon-list) lexicon)) (defun rule-to-labels (ps-rule &optional (start 0)) (labels ((convert (rule) (cond ((listp rule) (mapcar #'convert rule)) ((extended-list-p rule) (make-extended-list :char (extended-list-char rule) :form (mapcar #'convert (extended-list-form rule)))) (t (prog1 start (incf start)))))) (convert ps-rule))) ;(rule-to-labels '(S {[NP1] [NP2] VP})) (defmethod build-cfg-grammar ((chart extended-cfg-chart) rules-list &key) (with-slots (grammar) chart (setf grammar nil) (dolist (rule rules-list) (let* ((flattened-rule (flatten-extended-list rule)) (rule-min-length (extended-list-min-length (cdr rule))) (labels-ps-rule (rule-to-labels (cdr rule) 1))) (push (make-rule :flat-rule flattened-rule :ps labels-ps-rule :minimal-length rule-min-length :first-rewrites (first-elements labels-ps-rule) :applicable-rules nil) grammar))) (dolist (rule grammar) (dolist (r grammar) (dolist (rewrite-label (rule-first-rewrites r)) (when (eq (nth rewrite-label (rule-flat-rule r)) (car (rule-flat-rule rule))) (push (cons r rewrite-label) (rule-applicable-rules rule))))))) t)