;;;-*- Mode: Lisp; Package: TRANSDUCER -*- ;;; ;;; Finite state transducer with unification ;;; (C) Paul Meurer 1999 (in-package :fst) (use-package :parser) ;;(export "UTP" "UTP-OR" "MAKE-UTP-OR" "PRECOMPILE-U-TRANSDUCER" "COMPILE-U-TRANSDUCER") ;;; ; Special symbols: ; @ : any char ; % : alternative if no other satisfied ;; feature structure symbols (defclass u-symbols (extended-symbols) ()) (defmethod symbol-order-fn ((fsa u-symbols)) (lambda (s1 s2) (declare #.cl-user::*highly-optimized*) ;; sort chars before fixnums before strings before symbols (etypecase s1 (character (etypecase s2 (character (if (char= (the character s1) (the character s2)) :equal #+excl (and (char< (the character s1) (the character s2)) t) #-excl (char< (the character s1) (the character s2)))) ((or fixnum simple-string symbol cons) t))) (fixnum (etypecase s2 (character nil) (fixnum (if (= (the fixnum s1) (the fixnum s2)) :equal (< (the fixnum s1) (the fixnum s2)))) ((or simple-string symbol cons) t))) (simple-string (etypecase s2 ((or character fixnum) nil) (simple-string (if (string= s1 s2) :equal (and (string< s1 s2) t))) ((or symbol cons) t))) (symbol (etypecase s2 ((or character fixnum simple-string) nil) (symbol (if (eq s1 s2) :equal (and (string< (symbol-name s1) (symbol-name s2)) t))) (cons t))) ;; cons = dg (cons (etypecase s2 ((or character fixnum simple-string symbol) nil) (cons (parser::dg< s1 s2))))))) (defclass u-fsa (fixnum-states u-symbols skip-list-relations list-sets) ()) (defclass u-nfa (u-fsa nfa) ()) (defclass u-dfa (u-fsa dfa) ()) (defclass u-fst (transducer-symbols fixnum-states u-symbols skip-list-relations list-sets) ((name :initform nil :initarg :name :accessor u-fst-name))) (defmethod print-object ((obj u-fst) stream) (print-unreadable-object (obj stream) (print-object (class-name (class-of obj)) stream) (let ((name (u-fst-name obj))) (when name (format stream " ~a" name))))) (defclass u-nft (u-fst nfa) ()) (defclass u-dft (u-fst dfa) ()) (defmethod make-nfa ((fst u-fst)) (make-instance 'u-nft)) (defmethod make-u-nfa ((fst u-fst)) (make-instance 'u-nfa)) (defmethod make-u-dfa ((fst u-fst)) (make-instance 'u-dfa)) (defmethod make-dfa ((fst u-fst)) (make-instance 'u-dft)) ;; useful?? (defmethod any-determinize ((fsa u-dft)) (let ((nfa (fsa::copy-to-nfa fsa))) (any-determinize nfa))) (defmethod any-determinize ((nfa u-nft)) ;(fsa::fsa-print nfa) (relation-map #'(lambda (state relation) (declare (ignore state)) (relation-map #'(lambda (symbol any-dest) (when (or (eq symbol '@) (and (transducer-pair-p symbol) (eq (tp-upper symbol) '@))) (let ((lower (if (eq symbol '@) '@ (tp-lower symbol))) (augment (if (eq symbol '@) nil (tp-augment symbol)))) (relation-map #'(lambda (key value) (declare (ignore value)) (let* ((tp (make-transducer-pair (if (transducer-pair-p key) (tp-upper key) key) lower augment)) (new-dest (or (relation-get tp relation) (setf (relation-get tp relation) (fsa::make-state-set nfa))))) (set-map #'(lambda (dest) (set-insert dest new-dest)) any-dest))) relation)))) relation)) (fsa-delta nfa)) (minimize nfa)) (defparameter *u-fst-table* (make-hash-table)) (defmethod compile-u-transducer ((list-regexp list) &key name (fst-table *u-fst-table*) (fst-class 'u-fst) (dfa-class 'extended-dfa)) (let* ((tr (precompile-u-transducer list-regexp :name name :fst-table fst-table :fst-class fst-class)) (dtr (any-determinize tr) ;(determinize tr) )) (when name (setf (gethash name fst-table) tr)) (copy-to-one-way-fst dtr :dfa-class dfa-class))) ;(inspect *u-fst-table*) (defun precompile-u-transducer (list-regexp &key name (fst-table *u-fst-table*) (fst-class 'u-fst)) (let ((fsa (make-instance fst-class))) (labels ((walk (exp) (etypecase exp (list (ecase (car exp) (seq (minimize (apply #'fsa-concat (mapcar #'walk (cdr exp))))) (or (minimize (apply #'fsa-union (mapcar #'walk (cdr exp))))) (? (assert (null (cddr exp))) (minimize (fsa-optional (walk (cadr exp))))) (* (assert (null (cddr exp))) (minimize (fsa-closure (walk (cadr exp))))) (+ (assert (null (cddr exp))) (minimize (fsa-plus (walk (cadr exp))))))) (string (minimize (apply #'fsa-concat (map 'list #'(lambda (c) (fsa-symbol c fsa)) exp)))) (symbol (case exp ((@ %) (fsa-symbol exp fsa)) (otherwise (let ((sub-fst (gethash exp fst-table))) (or sub-fst (fsa-symbol exp fsa)))))) (transducer-pair (let ((upper (tp-upper exp)) (augment (tp-augment exp))) (cond ((eq *epsilon* upper) (fsa-symbol exp fsa)) ((stringp upper) (minimize (apply #'fsa-concat (loop for c across upper with last = (length upper) and pos = 0 and lower = (tp-lower exp) do (incf pos) collect (fsa-symbol (if (= pos last) (make-transducer-pair c lower augment) (make-transducer-pair c nil nil)) fsa))))) (t (fsa-symbol exp fsa)))))))) (let ((tr (walk list-regexp))) (when name (setf (gethash name fst-table) tr (u-fst-name tr) name)) tr)))) #+not-necessary (defmethod copy-delta ((new u-nft) (old u-dft) state-map) (relation-map #'(lambda (state relation) (let ((new-relation (symbols-get (fsa::map-state state state-map new) new))) (relation-map #'(lambda (symbol dest) (setf (relation-get symbol new-relation) (let ((set (fsa::make-state-set new))) (set-insert (fsa::map-state dest state-map new) set) set))) relation))) (fsa-delta old)) ;(fsa::fsa-print new) ) (defmethod copy-delta ((new extended-dfa) (old u-dft) state-map) ;(fsa::fsa-print old) (relation-map #'(lambda (state relation) #+debug (progn (terpri) (Print (list state relation)) (skip-list::map-skip-list (lambda (x y) (print (cons x y))) (skip-list-relations::relation-skip-list relation)) (terpri)) (let ((new-relation (symbols-get (fsa::map-state state state-map new) new))) (relation-map #'(lambda (symbol dest) (let* ((tp-p (transducer-pair-p symbol)) (upper (if tp-p (tp-upper symbol) symbol)) (lower (if tp-p (tp-lower symbol) symbol)) (augment (when tp-p (tp-augment symbol)))) #+debug (print (cons dest symbol)) (push #-no-augment (list (fsa::map-state dest state-map new) lower augment) #+no-augment (cons (fsa::map-state dest state-map new) lower) (relation-get upper new-relation)))) relation))) (fsa-delta old))) (defmethod copy-to-one-way-fst ((dft u-dft) &key (dfa-class 'extended-dfa)) (let* ((dfa (make-instance dfa-class)) (state-map (make-state-map dft))) (copy-delta dfa dft state-map) (fsa::copy-states dfa (fsa-final-states dfa) (fsa-final-states dft) state-map) (setf (fsa-start-state dfa) (fsa::map-state (fsa-start-state dft) state-map dfa)) dfa)) #+ignore (defun delta-get-u-transduction-and-state (symbol state delta) (let ((relation (relation-get state delta))) (when relation (or (relation-get symbol relation) (relation-get '@ relation) (relation-get '% relation))))) (defun delta-get-epsilon-transduction-and-state (state delta) (let ((relation (relation-get state delta))) (when relation (relation-get *epsilon* relation)))) ;; Used to change ad hoc one symbol (represented by $) in the compiled ;; transducer (by binding $ to a utp). Useful for testing roots or stems. (defparameter *$* t) (defun delta-get-$-transduction-and-state (state delta) (let ((relation (relation-get state delta))) ;; always only one next state? (when relation (car (relation-get '$ relation))))) ;; should be a fsa and/or persistent (defparameter *root-list* (make-skip-list (lambda (s1 s2) (cond ((string= s1 s2) :equal) ((string< s1 s2) t) (t nil))))) (defun add-root (root) "Adds a root to the root list." (setf (skip-list-get root *root-list*) t)) #+ignore (defparameter *c-root-list* (make-skip-list (lambda (s1 s2) (cond ((string= s1 s2) :equal) ((string< s1 s2) t) (t nil))))) #+ignore (defun add-c-root (c-root root) "Adds a root and its dg to the root list." (push root (skip-list-get c-root *c-root-list*))) (defmethod match-roots ((dfa extended-dfa) string pos) "Returns a list of pairs of roots matching string starting with pos and root dgs. A preliminary device to avoid recompiling the network when a new root is added." (loop for i from (1+ pos) to (length string) for root = (subseq string pos i) when (skip-list-get root *root-list*) collect (cons (list-to-dg `((root ,root))) i))) (defvar *noun-table* (make-hash-table :test #'equal)) (defvar *root-table* (make-hash-table :test #'equal)) (defmethod match-stems ((dfa extended-dfa) string pos) "Returns a list of pairs of stems matching string starting with pos and stem dgs. A preliminary device to avoid recompiling the network when a new stem is added." (collecting (loop for i from (1+ pos) to (length string) for stem = (subseq string pos i) do (dolist (dg (gethash stem *noun-table*)) (collect (cons dg i)))))) ;; preliminary (defun get-roots () (collecting (map-skip-list (lambda (root value) (declare (ignore value)) (collect (cons root (list-to-dg `((root ,root)))))) *root-list*))) ;(get-roots) ;; commodity function (defun print-all-roots () (map-skip-list (lambda (key value) (declare (ignore value)) (print key)) *root-list*)) ;; commodity function (defun count-all-roots () (skip-list-length *root-list*)) ;; fs is start feature structure (defmethod u-transduce (string (fsa extended-dfa) &key fs) ;;(format t "~& \"~a\"~%" string) (with-slots (fsa-delta fsa-start-state fsa-final-states) fsa (let ((length (length string)) (res ())) (labels ((step (i state dg) (cond ((= i length) (when (set-member-p state fsa-final-states) (push dg res))) (t (let* ((c (aref string i)) (states (delta-get-transduction-and-state c state fsa-delta))) ;(print states) ;; normal states (dolist (state+lower states) (let* ((lower (cadr state+lower)) (augment (caddr state+lower)) (unification (let ((augmented-dg (if augment (parser::expand-dg-one dg augment) dg))) (if lower (parser::unify augmented-dg lower) augmented-dg)))) (when unification (step (1+ i) (car state+lower) unification))))) ;; $ states (cond ((eq *$* t) ;; more than one root (let ((state+type (delta-get-$-transduction-and-state state fsa-delta))) (when state+type ;(print state+type) (dolist (lower+root-end (if (parser::get-attr (cadr state+type) 'root) (match-roots fsa string i) (match-stems fsa string i))) (destructuring-bind (lower . root-end) lower+root-end (let ((unification (if lower (parser::unify dg lower) dg))) (when unification (step root-end (car state+type) unification)))))))) (*$* (let ((state+nil ;; obs! lower is never considered! (delta-get-$-transduction-and-state state fsa-delta))) (when (and state+nil (<= (length (car *$*)) (- (length string) i)) (string= (car *$*) string :start2 i :end2 (+ i (length (car *$*))))) (let* ((lower (cdr *$*)) (unification (if lower (parser::unify dg lower) dg))) (when unification (step (+ i (length (car *$*))) (car state+nil) unification))))))))) ;; epsilon states (when (<= i length) (let* ((epsilon-states (delta-get-epsilon-transduction-and-state state fsa-delta))) (dolist (state+lower epsilon-states) (let* ((lower (cadr state+lower)) (invert-p (eq (caddr state+lower) 'not)) (augment (and (not invert-p) (caddr state+lower))) (unification (let ((augmented-dg (if augment (parser::expand-dg-one dg augment) dg))) (if lower (parser::unify augmented-dg lower) augmented-dg)))) (cond ((and (not invert-p) unification) (step i (car state+lower) unification)) ((and invert-p (not unification)) (step i (car state+lower) dg) )))))))) (step 0 fsa-start-state (or fs (parser::make-dg))) res)))) #+old (defun utp (symbol dg-list) (make-transducer-pair symbol (list-to-dg dg-list))) (defun utp (symbol dg-list &key augment) (make-transducer-pair symbol (list-to-dg dg-list) augment)) (defun utp-e (dg-list &key augment) (make-transducer-pair *epsilon* (list-to-dg dg-list) augment)) (defun utp-not (dg-list) (make-transducer-pair *epsilon* (list-to-dg dg-list) 'not)) (defparameter *transducer-table* (make-hash-table :test #'equal)) (defun make-utp (name symbol dg-list) (let ((tp (utp symbol dg-list))) (precompile-u-transducer tp :name name))) ; the captured variable morph stands for the literal morpheme (defmacro utp-or (morpheme-list feature-structure &key augment) `(cons 'or (mapcar (lambda (morph) (utp morph ,feature-structure :augment ,augment)) ,morpheme-list))) (defmacro make-utp-or (morpheme-list feature-structure &key name) `(precompile-u-transducer (utp-or ,morpheme-list ,feature-structure) :name ,name)) #+old (defun display-parses (word &key (fst *fst*)) (let ((parses (u-transduce word fst))) (if (zerop (length parses)) (parser::display-dgs nil) (mapc #'parser::display-dg parses)) (length parses))) (defun display-parses (word &key (fst *fst*)) (let ((parses (runtime (u-transduce word fst)))) ;(print parses) (display-dgs parses :string word :node-class 'parser::disjunctive-f-structure-node) (length parses) (values))) ;; --------------------------- generation ------------------------------ (defun delta-get-transductions-and-states (state delta) "Returns all states reachable from STATE in one step" (let ((relation (relation-get state delta))) (when relation (u:collecting (relation-map (lambda (symbol value) (unless (or (eq symbol *epsilon*) (eq symbol '$)) (u:collect (cons symbol value)))) relation))))) (defmethod u-generate (dg (fsa extended-dfa) &key augment-dgs root fun root-marker right-root-marker) "Takes a disjunctive feature structure as input and generates all strings accepted by the network & unification." (when dg (with-slots (fsa-delta fsa-start-state fsa-final-states) fsa (let ((count 0) (res ())) (labels ((step (i state dg str) (unless (find (code-char 196) str) (when (set-member-p state fsa-final-states) (incf count) (if fun (funcall fun str dg) (push (print str) res))) (let* ((tr+states (delta-get-transductions-and-states state fsa-delta))) ;; normal states (dolist (symbol+states tr+states) (destructuring-bind (c . states) symbol+states (dolist (state+lower states) (destructuring-bind (state lower augment) state+lower (let ((unification (let ((augmented-dg (cond ((null augment) dg) ((getf augment-dgs augment) (unify (getf augment-dgs augment) (parser::expand-dg-one dg augment))) (t (parser::expand-dg-one dg augment))))) (cond ((null augmented-dg) nil) (lower (unify augmented-dg lower)) (t augmented-dg))))) (when unification (step (1+ i) state unification (concat str (list c)))))))))) ;; $ states (cond ((eq *$* t) (let ((state+nil ;; obs! lower is never considered! (delta-get-$-transduction-and-state state fsa-delta))) (when state+nil (let* ((dg-root (cdr (get-dg-by-path dg '(root)))) (unification (if root (unify dg (list-to-dg `((root ,root)))) dg)) (root (or root dg-root))) (when unification (step (+ i (length root)) (car state+nil) unification (if root-marker (concat str root-marker root (or right-root-marker root-marker)) (concat str root)))))))) (*$* (let ((state+nil ;; obs! lower is never considered! (delta-get-$-transduction-and-state state fsa-delta))) (when state+nil (let ((unification (unify dg (list-to-dg `((root (car *$*))))))) (when unification (step (+ i (length root)) (car state+nil) unification (if root-marker (concat str root-marker root (or right-root-marker root-marker)) (concat str root)))))))) (t nil)) ;; epsilon states (let* ((epsilon-states (delta-get-epsilon-transduction-and-state state fsa-delta))) (dolist (state+lower epsilon-states) (let* ((lower (cadr state+lower)) (invert-p (eq (caddr state+lower) 'not)) (augment (and (not invert-p) (caddr state+lower))) (unification (let ((augmented-dg (cond ((null augment) dg) ((getf augment-dgs augment) (unify (getf augment-dgs augment) (parser::expand-dg-one dg augment))) (t (parser::expand-dg-one dg augment))))) (cond ((null augmented-dg) nil) (lower (unify augmented-dg lower)) (t augmented-dg))))) (cond ((and (not invert-p) unification) (step i (car state+lower) unification str)) ((and invert-p (not unification)) (step i (car state+lower) dg str))) #+ignore (when unification (step i (car state+lower) unification str)))))))) (step 0 fsa-start-state (or dg (parser::make-dg)) "") count))))) (defun generate-verb-forms (c-root dg) (loop for root in (c-root-lookup c-root) for filter-dg = (unify (get-verb-filter root) (unify (list-to-dg `((c-root ,c-root))) dg)) when filter-dg sum (u-generate filter-dg *verb-fst* :root root)))