;;;-*- Mode: Lisp; Package: TRANSDUCER -*- ;;; ;;; Finite state transducer. ;;; (C) Paul Meurer 16.12.1997 (in-package :fst) ;;; ; Special symbols: ; @ : any char ; % : alternative if no other satisfied (defclass extended-symbols () ()) (defmethod symbol-order-fn ((fsa extended-symbols)) #'(lambda (s1 s2) (declare #.cl-user::*highly-optimized*) ;; sort chars before fixnums before strings before symbols (etypecase s1 (character (etypecase s2 (character (if (char= (the character s1) (the character s2)) :equal #+excl (and (char< (the character s1) (the character s2)) t) #-excl (char< (the character s1) (the character s2)))) ((or fixnum simple-string symbol) t))) (fixnum (etypecase s2 (character nil) (fixnum (if (= (the fixnum s1) (the fixnum s2)) :equal (< (the fixnum s1) (the fixnum s2)))) ((or simple-string symbol) t))) (simple-string (etypecase s2 ((or character fixnum) nil) (simple-string (if (string= s1 s2) :equal (and (string< s1 s2) t))) (symbol t))) (symbol (etypecase s2 ((or character fixnum simple-string) nil) (symbol (if (eq s1 s2) :equal (and (string< (symbol-name s1) (symbol-name s2)) t)))))))) (defclass extended-fsa (fixnum-states extended-symbols skip-list-relations list-sets) ()) (defclass extended-nfa (extended-fsa nfa) ()) (defclass extended-dfa (extended-fsa dfa) ()) (defclass fst (transducer-symbols fixnum-states extended-symbols skip-list-relations list-sets) ()) (defclass nft (fst nfa) ()) (defclass dft (fst dfa) ()) (defmethod make-nfa ((fst fst)) (make-instance 'nft)) (defmethod make-extended-nfa ((fst fst)) (make-instance 'extended-nfa)) (defmethod make-extended-dfa ((fst fst)) (make-instance 'extended-dfa)) (defmethod make-dfa ((fst fst)) (make-instance 'dft)) (defmethod any-determinize ((fsa dft)) (let ((nfa (fsa::copy-to-nfa fsa))) (any-determinize nfa))) (defmethod any-determinize ((nfa nft)) (relation-map #'(lambda (state relation) (declare (ignore state)) (relation-map #'(lambda (symbol any-dest) (when (or (eq symbol '@) (and (transducer-pair-p symbol) (eq (tp-upper symbol) '@))) (let ((lower (if (eq symbol '@) '@ (tp-lower symbol))) (augment (if (eq symbol '@) nil (tp-augment symbol)))) (relation-map #'(lambda (key value) (declare (ignore value)) (let* ((tp (make-transducer-pair (if (transducer-pair-p key) (tp-upper key) key) lower augment)) (new-dest (or (relation-get tp relation) (setf (relation-get tp relation) (fsa::make-state-set nfa))))) (set-map #'(lambda (dest) (set-insert dest new-dest)) any-dest))) relation)))) relation)) (fsa-delta nfa)) (minimize nfa)) (defparameter *fst-table* (make-hash-table)) (defmethod compile-transducer ((list-regexp list) &key name (fst-table *fst-table*)) (let ((fsa (make-instance 'fst))) (labels ((walk (exp) (etypecase exp (list (ecase (car exp) (seq (minimize (apply #'fsa-concat (mapcar #'walk (cdr exp))))) (or (minimize (apply #'fsa-union (mapcar #'walk (cdr exp))))) (? (assert (null (cddr exp))) (minimize (fsa-optional (walk (cadr exp))))) (* (assert (null (cddr exp))) (minimize (fsa-closure (walk (cadr exp))))) (+ (assert (null (cddr exp))) (minimize (fsa-plus (walk (cadr exp))))))) (string (minimize (apply #'fsa-concat (map 'list #'(lambda (c) (fsa-symbol c fsa)) exp)))) (symbol (case exp ((@ %) (fsa-symbol exp fsa)) (otherwise (let ((sub-fst (gethash exp fst-table))) (or sub-fst (fsa-symbol exp fsa)))))) (transducer-pair (let ((upper (tp-upper exp))) (if (stringp upper) (minimize (apply #'fsa-concat (loop for c across upper with first = t and lower = (tp-lower exp) and augment = (tp-augment exp) collect (fsa-symbol (cond (first (setq first nil) (make-transducer-pair c (or lower *epsilon*) augment)) (t (make-transducer-pair c (case lower ((@ %) lower) (otherwise *epsilon*)) (case lower ((@ %) augment) (otherwise nil))))) fsa)))) (fsa-symbol exp fsa))))))) (let* ((tr (walk list-regexp)) (dtr (any-determinize tr))) (when name (setf (gethash name fst-table) tr)) (fsa::fsa-print tr) (fsa::fsa-print dtr) (copy-to-one-way-fst dtr))))) (defmethod copy-delta ((new extended-dfa) (old dft) state-map) (relation-map #'(lambda (state relation) (let ((new-relation (symbols-get (fsa::map-state state state-map new) new))) (relation-map #'(lambda (symbol dest) (let* ((tp-p (transducer-pair-p symbol)) (upper (if tp-p (tp-upper symbol) symbol)) (lower (if tp-p (tp-lower symbol) symbol))) (push (cons (fsa::map-state dest state-map new) lower) (relation-get upper new-relation)))) relation))) (fsa-delta old))) (defmethod copy-to-one-way-fst ((dft dft) &key &allow-other-keys) (let* ((dfa (make-extended-dfa dft)) (state-map (make-state-map dft))) (copy-delta dfa dft state-map) (fsa::copy-states dfa (fsa-final-states dfa) (fsa-final-states dft) state-map) (setf (fsa-start-state dfa) (fsa::map-state (fsa-start-state dft) state-map dfa)) dfa)) (defun delta-get-transduction-and-state (symbol state delta) (let ((relation (relation-get state delta))) (when relation (or (relation-get symbol relation) (relation-get '@ relation) (relation-get '% relation))))) (defmethod transduce (vector (fsa extended-dfa)) ;(format t "~& \"~a\"~%" vector) (with-slots (fsa-delta fsa-start-state fsa-final-states) fsa (let ((length (length vector)) (res ())) (labels ((step (i state node) (cond ((= i length) (when (set-member-p state fsa-final-states) (push node res))) (t (let* ((c (aref vector i)) (states (delta-get-transduction-and-state c state fsa-delta))) (dolist (state+lower states) (let* ((lower (cdr state+lower)) (leaf (case lower (#.*epsilon* nil) ((@ %) (string c)) (otherwise (string lower)))) (subst-leaf (if (stringp leaf) (substitute c #\@ leaf) leaf)) (leaf-list (if subst-leaf (cons subst-leaf node) node))) (step (1+ i) (car state+lower) leaf-list)))))))) (step 0 fsa-start-state nil) (mapcar #'nreverse res) #+woll(mapcar #'(lambda (branch) (let ((transduction "")) (dolist (str branch) (when str (setq transduction (utils:concat str transduction)))) transduction)) res))))) #| (transduce "cbaebabababaaaasfewrtrewtyaa" *fst*) (setf *fst* (compile-transducer '(:* (:or #[:@] #["c"/"?"] #["ba"/"."])))) (setf *fst* (compile-transducer '(* (or #[@] #["c"/"?"] #["ba"/"."])))) (fsa::fsa-print (compile-transducer '(:seq #["b"/"y"]))) (fsa::fsa-print (compile-transducer '(:seq "y"))) (fsa::fsa-print *fst*) (compile-transducer '(seq (or (seq (* #[@]) #[@ /"@"]) (seq (* #[@]) #[@ /"@"] (? #["n"/"(n)"]) #["ing"/"(@)"])) (* (seq (? (or #["s"/"(s)"] #["e"/"(e)"])) (or (+ #[@]) (seq (+ #[@]) (? #["n"/"(n)"]) #["ing"/"(@)"])))))) (compile-transducer '(or #["sving"/ @] #["flyv"/ @]) :name ') (compile-transducer '(or #["hus"/ @] #["barn"/ @]) :name ') (defparameter *fst* (compile-transducer '(seq (or (seq (? #["n"/"(n)"]) #["ing"/"(ing)"])) (* (seq (? (or #["s"/"(s)"] #["e"/"(e)"])) (or (seq (? #["n"/"(n)"]) #["ing"/"(ing)"]))))))) (time (dotimes (i 100) (transduce "svingning" *fst*))) (transduce "svinging" *fst*) (defparameter *fst* (compile-transducer '(seq (? (seq (or #["da"/"(pv)"] #["ga"/"(pv)"] #["ca"/"(pv)"]) (? #["mo"/"(dir-pv)"]))) (? (or (seq (or #["m"/"(1sg-obj)"] #["g"/"(2obj)"]) (? (or #["i"/"(cv-i)"] #["e"/"(cv-e)"] #["a"/"(cv-a)"]))) (seq (? (or #["v"/"(1subj)"] #["h"/"(3obj)"])) (? (or #["u"/"(cv-u)"] #["i"/"(cv-i)"] #["e"/"(cv-e)"] #["a"/"(cv-a)"]))))) (or "g" "cer" "Cd" "ket" "XeX") ; root (? (or #["eb"/"(stf)"] #["ob"/"(stf)"] #["av"/"(stf)"] #["i"/"(stf)"])) (? (or #["s"/"(3sg-subj)"] #["en"/"(3pl-subj)"] #["t"/"(12pl-subj)"]))))) (transduce "gaigeb" *fst*) (transduce "gavaketeben" *fst*) (fsa::fsa-print *fst*) (defparameter *cv-fst* (compile-transducer '(* (or #["a"/"Va"] #["e"/"Ve"] #["i"/"Vi"] #["o"/"Vo"] #["u"/"Vu"] #[% /"C@"] )))) (transduce "wollwutz" *cv-fst*) (fsa::fsa-print *cv-fst*) |#