;;;-*- Mode: Lisp; Package: FSA -*- ;;; ;;; (C) Paul Meurer, HIT-centre, 2000 ;;; paul.meurer@hit.uib.no (in-package :fsa) ;; connect to the database containing the dictionary (defclass boolean-regexp-symbols () ()) (defmethod fsa-list< ((fsa boolean-regexp-symbols) s1 s2) (cond ((null s1) (if s2 t :equal)) ((null s2) nil) (t (let ((car-equal-p (funcall (symbol-order-fn fsa) (car s1) (car s2)))) (if (eq car-equal-p :equal) (fsa-list< fsa (cdr s1) (cdr s2)) car-equal-p))))) (defmethod symbol-order-fn ((fsa boolean-regexp-symbols)) (lambda (s1 s2) (declare #.cl-user::*highly-optimized*) ;; sort chars before fixnums before strings before symbols before lists (etypecase s1 (character (etypecase s2 (character (if (char= (the character s1) (the character s2)) :equal #+excl (and (char< (the character s1)(the character s2)) t) #-excl (char< (the character s1) (the character s2)))) ((or fixnum simple-string symbol list) t))) (fixnum (etypecase s2 (character nil) (fixnum (if (= (the fixnum s1) (the fixnum s2)) :equal (< (the fixnum s1) (the fixnum s2)))) ((or simple-string symbol list) t))) (simple-string (etypecase s2 ((or character fixnum) nil) (simple-string (if (string= s1 s2) :equal (and (string< s1 s2) t))) ((or symbol list) t))) (symbol (etypecase s2 ((or character fixnum simple-string) nil) (symbol (if (eq s1 s2) :equal (and (string< (symbol-name s1) (symbol-name s2)) t))) (list t))) (list (etypecase s2 ((or character fixnum simple-string symbol) nil) (list (fsa-list< fsa s1 s2))))))) #+test (defparameter *cp* (dfa-compile-parsed-boolean-list '(:* a) (make-instance 'boolean-list-fsa))) (defclass boolean-list-fsa (standard-states::fixnum-states boolean-regexp-symbols boolean-list-relations list-sets::list-sets) ()) ;(make-instance 'standard-states::fixnum-states) (defclass boolean-list-nfa (boolean-list-fsa nfa) ()) (defclass boolean-list-dfa (boolean-list-fsa dfa) ()) (defmethod fsa-print ((fsa boolean-list-fsa) &optional (stream *standard-output*)) (format stream "start: ~s~%final: ~s~%" (fsa-start-state fsa) (fsa-final-states fsa)) (relation-map #'(lambda (state relation) (format stream "~2d: ~%" state) (relation-map #'(lambda (symbol dest) (format stream " ~a : ~a~%" dest symbol)) relation) (terpri stream)) (fsa-delta fsa))) ;(fsa::fsa-print *cp*) (defmethod make-state-relation ((fsa boolean-list-fsa)) (make-instance 'skip-list-relations::skip-list-relation :order-fn (state-order-fn fsa)) #+ignore (make-relation (state-order-fn fsa) fsa)) (defmethod make-nfa ((fsa boolean-list-fsa)) (make-instance 'boolean-list-nfa)) (defmethod make-dfa ((fsa boolean-list-fsa)) (make-instance 'boolean-list-dfa)) (defmethod dfa-compile-parsed-boolean-list ((list-regexp list) (fsa boolean-list-fsa)) (labels ((walk (exp) (etypecase exp (list (ecase (car exp) (:seq (minimize (apply #'fsa-concat (mapcar #'walk (cdr exp))))) (:or (minimize (apply #'fsa-union (mapcar #'walk (cdr exp))))) (:? (assert (null (cddr exp))) (minimize (fsa-optional (walk (cadr exp))))) (:* (assert (null (cddr exp))) (minimize (fsa-closure (walk (cadr exp))))) (:+ (assert (null (cddr exp))) (minimize (fsa-plus (walk (cadr exp))))) (:and (fsa-symbol (cdr exp) fsa)))) (string (minimize (apply #'fsa-concat (map 'list (lambda (c) (fsa-symbol c fsa)) exp)))) (symbol (case exp (:@ (fsa-symbol exp fsa)) (otherwise (fsa-symbol exp fsa))))))) ;;(any-determinize (walk list-regexp))) #| ;; the input: -- compoundregexp -- JBJ, received Nov. 16, 1998 -- partadj removed -- N | N S replaced by N S? compound ((( P | (N & sg & indef ) |(N & sg & indef S)| (N & sg & indef) | (N & sgpl & indef S) | (V & imp) | infimp | (Det & quant & !indef) | Adv | Adj | (Pro & nom) | (Pro & nomacc)) E?)* HYPHEN?)* (Adv | N S? | V | P | Adj) suspicious . & SHORT & !FREQUENT_AS_COMPOUND |# #+test (defparameter *cp* (dfa-compile-parsed-boolean-list '(:seq (:* (:seq (:* (:seq (:or P (:seq (:and N (:or sg sgpl) indef) (:? S)) (:and C imp) infimp (:and Det quant (:not indef)) Adv Adj (:and Pro nom) (:and Pro nomacc)) (:? E))) (:? Hyphen))) (:or Adv (:seq N (:? S)) V P Adj)) (make-instance 'boolean-list-fsa))) ;(analyze-compound "barnebarn" *cp*) ;(morphology "gave") (defun morphology (word-form) (mapcar #'read-from-string (select [concat "(" [replace [gram_info] "_" " "] ")"] :from [norkompleks lexicon] :where [= [word_form] ?word-form] :flatp t))) (defparameter *bl-fsa* (dfa-compile-parsed-boolean-list '(:seq (:* (:seq (:* (:seq (:or P (:seq (:and N (:or sg sgpl) indef) (:? S)) (:and C imp) infimp (:and Det quant (:not indef)) Adv Adj (:and Pro nom) (:and Pro nomacc)) (:? E))) (:? Hyphen))) (:or Adv (:seq N (:? S)) V P Adj)) (make-instance 'boolean-list-fsa))) #+test (fsa::fsa-print *cp*) (defparameter *memo-table* (make-hash-table :test #'equal)) (defun boolean-list-delta-get (symbol state delta) (let ((relation (relation-get state delta)) (result ())) (when relation (loop for morph in (multiple-value-bind (gram foundp) (gethash symbol *memo-table*) (if foundp gram (setf (gethash symbol *memo-table*) (select [concat "(" [replace [gram_info] "_" " "] ")"] :from [norkompleks lexicon] :where [= [word_form] ?symbol] :flatp t)))) do (let* ((morph (read-from-string morph)) (states (relation-subsumed-get morph relation))) (if (listp states) (dolist (state states) (pushnew (cons state morph) result :test #'equal)) (pushnew (cons states morph) result :test #'equal)))) result))) (defmethod analyze-boolean-list-regexp (string (fsa boolean-list-dfa)) (let ((delta (fsa-delta fsa)) (start-state (fsa-start-state fsa)) (length (length string)) (result ())) (labels ((walk (state start end list) (list state (subseq string start end)) (let* ((substring (subseq string start end)) (new-states (boolean-list-delta-get substring state delta))) (dolist (new-state+morph new-states) (let ((new-state (car new-state+morph))) (cond ((= end length) (when (set-member-p new-state (fsa-final-states fsa)) (push (reverse (cons substring ;;(cons substring (cdr new-state+morph)) list)) result))) (t (unless (>= end (1- length)) (walk new-state end (+ end 1) (cons substring ;;(cons substring (cdr new-state+morph)) list))))))) (unless (= end length) (walk state start (1+ end) list))))) (walk start-state 0 2 nil) result)))