;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; -*-
;;
;; Copyright (C) Paul Meurer 2000 - 2004. All rights reserved.
;; paul.meurer@aksis.uib.no
;; Aksis, University of Bergen
;;
;; Reimplementation in ANSI Common Lisp of the compound analyser for Norwegian (C program)
;; written by Helge Hauglin and Lars Jørgen Tvedt, UiO 1999
;; Used in the CG parser/tagger system (Oslo-tagger) developed at UiO
;; (Dokumentasjonsprosjektet; Tekstlaboratoriet)
;;
;;-------------------------------------------------------------------------------------
;; TO DO:
;; - CLOSify (partially done)
;; - get rid of ranking-info structure, lazy eval instead
;; - rewrite the whole thing as a fsa!
;;
;; QUESTIONS:
;; - why aren't numbers lemmata?
;;-------------------------------------------------------------------------------------

(in-package "CGP")

;; Auxiliary lexicon net handed to the compound analysers below as :aux-net.
;;
;; The value is the LIST-NET of a freshly created LIST-STRING-NET.  The net is
;; a literal, circular list read via the #1= / #1# reader labels: it holds one
;; entry per decimal digit, and the tail of every digit entry is the whole
;; top-level list again, so a digit can be followed by any further digit.
;; Every digit also carries the same fixed continuation: "::" , a space, and
;; 16 characters drawn from #\null and #\^p.
;; NOTE(review): that 16-character tail is presumably the string-encoded
;; feature bit vector for (det fl kvant) -- compare the #+test form right
;; after this definition -- confirm.
;; NOTE(review): #\null and #\^p are implementation-specific character names;
;; they are kept exactly as in the original.
(defparameter *aux-lex-net*
  (let ((string-net (make-instance 'string-net::list-string-net))
        ;; Kept from the original.  Presumably a guard against printing the
        ;; circular structure while this form is evaluated -- confirm.
        (*print-circle* t))
    (setf (list-net string-net)
          (cadr
           '(x
             #1=((#\1 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\2 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\3 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\4 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\5 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\6 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\7 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\8 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\9 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)
                 (#\0 (#\: (#\: (#\Space (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\null (#\^p (#\null (#\null (#\null (#\^p NIL))))))))))))))))))) . #1#)))))
    (list-net string-net)))

;; Debugging aid: prints, as a list of characters, the string encoding of the
;; feature set (det fl kvant) for the nbo tagger.
#+test
(print (coerce (bit-vector-to-string (encode-features (feature-vector *nbo-tagger*) 'det 'fl 'kvant)) 'list))

;; Install the compound analyser for the nbo tagger.
;;
;; MFV is the tagger's feature vector with every feature name downcased; the
;; :morph-feature-table is a string tree mapping each of those names to its
;; index in MFV.  The :compound-regexp lists the feature sequences accepted
;; as analyses: ordinary compounds, numbers, and a prefix + adjective pattern.
(let* ((tagger *nbo-tagger*)
       (mfv (map 'vector #'string-downcase (feature-vector tagger))))
  (setf (compound-analyser tagger)
        (make-instance
         'compound-analyser-lexicon
         :morph-feature-vector mfv
         :morph-feature-table
         (let ((mft (dat::make-string-tree)))
           (loop for i from 0
                 for f across mfv
                 do (setf (dat:string-tree-get mft (string-downcase f)) i))
           ;; LOOP without a result clause returns NIL, so return the tree
           ;; explicitly.
           mft)
         :fullform-net (lexicon tagger)
         :aux-net *aux-lex-net*
         :unknown-feature 'ukjent
         :package :cgp
         :compound-regexp
         (make-instance
          'feature-regexp
          :name "Regexp for compound analyser"
          :source-regexp
          `(:or
            (:seq
             ;; first chunk
             (:or ukjent
                  (:and subst ub ent)
                  (:and verb inf)
                  (:seq (:and det kvant) (:? "-")) ;; ??
                  (:seq (:and subst ub ent ,morph::immediate-pre-s-juncture) :s-juncture)
                  (:seq (:and subst ub ent ,morph::pre-e-juncture) :e-juncture))
             ;; inner chunks
             (:* (:or (:and subst ub ent)
                      (:and verb inf)
                      (:seq (:and subst ub ent ,morph::pre-s-juncture) :s-juncture)
                      (:seq (:and subst ub ent ,morph::pre-e-juncture) :e-juncture)))
             ;; last chunk
             (:or subst adj))
            ;; number
            (:seq (:+ (:and det kvant))
                  (:? (:seq (:and "og" konj) (:and det kvant))))
            ;; verb
            ;; NOTE(review): the label says "verb", but both variants below
            ;; end in an adjective chunk -- confirm the intended label.
            #-test
            (:seq
             ;; first chunk
             (:or "u" "små")
             ;; last chunk
             (:and adj ))
            #+test
            (:seq
             ;; first chunk
             (:or ukjent det)
             ;; last chunk
             (:and adj)))))))

;; Install the compound analyser for the nny tagger (only when the
;; :nny-parser feature is present).
;;
;; NOTE(review): unlike the nbo setup above, :morph-feature-vector is passed
;; without downcasing, while the table keys are downcased -- confirm whether
;; that difference is intended.
#+nny-parser
(let ((tagger *nny-tagger*))
  (setf (compound-analyser tagger)
        (make-instance
         'compound-analyser-lexicon
         :morph-feature-vector (feature-vector tagger)
         :morph-feature-table
         (let ((mft (dat::make-string-tree)))
           (loop for i from 0
                 for f across (feature-vector tagger)
                 do (setf (dat:string-tree-get mft (string-downcase f)) i))
           ;; Fix: the original LET ended with the LOOP, whose value is NIL,
           ;; so the table was never passed on.  Return the filled tree, as
           ;; the nbo setup does.
           mft)
         :fullform-net (lexicon tagger)
         :aux-net *aux-lex-net*
         :unknown-feature 'ukjent
         :package :cgp
         :compound-regexp
         (make-instance
          'feature-regexp
          :name "Regexp for compound analyser"
          :source-regexp
          `(:or
            (:seq
             ;; first chunk
             (:or ukjent
                  (:and subst ub ent)
                  (:and verb inf)
                  (:and det kvant)
                  (:seq (:and subst ub ent ,morph::immediate-pre-s-juncture) :s-juncture)
                  (:seq (:and subst ub ent ,morph::pre-e-juncture) :e-juncture))
             ;; inner chunks
             (:* (:or (:and subst ub ent)
                      (:and verb inf)
                      (:seq (:and subst ub ent ,morph::pre-s-juncture) :s-juncture)
                      (:seq (:and subst ub ent ,morph::pre-e-juncture) :e-juncture)))
             ;; last chunk
             subst)
            ;; number
            (:seq (:+ (:and det kvant))
                  (:? (:seq (:and "og" konj) (:and det kvant)))))))))

:eof