;;;-*- Mode: Lisp; Package: CONSTRAINT-GRAMMAR-PARSER -*-

(in-package :cgp)

;; Point the active grammar and tagger at the nbo instances.
(setf *cg* *nbo-cg*
      *tagger* *nbo-tagger*)

;; Interactive probes of the lookup functions used by CLASSIFY-WORDS below.
(print (get-features "fisk"))
(print (tag-compound "fisk"))
(print (analyse-compound "fiskeskjær"))

(defun classify-words (file out-file &key pos-only-p)
  "Annotate every line of FILE and write the result to OUT-FILE.

OUT-FILE is opened for output and superseded if it already exists.
Each line WORD read from FILE produces one output line made of:

  1. the segmentation returned by MOST-PROBABLE-COMPOUND, or WORD itself
     when there is no segmentation, followed by a tab;
  2. a source label followed by a tab: \"ordbank\" when GET-FEATURES
     returned something, else \"scarrie\" when GET-SCARRIE-FEATURES did,
     else \"samset\" when a segmentation was found, else \"ukjent\";
  3. one bracketed entry per feature list whose CAR is non-NIL, taken
     from the feature lists after REMOVE-FEATURE-INCLUSIONS.

The lookups are tried in order and later ones are skipped as soon as an
earlier one succeeds.

When POS-ONLY-P is true, an entry holds only the first element of
CODE-FEATURES, and an entry is skipped if an EQUAL (car . first-code)
pair was already written for this word.  Otherwise an entry holds every
element of CODE-FEATURES and no duplicate filtering is done."
  (with-open-file (out out-file :direction :output :if-exists :supersede)
    (flet ((open-entry (base)
             ;; Writes the opening bracket and the quoted base string.
             (write-string "[\"" out)
             (write-string base out)
             (write-char #\" out))
           (add-code (code)
             ;; Writes one space-separated, stringified code.
             (write-char #\Space out)
             (write-string (stringify code) out))
           (close-entry ()
             (write-char #\] out)))
      (with-file-lines (word file)
        (let* ((lexicon-hit (get-features word))
               (scarrie-hit (and (null lexicon-hit)
                                 (get-scarrie-features word))))
          (multiple-value-bind (split compound-features)
              ;; Compound analysis is the last resort only.
              (when (and (null lexicon-hit) (null scarrie-hit))
                (most-probable-compound word))
            (let ((origin (cond (lexicon-hit "ordbank")
                                (scarrie-hit "scarrie")
                                (split "samset")
                                (t "ukjent"))))
              (format out "~a~c~a~c" (or split word) #\Tab origin #\Tab))
            (let ((readings (remove-feature-inclusions
                             (or compound-features scarrie-hit lexicon-hit)))
                  (emitted '()))
              (dolist (reading readings)
                (let ((base (car reading)))
                  (when base
                    (if pos-only-p
                        (let* ((first-code (car (code-features (cdr reading))))
                               (key (cons base first-code)))
                          ;; Emit each (base . first-code) pair once per word.
                          (unless (member key emitted :test #'equal)
                            (push key emitted)
                            (open-entry base)
                            (add-code first-code)
                            (close-entry)))
                        (progn
                          (open-entry base)
                          (dolist (code (code-features (cdr reading)))
                            (add-code code))
                          (close-entry)))))))
            (terpri out)))))))

;; adlyde+t ??
;; Driver and scratch forms for CLASSIFY-WORDS and the compound analyser.
;; Each CLASSIFY-WORDS call reads the first path and writes the second.

(classify-words "projects:cgp;texts;nyord.txt" "projects:cgp;texts;nyord.ann")

(progn
  (classify-words "projects:cgp;texts;hyphen-unix.txt" "projects:cgp;texts;hyphen-unix.ann")
  (classify-words "projects:cgp;texts;remaining-unix.txt" "projects:cgp;texts;remaining-unix.ann"))

(print (analyse-compound "abonnementsbibliotek"))
(print (analyse-compound "ablegøyemakeri"))
(print (analyse-compound "ablegøye"))
(print (analyse-compound "makeri"))

;; The list below was a bare top-level form in this file.  A list whose
;; first element is a string is not a valid form, so loading or compiling
;; the file would signal an error on it.  It is kept here as a comment.
;; NOTE(review): presumably pasted output from one of the neighbouring
;; calls -- confirm which one.
;; ("bibliotek" "811:1" "810:3" "810:1" "801:1" "800:3" "800:1")

(print (most-probable-compound-last-chunk "abonnementsbibliotek" t))
(print (most-probable-compound "abonnementsbibliotek" t))
(print (grammar-code-to-features "bibliotek" "811:1" :features-as-list-p t))
(print (get-features "fisk"))
(print (tag-compound "überhjelperne"))

;; Self-evaluating keyword; NOTE(review): looks like an end-of-file marker.
:eof