;;;; Scratch forms for exercising the CGP disambiguator and inspecting rules.
;;;;
;;;; NOTE(review): these look like forms meant to be evaluated one at a time
;;;; from an editor/REPL rather than loaded as a file -- only the first form
;;;; is guarded by #+test; confirm before loading the whole file.
;;;;
;;;; Rule dumps pasted in from the REPL are kept verbatim, but as comments:
;;;; in code position the bare symbols would be evaluated as variables and
;;;; "cg #" (sharpsign followed by whitespace) is a reader error.

(in-package "CGP")

;; Time a full disambiguation run over the training corpus.
;; Only read when :test is present in *features*.
#+test
(time (disambiguate-file "projects:cgp;training;delkorp.cor"
                         "projects:cgp;training;delkorp-lisp.dis"))

;; Disambiguate a local text file with the "nbo-navn-kh" grammar and print
;; every sentence to an output file.
(let* ((*cg* (gethash "nbo-navn-kh" *cg-table*))
       (*tagger* (multi-tagger *cg*)))
  (with-open-file (stream
                   ;; "/home/paul/raatekst3.txt"
                   "/home/paul/koordinerte-fraser.txt"
                   :direction :input)
    (with-open-file (out-stream
                     "/home/paul/koordinerte-fraser-out.txt"
                     ;;"/home/paul/raatekst3-out.txt"
                     :direction :output :if-exists :supersede)
      (disambiguate-stream (make-instance 'tokenizer) stream
                           :print-function (lambda (s)
                                             (print-sentence s :stream out-stream))))))

;; Same grammar, run on the proper-name test corpus.
(let* ((*cg* (gethash "nbo-navn-kh" *cg-table*))
       (*tagger* (multi-tagger *cg*)))
  (with-open-file (stream "projects:cgp;testcorpus;test-prop.txt"
                          :direction :input)
    (with-open-file (out-stream "projects:cgp;testcorpus;test-prop.out"
                                :direction :output :if-exists :supersede)
      ;; Prints the stream object itself to *standard-output*.
      (print out-stream)
      (disambiguate-stream (make-instance 'tokenizer) stream
                           ;;:tagging-niveau :syntactic-named-entity-disambiguation
                           ;;:context-size 1000
                           :print-function (lambda (s)
                                             ;; #-ignore: the next form is read unless
                                             ;; :ignore is in *features*.
                                             #-ignore
                                             (print-sentence s :stream out-stream))))))

;; "nbo" grammar on the test-docu-bug corpus, with an explicit tagging level
;; and context size.
(let* ((*cg* (gethash "nbo" *cg-table*))
       (*tagger* (multi-tagger *cg*)))
  (with-open-file (stream "projects:cgp;testcorpus;test-docu-bug.txt"
                          ;;"projects:cgp;testcorpus;liten.txt"
                          :direction :input)
    (with-open-file (out-stream "projects:cgp;testcorpus;test-docu-bug.out"
                                :direction :output :if-exists :supersede)
      ;; Prints the stream object itself to *standard-output*.
      (print out-stream)
      (disambiguate-stream (make-instance 'tokenizer) stream
                           :tagging-niveau :syntactic-named-entity-disambiguation
                           :context-size 1000
                           :print-function (lambda (s)
                                             ;; #-ignore: the next form is read unless
                                             ;; :ignore is in *features*.
                                             #-ignore
                                             (print-sentence s :stream out-stream))))))

;; NOTE(review): here disambiguate-stream is called with the stream as its
;; only argument, unlike the calls above which pass a tokenizer first --
;; possibly an older calling convention; confirm against the definition.
(with-open-file (stream "projects:cgp;training;test.cor" :direction :input)
  (disambiguate-stream stream))

(disambiguate-nth-sentence 321 :pprint t)

(disambiguate-nth-sentence 378 :pprint t)

(show-rule 1741)
;; Pasted output (verbatim):
;; target ((subst))
;; constraints ((not 0 farlige-mask-subst) (not 0 farlige-nØyt-subst) (-1c adj) (-2c prep))
;; boundary-mode nil frequency 542556 id 1121 type =! domain nil

(show-rule 896)
;; Pasted output (verbatim; the text after "cg #" appears to have been lost):
;; cg #
;; target ((verb perf-part))
;; constraints ((not 0 farlige-part) (not -1 det-prep-infmerke-adjbe) (not -2 be-nØyt-det) (*-1 p-hj-verb *r) (not *r fv) (*-1 p-hj-verb *r) (not *r perf-part) (*-1 p-hj-verb *r) (not *r setn-gr) (*-1 setn-gr *r) (*r nom))
;; boundary-mode nil frequency 42071 id 896 type =! domain nil

(show-rule 569)
;; Pasted output (verbatim):
;; target ((clb))
;; constraints ((-1 >>>) (0 clb/konj-konj) (not 1 sbu) (not 1 spØrreadv) (not 1 %selv%))
;; boundary-mode nil frequency 57856 id 569 type =! domain nil

;; Further pasted rule dumps (verbatim; the calls that produced them are not
;; present in the file):
;;
;; target ((clb))
;; constraints ((not -1 replikk) (not -1 clb) (not -2 strek) (0 clb/konj-konj) (*-1c setn-gr *r) (not *r fv))
;; boundary-mode nil frequency 47736 id 551 type =0 domain nil
;;
;; target ((sbu))
;; constraints ((not 0 %som%) (-1 >>>) (not 1 pres/pret) (*1 int-setn-gr *l) (*l pres/pret) (*2 pres/pret *l) (not lr0 farlige-verb *l) (not lr0 subst-gen l-1) (not l-1 prep/komma) (*1 int-setn-gr r+1) (r+1 pres/pret *r) (not lr0 farlige-verb) (*1 int-setn-gr l-1) (not l-1 komma))
;; boundary-mode nil frequency 1345 id 1636 type =! domain nil
;;
;; target ((inf-merke))
;; constraints ((1 inf))
;; boundary-mode nil frequency 3450 id 23 type =! domain "<å>"
;;
;; target ((subst))
;; constraints ((not 0 farlige-mask-subst) (not 0 farlige-nØyt-subst) (-1c adj) (-2c prep))
;; boundary-mode nil frequency 72024 id 1121 type =! domain nil
;;
;; cg #
;; target ((subst prop gen))
;; constraints ((0 subst-prop) (not 1 adj) (not 1 subst) (not 1 det) (not 1 anf) (not 1 setn-gr))
;; boundary-mode nil frequency 318 id 1110 type =0 domain nil
;;
;; cg #
;; target ((sbu))
;; constraints ((*1c setn-gr *l) (not *l pres/pret))
;; boundary-mode nil frequency 3798 id 490 type =0 domain nil
;;
;; cg #
;; target ((sbu))
;; constraints ((0 %om/enn/som/hvis%) (not 1 farlige-part) (1 perf-part) (not 1 be-ent-nØyt-subst) (not 1 be-ent-fem-subst) (not 2 subst/adj) (*1 setn-gr *l) (not *l pres/pret))
;; boundary-mode nil frequency 2850 id 499 type =! domain nil
;;
;; cg #
;; target ((verb pret))
;; constraints ((not 0 farlige-forledd-verb) (*-2c setn-gr r+1) (r+1 prep *r) (not *r ikke-adv-adj-subst-pron-konj-det2) (-1c subst/pron) (not -1 %noen%) (not -1 gen))
;; boundary-mode nil frequency 3481 id 825 type =! domain nil
;;
;; cg #
;; target ((adj fl))
;; constraints ((not 0 gen) (1c subst/adj) (not 1 fl))
;; boundary-mode nil frequency 4189 id 1257 type =0 domain nil

;; Apply rule 1341 to a hand-written sentence vector.
;; NOTE(review): the meaning of the trailing arguments 10 and nil is not
;; visible here (10 may be a position in the vector) -- check apply-rule.
;; NOTE(review): the empty strings "" in first position look like word-form
;; tokens whose text was lost; they are reproduced as found.
(apply-rule 1341
            #(((>>>) (>>>))
              (("<*beholderen>") ("beholder" subst mask appell ent be ))
              (("") ("være" verb pres ))
              (("" "490=0") ("til" prep ))
              (("<å>" "23=!" "641=0") ("å" inf-merke ))
              (("") ("begynne" verb inf ))
              (("") ("med" prep ))
              (("" "1240=0" "1240=0" "308=!") ("hel" adj pos nøyt ub ent ))
              (("" "704=0") ("blank" adj pos mask fem ub ent ))
              (("" "548=0" "1010=0") ("og" konj ))
              (("") ("gjennomsiktig" adj pos mask fem ub ent ) ("gjennomsiktig" adj pos nøyt ub ent))
              (("<$.>") ("$." clb ) (<<<))
              ;((<<<) (<<<))
              )
            10 nil)

(with-open-file (stream "projects:cgp;training;delkorp.cor" :direction :input)
  (nth-sentence-stream 100 stream))

(disambiguate-nth-sentence 187)

;; Disambiguate a hand-written sentence vector and pretty-print the result.
;; NOTE(review): the empty strings "" in first position look like word-form
;; tokens whose text was lost; they are reproduced as found.
(pprint
 (disambiguate
  #(((>>>) (>>>))
    (("<*som>") ("som" prep) ("som" sbu ))
    (("") ("dokumentere" adj mask fem ub ent ) ("dokumentere" adj nøyt ub ent ) ("dokumentere" verb perf-part ))
    (("") ("i" prep ))
    (("") ("en" adv) ("en" det kvant mask ent ) ("en" pron pers ent hum) ("ene" verb imp ))
    (("") ("rekke" subst fem appell ent ub) ("rekke" subst mask appell ent ub ) ("rekke" verb inf ) ("rekke" verb inf ) ("rekke" verb inf ))
    (("") ("forskningsrapport" subst mask appell fl ub ))
    (("") ("fra" prep ))
    (("") ("mange" adj komp ))
    (("") ("uavhengig" adj pos mask fem nøyt be ent) ("uavhengig" adj pos ub be fl ))
    (("") ("forskningsmiljø" subst nøyt appell fl ub ))
    (("<$,>") ("$," clb ) ("$," ))
    (("") ("bl.a." fork prep+adj prep @adv ))
    (("<$.>") ("$." clb <<<)))))