;;;-*- Mode: Lisp; Package: CONSTRAINT-GRAMMAR-PARSER -*- (in-package :cgp) #+test (disambiguate-file #p"/tmp/ch7-stprp360001-v02.txt" #p"/tmp/ch7-stprp360001-v02.tag" :cg (gethash "nbo" *cg-table*) :tagging-niveau :syntactic-disambiguation :print-function (lambda (s &key stream &allow-other-keys) (print-sentence s :stream stream :print-rules nil))) #+test (time (with-open-file (in-stream #p"/home/paul/lisp/projects/cgp/testcorpus/delkorpbm_fork.cor" #+ignore"lisp:projects;cgp;testcorpus;delkorpbm_fork.cor") (with-open-file (out-stream #p"/home/paul/lisp/projects/cgp/testcorpus/delkorpbm_fork.out" #+ignore"lisp:projects;cgp;testcorpus;delkorpbm_fork.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nbo" *cg-table*) :in-stream in-stream :out-stream out-stream)))) ;; Pentium 1GHz, 1CPU/Allegro ;; cpu time (non-gc) 494,420 msec (00:08:14.420) user, 1,500 msec system ;; cpu time (gc) 4,210 msec user, 20 msec system ;; cpu time (total) 498,630 msec (00:08:18.630) user, 1,520 msec system ;; real time 529,450 msec (00:08:49.450) ;; space allocation: ;; 84,670,377 cons cells, 1,410,102,712 other bytes, 45576 static bytes ;; 2 CPU: ;; cpu time (non-gc) 514,660 msec (00:08:34.660) user, 930 msec system ;; cpu time (gc) 8,020 msec user, 40 msec system ;; cpu time (total) 522,680 msec (00:08:42.680) user, 970 msec system ;; real time 539,590 msec (00:08:59.590) ;; space allocation: ;; 84,669,908 cons cells, 1,410,628,656 other bytes, 68648 static bytes ;; Pentium 1GHz, 1CPU/CMUCL ;;Evaluation took: ;; 505.96 seconds of real time ;; 472.37 seconds of user run time ;; 17.69 seconds of system run time ;; [Run times include 81.09 seconds GC run time] ;; 0 page faults and ;; 4392071936 bytes consed. #+test (time (with-open-file (in-stream #-mcl#p"/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.cor" #+mcl"lisp:projects;cgp;testcorpus;delkorpnn_fork.cor") (with-open-file (out-stream #-mcl#p"/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.out" #+mcl"lisp:projects;cgp;testcorpus;delkorpnn_fork.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nny" *cg-table*) :in-stream in-stream :out-stream out-stream)))) ;; cpu time (non-gc) 771,100 msec (00:12:51.100) user, 2,210 msec system ;; cpu time (gc) 9,090 msec user, 10 msec system ;; cpu time (total) 780,190 msec (00:13:00.190) user, 2,220 msec system ;; real time 789,846 msec (00:13:09.846) ;; space allocation: ;; 105,427,451 cons cells, -2,366,072,888 other bytes, 95000 static bytes ;; 2 CPU: ;; cpu time (non-gc) 759,270 msec (00:12:39.270) user, 1,600 msec system ;; cpu time (gc) 10,030 msec user, 0 msec system ;; cpu time (total) 769,300 msec (00:12:49.300) user, 1,600 msec system ;; real time 775,525 msec (00:12:55.525) ;; space allocation: ;; 105,427,697 cons cells, 1,929,618,448 other bytes, 48856 static bytes ;; CMUCL: ;; 3775.54 seconds of real time ;; 3598.06 seconds of user run time ;; 85.3 seconds of system run time ;; [Run times include 392.63 seconds GC run time] ;; 447 page faults and ;; 23541292312 bytes consed. ;; Allegro: ;; cpu time (non-gc) 4,011,570 msec (01:06:51.570) user, 12,290 msec system ;; cpu time (gc) 79,570 msec (00:01:19.570) user, 500 msec system ;; cpu time (total) 4,091,140 msec (01:08:11.140) user, 12,790 msec system ;; real time 4,401,428 msec (01:13:21.428) ;; space allocation: ;; 318,027,308 cons cells, 1,783,891,944 other bytes, 89288 static bytes #+test ;; syntactic disambiguation (time (with-open-file (in-stream #p"/home/paul/lisp/projects/cgp/testcorpus/delkorpbm_fork.cor") (with-open-file (out-stream #p"/home/paul/lisp/projects/cgp/testcorpus/delkorpbm_fork_synt.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nbo" *cg-table*) :in-stream in-stream :out-stream out-stream :tagging-niveau :syntactic-disambiguation)))) ;; cpu time (non-gc) 1,119,570 msec (00:18:39.570) user, 4,950 msec system ;; cpu time (gc) 6,130 msec user, 220 msec system ;; cpu time (total) 1,125,700 msec (00:18:45.700) user, 5,170 msec system ;; real time 1,138,191 msec (00:18:58.191) ;; space allocation: ;; 295,274,769 cons cells, -979,339,888 other bytes, 120640 static bytes #+test (time (with-open-file (in-stream #p"/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.cor") (with-open-file (out-stream #p"/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork_synt.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nny" *cg-table*) :in-stream in-stream :out-stream out-stream :tagging-niveau :syntactic-disambiguation)))) (defun map-multitagged-sentences-errors-only (stream fun &key (sentence-class *sentence-class*) print-function &allow-other-keys) (map-multitagged-sentences stream fun :print-function print-function :sentence-class sentence-class :print-only-sentences-with-errors-p t)) (defun map-multitagged-sentences (stream fun &key (sentence-class *sentence-class*) print-function print-only-sentences-with-errors-p &allow-other-keys) (let ((*sentence-class* sentence-class) (*record-discarded-correct-readings-p* t)) (with-stream-sentences (sentence stream :sentence-class sentence-class) (incf *sentence-count*) (dolist (features (token-features (last-token sentence))) (set-feature (cdr features) '<<<)) (map-tokens sentence (lambda (token) (incf *correct-count* (count-if (lambda (features) (and (not (null features)) (has-feature-p features '))) (token-features token) :key #'cdr)))) (let ((tagged-sentence (funcall fun sentence))) (multiple-value-bind (token-count reading-count error-count) (sentence-statistics sentence) (incf *token-count* token-count) (incf *reading-count* reading-count) (unless (zerop error-count) (incf *error-count* error-count) (setf (gethash *sentence-count* *error-table*) sentence)) (when (or (not print-only-sentences-with-errors-p) (> error-count 0)) (funcall print-function tagged-sentence))))))) (defmethod sentence-statistics ((sentence sentence)) (let ((token-count 0) (reading-count 0) (error-count 0)) (map-tokens sentence (lambda (token) (when token (incf token-count) (incf error-count (count-if (lambda (features) (and (not (null features)) (not (has-feature-p features ')))) (token-features token) :key #'cdr)) (incf reading-count (count-if (lambda (features) (not (null features))) (token-features token) :key #'cdr))))) (values token-count reading-count error-count))) (defparameter *test-corpora* '("test" "delkorpbm-fork1" "delkorpbm-fork2" "delkorpbm-fork3" "delkorpbm-fork4" "delkorpbm-fork")) #+test (with-open-file (in-stream "projects:cgp;training;delkorpbm_fork1.cor") (with-open-file (out-stream "projects:cgp;training;delkorpbm_fork1.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nbo" *cg-table*) :in-stream in-stream :out-stream out-stream))) #+test (time (with-open-file (in-stream ;; "projects:cgp;testcorpus;delkorpbm_fork.cor" "/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.cor") (with-open-file (out-stream ;;"projects:cgp;testcorpus;delkorpbm_fork.out" "/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nbo" *cg-table*) :in-stream in-stream :out-stream out-stream)))) #+test (time (with-open-file (in-stream ;; "projects:cgp;testcorpus;delkorpn_fork.cor" "/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.cor") (with-open-file (out-stream ;;"projects:cgp;testcorpus;delkorpnn_fork.out" "/home/paul/lisp/projects/cgp/testcorpus/delkorpnn_fork.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nny" *cg-table*) :in-stream in-stream :out-stream out-stream)))) #+test (with-open-file (in-stream "projects:cgp;testcorpus;test.cor") (with-open-file (out-stream "projects:cgp;testcorpus;test.out" :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash "nny" *cg-table*) :in-stream in-stream :out-stream out-stream ))) #+test (with-open-file (in-stream "projects:cgp;training;test.cor") (run-test-corpus (gethash "nbo" *cg-table*) :in-stream in-stream)) (defmethod run-test-corpus ((cg constraint-grammar) &key in-stream print-only-sentences-with-errors-p (out-stream *standard-output*) message-fn (tagging-niveau :morphological-disambiguation)) (clrhash *error-table*) (let ((*cg* cg) (*sentence-count* 0) (*token-count* 0) (*error-count* 0) (*reading-count* 0) (*correct-count* 0) (*package* (find-package :cgp))) (disambiguate-stream 'tokenizer in-stream ;; out-stream :mapping-function (if print-only-sentences-with-errors-p #'map-multitagged-sentences-errors-only #'map-multitagged-sentences) :message-fn message-fn :tagging-niveau tagging-niveau :print-function (lambda (sentence &key &allow-other-keys) (funcall #'print-sentence sentence :stream out-stream)) ) (let ((precision (if (zerop *reading-count*) 0 (float (/ (- *reading-count* *error-count*) *reading-count*)))) (recall (if (zerop *correct-count*) 0 (float (/ (- *reading-count* *error-count*) *correct-count*))))) (when (eq out-stream *standard-output*) (format t "~%Words: ~d, precision: ~,2f, recall: ~,2f~%" *token-count* (* precision 100.0) (* recall 100.0))) (values precision recall *sentence-count* *token-count* *error-count* *correct-count* *reading-count*)))) #+test (time (with-open-file (stream "projects:cgp;training;delkorpbm_fork.cor") (with-stream-sentences (sentence stream) sentence #+ignore (print-sentence sentence :print-features t :print-rules nil) #+ignore (write-line "---------------")))) ;; 111.445 seconds, 122,005,504 bytes of memory: reading of the multi-tagged test corpus (defmethod print-sentence-text ((sentence sentence) &key (stream *standard-output*)) (let ((first-p t)) (labels ((walk (token) (when token (let ((str (token-value token))) (unless (symbolp str) (cond ((string= str "$(") (write-string " (" stream) (setf first-p t)) ((char= (char str 0) #\$) (write-string (subseq str 1) stream) (setf first-p nil)) (first-p (write-string str stream) (setf first-p nil)) (t (write-char #\Space stream) (write-string str stream)))) (unless (eq token (last-token sentence)) (walk (token-next token))))))) (walk (first-token sentence))))) ;;; EOF