;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; -*-
;;
;; Copyright (C) Paul Meurer 1999 - 2005. All rights reserved.
;; paul.meurer@aksis.uib.no
;; Aksis, University of Bergen

(in-package :cgp)

;; See also multi-tagger-xml.lisp; build-sentence there

#+ignore
(lxml::compile-xml-parser)

#+debug
(defparameter *tv* nil) ;; see sentence-add-token()

(defmethod disambiguate-stream ((tokenizer xml-tokenizer) in-stream
                                &rest rest
                                &key token-fn
                                (resolve-entities :all)
                                (resolve-char-refs t)
                                &allow-other-keys)
  "Disambiguate the XML document read from IN-STREAM.

A separate process named xml-file-parser runs ZEBU::XML-STREAM-PARSER on
IN-STREAM.  Each token it produces is handed over through the PARSED-TOKEN
slot of TOKENIZER: the parser process stores the token there and then blocks
until the slot is NIL again.  When the parser returns, :EOF is stored in the
same slot.

Meanwhile the calling process invokes TOKEN-FN (or, when TOKEN-FN is NIL, the
next method) with TOKENIZER, T in place of the stream, and all keyword
arguments in REST.

RESOLVE-ENTITIES is bound to LXML::*RESOLVE-ENTITIES* inside the parser
process.  RESOLVE-CHAR-REFS is accepted but not used by this method; it is
still forwarded to the next method via REST when the caller supplies it.

An error signalled in the parser process is stored in (TOKENIZER-ERROR
TOKENIZER).  An error signalled in the calling process kills the parser
process and is then signalled again."
  (declare (ignorable resolve-char-refs))
  (let* ((*package* (find-package :lxml))
         (*tokenizer* tokenizer)
         (*parser-error* nil)
         ;; Called with each token: publish it in the PARSED-TOKEN slot, then
         ;; block until the slot has been reset to NIL.
         ;; NOTE(review): presumably the token consumer resets the slot --
         ;; confirm against the reader of PARSED-TOKEN.
         (token-function
          (let ((xml-tokenizer tokenizer))
            (lambda (token)
              #+debug (print (list :token token))
              (setf (parsed-token xml-tokenizer) token)
              (process-wait "waiting-for-token-request"
                            (lambda (tokenizer)
                              (null (parsed-token tokenizer)))
                            xml-tokenizer)))))
    (let ((parse-process
           (process-run-function
            "xml-file-parser"
            (let ((xml-tokenizer tokenizer))
              (lambda (&rest args)
                (handler-case
                    ;; These bindings are established inside the lambda that
                    ;; runs in the spawned process.
                    (let ((*package* (find-package :lxml))
                          (lxml::*serialize* :xml-file-pos)
                          (lxml::*resolve-entities* resolve-entities)
                          (*token-function* token-function))
                      (apply #'zebu::xml-stream-parser args)
                      ;; Wait until the slot is NIL again before storing the
                      ;; :EOF marker, so :EOF cannot overwrite a pending token.
                      (process-wait "waiting-for-token-request"
                                    (lambda (xml-tokenizer)
                                      (null (parsed-token xml-tokenizer)))
                                    xml-tokenizer)
                      (setf (parsed-token xml-tokenizer) :eof))
                  (error (cond)
                    ;; Record the parser error on the tokenizer.
                    (setf (tokenizer-error tokenizer) cond)))))
            ;; The remaining arguments are the arguments for the lambda above,
            ;; i.e. for ZEBU::XML-STREAM-PARSER.
            in-stream
            :read-fn #'read-next-line
            :grammar (find-grammar (zebu-grammar-name tokenizer))
            :verbose nil)))
      (handler-case (apply (or token-fn #'call-next-method) tokenizer t rest)
        (error (cond)
          ;; if error occurs in disambiguation, kill parse process, ...
          (process-kill parse-process)
          ;; ... but do not handle the error here
          (error cond))))))

(defmethod disambiguate-stream ((tokenizer pretokenized-tokenizer) stream
                                &rest rest
                                &key &allow-other-keys)
  "Call the next method on TOKENIZER, STREAM and REST with the special
variables for pretokenized input bound."
  ;; NOTE(review): *TOKENIZER* is bound to the global *PRETOKENIZED-TOKENIZER*,
  ;; not to the TOKENIZER argument -- confirm this is intended.
  (let ((*tokenizer* *pretokenized-tokenizer*)
        (*terminate-on-following-insignificant-token-p* t)
        (*merge-hyphenated-words-p* nil)
        (*lookup-unknown-in-nny-lexicon-p* t))
    (apply #'call-next-method tokenizer stream rest)))

;; Manual test driver; only read when the TEST feature is present.
#+test
(let ((tag-stream *standard-output*))
  (with-open-file (stream "/usr/local/cwb/corpora/nc/nob/test/68ere.xml")
    (disambiguate-stream
     'xml-tokenizer stream
     :tokenizer-initargs (list :paragraph-delimiter-elements '(:|p|)
                               :headline-delimiter-elements '(:|head| :|byline| :|name|)
                               :in-sentence-elements '(:|foreign| :|hi| :|name| :|docAuthor|
                                                       :|abbr| :|num| :|title|)
                               :include-path '(:|text|)
                               :exclude-path ())
     :tagging-niveau :morphological-disambiguation
     :cg (newest-cg :nbo)
     :print-function (lambda (s &key &allow-other-keys)
                       (print-sentence-xml s
                                           :stream *standard-output*
                                           :print-sentence-elts-p nil)))))

:eof