;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;; ;; Copyright (C) Paul Meurer 1999 - 2004. All rights reserved. ;; paul.meurer@aksis.uib.no ;; Aksis, University of Bergen ;; ;; Constraint Grammar Parser ;; (See Fred Karlsson et.al.: Constraint Grammar, Mouton de Gruyter 1995) ;; ;; The numbers refer to the the sections in Chapter 2 of Karlsson & al. ;; Set *SHOW-USED-RULES* to T if you want to see the rules used for each word. ;; The rule can be retrieved from the rule number in the output with (show-rule ), or ;; with ctrl-shift-P on the rule number in MCL ;; Rule application order can be described as follows: ;; ;; The slot OPERATIONS in the CG object contains the list of parse operations to ;; be performed. ;; For each (non-heuristic) parse operation, we go sequentially through the tokens of the ;; sentence. For each token, we go through a list of rule types and try all applicable rules ;; of that type successively on all readings (as long as there is ambiguity left, but we try each ;; rule not more often than once for each reading). ;; (The type list for morphological disambiguation is for instance '(:DOMAINS-SELECT ;; :DOMAINS-STRONG-SELECT :DOMAINS-DISCARD :SELECT :STRONG-SELECT :DISCARD). ;; :DOMAINS-SELECT means: test all select rules that have a domain equal to the the (downcased) ;; word of the token. :SELECT means: test all select rules with empty domain (@w).) ;; If we successfully apply a heuristic rule, we immediately drop out and retry all non-heuristic ;; rules. ;; An important consequence of this rule application order is that the outcome of a parse only ;; depends on the list of operations to be performed and on the order of the types in the type ;; order list mentioned above. The order in which the rules are tried for each given type is ;; not important because only the type tells what the impact of a rule is. ;; Of course, since Karlsson does not explicitly define rule application order, other orders ;; could be defined where not all rules of one type would be tried consecutively; this could ;; be an advantage for the grammar writer, and more concise grammars could possibly be written, ;; but the disadvantage would be a rather order dependent grammar. ;; In addition, the chosen algorithm leaves the implementor more freedom to experiment and to ;; find an efficient implementation. One could for example try the rules most likely to succeed ;; first, based on statistics gained on a test corpus. ;; OBS: This discussion does not apply fully to mapping rules, since mapping rules have an impact ;; other than simply discarding readings. Their order is significant and has to be retained ;; under rule editing. ;; [From an email to Matthew G.:] ;; I made a mistake in my analysis by not taking into account that it depends on the target of a rule ;; which other readings are discarded in addition to the one being tested when a rule applies. But in ;; your scenario, it is clear which rule is applied first: the one that applies on the first of the ;; two readings. I am going sequencially through the readings of a cohort and test all the rules of ;; a given type. As a consequence, in the case of say two rules and two readings, the first rule discarding ;; the first and the second the second reading, the rule that applies to the first reading is evaluated, ;; the other rule may or may not be tested before that rule, but this makes no difference since it does ;; not apply to the first reading. ;; The only case where the rule order is crucial, as I said, is when two rules do apply to the same ;; reading, with different consequences for the other readings in each case. ;; In order to make the outcome independent of the rule order in the file, I now impose a reversed ;; alphabetical ordering on (the targets of) the rules of each type. (Reversed because I want more ;; specific targets to come first.) ;;------------------------------------------------------------------------------------- ;; TO DO: ;; - Test syntactic mapping and disambiguation ;; - Store morphological rules in hash tables with EQUAL keys instead of nested ;; hash tables ;;------------------------------------------------------------------------------------- (in-package "CGP") #+moved (defvar *nbo-cg*) #+moved (defvar *nny-cg*) (defparameter *record-discarded-correct-readings-p* nil) (defvar *sentence-count* nil) (defvar *token-count*) (defvar *correct-count*) (defvar *error-count*) (defvar *reading-count*) (defvar *error-table* (make-hash-table)) (defmacro with-stream-tokens ((token sentence stream) &body body) "Loops through the (multi-tagged) cohorts in the file and binds COHORT to the list representing the cohort." `(let ((,token ())) (u::with-stream-lines (,stream line) (setf line (%fix-quote line)) (cond ((char= (char line 0) #\") ; new cohort (when ,token (setf (token-used-rules ,token) (list nil)) ,@body) (setf ,token (add-token ,sentence (remove-stars (string-trim '(#\< #\> #\£ #\Tab) (read-from-string line)))))) (t (let ((*package* (find-package :cgp)) (*read-eval* nil)) (destructuring-bind (word . features) (read-from-string (concatenate 'string "(" line ")")) (push (cons (remove-stars word) (apply #'%encode-features features)) (token-features ,token))))))) (when ,token ,@body))) (defmacro with-stream-sentences ((sentence stream &key (sentence-class '*sentence-class*)) &body body) "Loops through the (multi-tagged) sentences in the file and successively binds SENTENCE to a SENTENCE object." `(let ((,sentence (get-sentence :stream stream :sentence-class ,sentence-class :cg *cg* ;;:feature-vector (feature-vector (multi-tagger *cg*)) ))) (with-stream-tokens (token sentence ,stream) (when (thereis-feature-p (cdar (token-features token)) '( )) (setf *sentence* ,sentence) ,@body (setf ,sentence (get-sentence :stream ,stream :sentence-class ,sentence-class :cg *cg* ;; :feature-vector (feature-vector (multi-tagger *cg*)) )))))) (defmethod print-object ((cg grammar) stream) (with-slots (name language) cg (print-unreadable-object (cg stream :type t :identity t) (format stream "~a:~a" (or language "") (or name ""))))) (defmethod print-object ((node constraint-node) stream) (with-slots (constraint) node (print-unreadable-object (node stream :type t :identity t) (format stream "~s" constraint)))) ;; Table holding all registered Constraint Grammars #+moved (defvar *cg-table* (make-hash-table :test #'equal :size 16)) ;; Object holding the CG rules. When parsing, dynamically bind *CG* to a grammar. #+moved (defvar *cg* t) ;; for administration purposes (defun parse-date-string (date) (destructuring-bind (year month date &optional (hour 0) (minute 0) (second 0) weekday) (mapcar (lambda (str) (parse-integer str :junk-allowed t)) (string-parse date :whitespace " ,:-()")) (declare (ignore weekday)) (encode-universal-time second minute hour date month year))) ;; administration (defun admin-info (&key language created last-change locked parent) (with-slots (creation-date change-date locked-p parent-cg) *cg* (setf (slot-value *cg* 'language) language creation-date (parse-date-string created) change-date (parse-date-string last-change) locked-p locked parent-cg parent))) (defun define-information-section (documentation) (setf (cg-documentation *cg*) documentation)) ;;(defmethod language ((cg constraint-grammar)) (language (multi-tagger cg))) ;;; sentence delimiters (4.) (defun define-sentence-delimiters (delimiter-list &optional (cg *cg*)) (setf (sentence-delimiters cg) delimiter-list)) ;;; set declarations (5.) (defparameter *debug* nil) ;; clean this up! (defmethod define-set ((cg constraint-grammar) symbol &rest definition) ;; check that sublists precede primitives ; violated in norsk.rte #+ignore (loop with primitive = nil for elt in definition unless (consp elt) do (setf primitive t) when (and primitive (consp elt)) do (error "The sublists should precede the primitives in ~s" definition)) ;; no recursion allowed (?) (let (;; fixed 21.09.2000 (sorted-definition (sort-definition (definition-remove-stars definition)))) #+original (setf (gethash symbol (set-declarations cg)) sorted-definition (gethash symbol (encoded-set-declarations cg)) (encode-definition cg sorted-definition)) ;; new, for debugging (let ((set (gethash symbol (set-declarations cg)))) (cond ((equal set sorted-definition) nil) ((or (null set) (not *debug*)) (setf (gethash symbol (set-declarations cg)) sorted-definition (gethash symbol (encoded-set-declarations cg)) (encode-definition cg sorted-definition))) (t (terpri) (print (list symbol set)) (print (list symbol sorted-definition))))))) (defmethod encode-definition ((cg constraint-grammar) definition) (cond ((listp definition) (remove-if #'null (mapcar (lambda (def) (encode-definition cg def)) definition))) ((stringp definition) definition) (t ; *** should depend on language! (or (feature-code definition) #+ignore (error "The feature \"~a\" is not defined." definition))))) (defun definition-remove-stars (definition) (cond ((listp definition) (mapcar #'definition-remove-stars definition)) ((stringp definition) (remove-stars definition)) (t definition))) (defun sort-definition (definition) (cond ((listp definition) (sort (mapcar #'sort-definition definition) (lambda (d1 d2) (or (and (not (stringp d2)) (stringp d1)) (and (stringp d2) (stringp d1) (string< d2 d1)))))) (t definition))) (defun @-set-p (symbol) (and (symbolp symbol) (find #\@ (symbol-name symbol)) #+ignore (char= (char (string symbol) 0) #\@))) (defun &-set-p (symbol) (and (symbolp symbol) (char= (char (string symbol) 0) #\&))) (defun define-sets (symbol+defininition-lists &key (cg *cg*) (clearp t)) (with-slots (set-declarations) cg (when clearp (maphash (lambda (key value) (declare (ignore value)) (unless (@-set-p key) (remhash key set-declarations))) set-declarations)) (dolist (symbol+definition symbol+defininition-lists) (apply #'define-set cg symbol+definition)))) ;; not used! (defmethod define-@-set ((cg constraint-grammar) symbol &rest definition) (unless (@-set-p symbol) (error "The name of an @-set should begin with the @ character: ~s" symbol)) (loop for elt in definition unless (syntactic-function-p cg elt) do (error "The elements of a @-set should be syntactic functions: ~s" definition)) (setf (gethash symbol (set-declarations cg)) definition)) ;; make this more efficient! (defun syntactic-set (cg symbol) (let ((set (resolve-set symbol nil))) (when (and set (find (car set) (syntactic-function-codes cg))) set))) (defun define-@-sets (symbol+defininition-lists &key (cg *cg*) (clearp t)) (with-slots (set-declarations) cg (when clearp (maphash (lambda (key value) (declare (ignore value)) (when (@-set-p key) (remhash key set-declarations))) set-declarations)) (dolist (symbol+definition symbol+defininition-lists) (apply #'define-@-set cg symbol+definition)))) (defmethod resolve-set ((cg constraint-grammar) symbol &optional (error-if-does-not-exist t)) (with-slots (encoded-set-declarations) cg (if (consp symbol) (list symbol) (or (gethash symbol encoded-set-declarations) (when error-if-does-not-exist (error "The set ~s is not declared." symbol)))))) ;;; templates (9.6) (defmethod define-template ((cg constraint-grammar) symbol conditions) (setf (gethash (string symbol) (templates cg)) conditions)) (defun define-templates (symbol+conditions-list &key (cg *cg*) (clearp t)) (when clearp (clrhash (templates cg))) (dolist (symbol+conditions symbol+conditions-list) (destructuring-bind (symbol . conditions) symbol+conditions (define-template cg symbol conditions)))) (defun $$-template-p (symbol) (and (symbolp symbol) (search "$$" (symbol-name symbol)))) (defun &&-template-p (symbol) (and (symbolp symbol) (search "&&" (symbol-name symbol)))) (defun template-p (symbol) (or ($$-template-p symbol) (&&-template-p symbol))) (defmethod resolve-template ((cg constraint-grammar) symbol) "Returns values CONDITIONS, TEMPLATE-TYPE, LEFT-P, CAREFUL-P" (let ((start (template-p symbol))) (when (not start) (error "~s is not a template." symbol)) (let* ((template-name (symbol-name symbol)) (template-type (char template-name start)) (left-p (char= #\- (char template-name 0))) (careful-p (find #\c template-name :end start :test #'char-equal)) (conditions (gethash (subseq template-name start) (templates cg)))) (when (and (char-equal template-type #\$) (not (zerop start))) (error "~a is a malformed template expression." template-name)) (when (not conditions) (error "The template ~a is not defined." (subseq template-name start))) (values conditions template-type left-p careful-p)))) ;;; syntactic function declarations (6.) (defmethod syntactic-function-p ((cg grammar) symbol) (declare (ignore symbol)) nil) (defmethod syntactic-function-p ((cg constraint-grammar) symbol) (gethash symbol (syntactic-functions cg))) (defmethod define-syntactic-function ((cg constraint-grammar) symbol) (setf (gethash symbol (syntactic-functions cg)) t)) (defun define-syntactic-functions (symbol-list &key (cg *cg*) (clearp t)) (when clearp (clrhash (syntactic-functions cg))) (mapc (lambda (symbol) (define-syntactic-function cg symbol)) symbol-list)) ;;; principal function declarations (7.) (defmethod define-principal-syntactic-function ((cg constraint-grammar) symbol) (setf (gethash symbol (syntactic-functions cg)) t (gethash symbol (principal-functions cg)) t)) (defmethod define-barrier-element ((cg constraint-grammar) symbol) (setf (gethash symbol (syntactic-functions cg)) t (gethash symbol (barrier-elements cg)) t)) ;;; position operator ;; Table for memoizing parsed position operators. This is really global. (defparameter *parsed-position-operators* (make-hash-table)) (defun parse-position-operator (op) "Returns values POS ABS-P CAREFUL-P SCAN-P BACKTRACK-P LEFT-LINK-P RIGHT-LINK-P" (apply #'values (or (gethash op *parsed-position-operators*) (setf (gethash op *parsed-position-operators*) (if (integerp op) (list op) (let ((op-str (string op))) (list (let ((num-start (position-if-not (lambda (c) (find c "*@LR" :test #'char-equal)) op-str))) (when num-start (parse-integer op-str :start num-start :junk-allowed t))) (find #\@ op-str) (find #\C op-str :test #'char-equal) (find #\* op-str) (search "**" op-str :test #'string=) ; not used (find #\L op-str :test #'char-equal) (find #\R op-str :test #'char-equal)))))))) (defun constraint-position (constraint) "Extracts the position from a constraint" (let ((op (if (eq 'NOT (car constraint)) (cadr constraint) (car constraint)))) (if (integerp op) op (let ((op-str (string op))) (let ((pos-var-p (find #\? op-str)) (num-start (position-if-not (lambda (c) (find c "*@LR" :test #'char-equal)) op-str))) (cond (pos-var-p #\?) (num-start (parse-integer op-str :start num-start :junk-allowed t)) (t nil))))))) ;; This method seems to be needed for backwards compatibility. Previously, ;; a sentence was represented as an array of tokens. (defmethod cohort ((sentence array) position) (when (and (>= position 0) (< position (length sentence))) (cdr (aref sentence position)))) (defmethod cohort ((sentence sentence) position) (with-slots (sentence-array) sentence (when (and (>= position 0) (< position (length sentence-array))) (token-features (aref sentence-array position))))) (defmethod (setf cohort) (value (sentence array) position) (when (and (>= position 0) (< position (length sentence))) (setf (cdr (aref sentence position)) value))) (defmethod (setf cohort) (value (sentence sentence) position) (with-slots (sentence-array) sentence (when (and (>= position 0) (< position (length sentence-array))) (setf (token-features (aref sentence-array position)) value)))) (defmethod sentence-length ((sentence array)) (length sentence)) (defmethod word-form ((sentence array) position) (when (and (>= position 0) (< position (length sentence))) (caar (aref sentence position)))) (defmethod word-form ((sentence sentence) position) (with-slots (sentence-array) sentence (when (and (>= position 0) (< position (length sentence-array))) (effective-token-value (aref sentence-array position))))) (defmethod sentence-word ((sentence array) position) (car (aref sentence position))) (defmethod nth-token (position (sentence array)) (aref sentence position)) (defmethod nth-token (position (sentence sentence)) (with-slots (sentence-array) sentence (aref sentence-array position))) (defparameter *print-constraints-p* nil) #+copy (defmethod syntactic-function-p ((cg constraint-grammar) symbol) (gethash symbol (syntactic-functions cg))) #+old (defun syntactic-function-unique-p (cg reading feature) (let ((found 0) (feature-vector (cdr reading)) (set (resolve-set cg feature nil))) (loop for code fixnum in (syntactic-function-codes cg) do (cond ((not (has-feature-code-p feature-vector code)) nil) ((>= found (length set)) (return-from syntactic-function-unique-p nil)) ((loop for features in set thereis (cond ((stringp features) (if (listp (car reading)) (find-if (lambda (r) (string= features r)) (car reading)) (string= features (car reading)))) ((atom features) (= code features)) (t (error "Features should be atoms, but is list: ~s." features)))) (incf found)) (t (return-from syntactic-function-unique-p nil)))) (not (zerop found)))) (defun %syntactic-function-unique-p (cg reading feature) (let ((found 0) (found-features ()) (feature-vector (cdr reading)) (set (resolve-set cg feature nil))) (loop for code fixnum in (syntactic-function-codes cg) do (cond ((not (has-feature-code-p feature-vector code)) nil) ((>= found (length set)) (return-from %syntactic-function-unique-p nil)) ((loop for features in set thereis (when (cond ((stringp features) (if (listp (car reading)) (find-if (lambda (r) (string= features r)) (car reading)) (string= features (car reading)))) ((atom features) (= code features)) (t (error "Features should be atoms, but is list: ~s." features))) (push features found-features))) (incf found)) (t (return-from %syntactic-function-unique-p nil)))) ;;(not (zerop found)) found-features)) (defun syntactic-function-unique-p (cg cohort feature) (let ((features ())) (loop for reading in cohort always (or (null reading) (let ((found-features (%syntactic-function-unique-p cg reading feature))) (and found-features (if features (equal features found-features) (setf features found-features)))))))) (defmethod check-cohort-constraint ((cg constraint-grammar) cohort constraint careful-p check-all-p readings reading) (let ((feature (car constraint))) (when *print-constraints-p* (print (list feature careful-p check-all-p cohort))) (let ((res (cond ((null cohort) nil) ((or careful-p (careful-p cg)) ;; is that OK?? (if (find #\@ (symbol-name feature)) ;;(syntactic-function-p cg feature) #-new (syntactic-function-unique-p cg cohort feature) #+old (and (= (count-if-not #'null (print cohort)) 1) ; morphologically unambiguous (%syntactic-function-unique-p cg (find-if-not #'null cohort) feature)) (loop for reading in cohort always (or ;; !! necessary since discarded readings are not ;; removed but set to NIL (null reading) (constraint-satisfied-p cg feature reading))))) (check-all-p (loop for reading in (or readings cohort) when (constraint-satisfied-p cg feature reading) collect reading)) (reading (when *print-constraints-p* (print (cons :reading reading))) (constraint-satisfied-p cg feature reading)) (t (loop for reading in cohort ;; (or readings cohort) ;; 02.10.2000 do (when *print-constraints-p* (print (list feature reading readings cohort))) thereis (constraint-satisfied-p cg feature reading)))))) (when *print-constraints-p* (print res)) res ))) ;; debugging (defparameter *check-constraint-count* 0) ;; It occurs to me that each (car constraint) has to be a declared set, or a list of ;; syntactic constraints. Check for that when defining a rule! *** ;; ** improve code! (defmethod constraint-satisfied-p ((cg constraint-grammar) feature reading) (and reading (incf *check-constraint-count*) (loop for features in (resolve-set cg feature nil) thereis (cond ((stringp features) (if (listp (car reading)) ;; new 25.2.2001 (find-if (lambda (r) (string= features r)) (car reading)) (string= features (car reading)))) ((atom features) ;; **** syntactic features are now in the same bitvector as all other features! (has-feature-code-p (cdr reading) features)) ((stringp (car features)) (and (if (listp (car reading)) ;; new 25.2.2001 (find-if (lambda (r) (string= (car features) r)) (car reading)) (string= (car features) (car reading))) (has-feature-codes-p (cdr reading) (cdr features)))) (t (has-feature-codes-p (cdr reading) features)))))) ;; (9.5) (defun boundary-reached-p (cohort careful-p) (if careful-p (loop for reading in cohort thereis (thereis-feature-p (cdr reading) '(clb **clb <**clb>))) (loop for reading in cohort always (thereis-feature-p (cdr reading) '(clb **clb <**clb>))))) (defparameter *check-constraint* 0) (defmethod check-constraint ((cg constraint-grammar) sentence position constraint boundary-mode &optional link-pos link-code check-all-p readings reading) "Returns a boolean value and (optionally) a link position and a link code. POSITION is the current position in the sentence." (declare (fixnum position) (optimize (speed 3) (safety 0))) #+test (print (list sentence position constraint boundary-mode link-pos link-code check-all-p readings)) (incf *check-constraint*) (cond ((null constraint) t) ((eq (car constraint) 'NOT) (multiple-value-bind (satisfied-p abs-pos link) (check-constraint cg sentence position (cdr constraint) boundary-mode link-pos link-code) (values (not satisfied-p) abs-pos link))) (t (multiple-value-bind (pos abs-p careful-p scan-p backtrack-p left-link-p right-link-p) (parse-position-operator (car constraint)) (declare (ignore backtrack-p)) ; not used yet *** (when *print-constraints-p* (print (list :args scan-p abs-p pos boundary-mode link-pos link-code check-all-p readings))) (let ((abs-pos (cond (abs-p pos) ((and left-link-p right-link-p) ;; LR0; necessary? link-pos) (left-link-p (+ link-pos (or pos -1))) (right-link-p (+ link-pos (or pos 1))) (t (+ position pos)))) (sentence-length (sentence-length sentence))) (cond ((not scan-p) ;; easiest case: we have to check only one position, but possibly ;; have to remember which readings satisfy the constraint (cf 9.2) (values (check-cohort-constraint cg (cohort sentence abs-pos) (cdr constraint) careful-p check-all-p (unless (zerop pos) readings) (when (zerop pos) reading)) abs-pos (caddr constraint))) (t (let* ((scan-pos abs-pos) (forward-p (>= abs-pos position)) (satisfied-pos (when ;; test if there is nothing to test ;-) (and (not (cond (left-link-p (if (< link-pos position) (= abs-pos -1) (= abs-pos position))) (right-link-p (if (< link-pos position) (= abs-pos position) (= abs-pos sentence-length))) (t nil))) (loop while (cond ((or (not boundary-mode) (not forward-p)) t) ;; if we move forward, clb starts a new clause ((eq boundary-mode '**clb) (not (boundary-reached-p (cohort sentence scan-pos) nil))) ((eq boundary-mode '**clb-c) (not (boundary-reached-p (cohort sentence scan-pos) t)))) thereis (check-cohort-constraint cg (cohort sentence scan-pos) (cdr constraint) careful-p nil nil nil) while (and (cond ((or (not boundary-mode) forward-p) t) ;; if we move backward, the first clb encountered ;; is in the same clause, therefore we test after ;; constraint check ((eq boundary-mode '**clb) (not (boundary-reached-p (cohort sentence scan-pos) nil))) ((eq boundary-mode '**clb-c) (not (boundary-reached-p (cohort sentence scan-pos) t)))) (cond (left-link-p (if (< abs-pos position) (>= (decf scan-pos) 0) (> (decf scan-pos) position))) (right-link-p (if (< abs-pos position) (< (incf scan-pos) position) (< (incf scan-pos) sentence-length))) (t (if (< abs-pos position) (>= (decf scan-pos) 0) (< (incf scan-pos) sentence-length))))))) scan-pos))) (when satisfied-pos (values t satisfied-pos (caddr constraint))))))))))) ;; READING is non-NIL for mapping rules, for special treatment of position 0 ;; this is a bug! Remove reading arg! asap (defmethod check-constraints ((cg constraint-grammar) sentence position constraints boundary-mode &optional link-position link readings reading) (when *print-constraints-p* (print constraints) (print readings)) (or (null constraints) (let* ((constraint (car constraints)) ;; check if this and the next constraint apply to the same position ;; and are simple constraints (check-all-p (and (cdr constraints) (integerp (car constraint)) (eql (car constraint) (caadr constraints))))) (multiple-value-bind (satisfied-p/readings link-pos link-code) (check-constraint cg sentence position constraint boundary-mode link-position link check-all-p #+ignore readings ;; 02.10.2000 (when check-all-p readings) reading) (when satisfied-p/readings (check-constraints cg sentence position (cdr constraints) boundary-mode link-pos link-code (when check-all-p satisfied-p/readings) reading)))))) ;;; Rules (defparameter *print-long-rule* nil) (defmethod print-object ((rule rule) stream) (print-unreadable-object (rule stream :type t :identity nil) (if *print-long-rule* (format stream "~a:~d [target: ~a, constraints: ~a]" (language rule) ;(multi-tagger *cg*)) (rule-id rule) (rule-target rule) (rule-constraints rule)) (format stream "~a:~d" (language rule) ;(language (multi-tagger *cg*)) (rule-id rule))))) #+old (defmethod print-object ((rule rule) stream) (if *print-long-rule* (format stream "#<~a ~a:~d [target: ~a, constraints: ~a]>" (class-name (class-of rule)) (language rule) ;(multi-tagger *cg*)) (rule-id rule) (rule-target rule) (rule-constraints rule)) (format stream "#<~a ~a:~d>" (class-name (class-of rule)) (language rule);(language (multi-tagger *cg*)) (rule-id rule)))) (defmethod rule-domain ((rule rule)) nil) (defmethod rule-type ((rule mapping-rule)) '=m) (defmethod rule-type ((rule morphological-heuristics-rule)) '=hm) (defmethod rules-equal-p ((rule1 rule) (rule2 rule)) (and (equal (rule-target rule1) (rule-target rule2)) (equal (rule-constraints rule1) (rule-constraints rule2)))) (defmethod rules-equal-p ((rule1 disambiguation-rule) (rule2 disambiguation-rule)) (and (eq (rule-type rule1) (rule-type rule2)) (equal (rule-domain rule1) (rule-domain rule2)) (call-next-method rule1 rule2))) (defmethod rules-equal-p ((rule1 syntactic-disambiguation-rule) (rule2 syntactic-disambiguation-rule)) (and (eq (rule-type rule1) (rule-type rule2)) (equal (rule-domain rule1) (rule-domain rule2)) (call-next-method rule1 rule2))) (defmethod rules-equal-p ((rule1 morphological-heuristics-rule) (rule2 morphological-heuristics-rule)) (and (equal (rule-string-conditions rule1) (rule-string-conditions rule2)) (equal (rule-string-changes rule1) (rule-string-changes rule2)) (call-next-method rule1 rule2))) (defmethod rules-equal-p ((rule1 mapping-rule) (rule2 mapping-rule)) (and (call-next-method) ; rule1 rule2) ;; should be something like set-equal-p *** (equal (rule-labels rule1) (rule-labels rule2)))) (defmethod show-rule ((rule-id integer)) (describe (aref (rule-array *cg*) rule-id))) (defmethod expand-constraints ((cg constraint-grammar) constraints &optional common (position 0)) "Returns a list of context condition lists" (let ((constr (car constraints))) (cond ((null constraints) (list common)) (($$-template-p constr) (let* ((template-constraints (resolve-template cg constr)) (pos (constraint-position (car (last template-constraints))))) (expand-constraints (cdr constraints) (append common template-constraints) pos))) ((&&-template-p constr) (multiple-value-bind (condition-list template-type left-p careful-p) (resolve-template cg constr) (declare (ignore template-type)) (reduce #'append (mapcar (lambda (conditions) (let ((abs-pos (abs position))) (expand-constraints (cdr constraints) (append common (loop for c in conditions do (incf abs-pos) collect (cons (cond (careful-p (intern (format nil "~dC" (if left-p (- abs-pos) abs-pos)) :cgp)) (left-p (- abs-pos)) (t abs-pos)) c))) (if left-p (- abs-pos) abs-pos)))) condition-list)))) ((find constr '(**clb **clb-c)) (expand-constraints cg (cdr constraints) (append common (list constr)) nil)) (t (let ((pos (constraint-position constr))) (if (eq pos #\?) (let* ((neg-p (eq (car constr) 'NOT)) (var-pos-code (string (if neg-p (cadr constr) (car constr)))) (var-pos (position #\? var-pos-code)) (pos-code (intern (format nil "~a~d~a" (subseq var-pos-code 0 var-pos) (1+ position) (subseq var-pos-code (1+ var-pos))) :cgp))) (expand-constraints cg (cdr constraints) (if neg-p (append common (list (list 'NOT pos-code (cddr constr)))) (append common (list (cons pos-code (cdr constr))))) (1+ position))) (expand-constraints cg (cdr constraints) (append common (list constr)) pos))))))) #+obsolete (defmethod define-disambiguation-rule ((cg constraint-grammar) rule-definition id &optional heuristic-p rule-previous-version group-comment) (destructuring-bind (domain operator target . constraints) rule-definition (let ((comment nil) (last (car (last constraints)))) (when (stringp last) (setf constraints (butlast constraints) comment last)) (let ((expanded-constraints (expand-constraints cg constraints))) (mapc (lambda (constraints) (make-disambiguation-rule cg domain operator target constraints id comment heuristic-p rule-previous-version group-comment)) expanded-constraints))))) ;; new 31.01.2001, changed 7.2.2001 (defmethod define-rule ((cg constraint-grammar) rule-definition id &key comment ;; rule-class previous-version group-comment group-comment-table insert-before) (let ((last (car (last rule-definition)))) ;; extract comment and group-comment from rule (cond ((stringp last) (setf rule-definition (butlast rule-definition) comment last)) ((null last) (setf rule-definition (butlast rule-definition))) ((and (consp last) (or (find :comment last) (find :group last))) (setf rule-definition (butlast rule-definition)) (when (find :comment last) (setf comment (getf last :comment))) (when-let (group-id (getf last :group)) (unless (gethash group-id group-comment-table) (setf (gethash group-id group-comment-table) (list nil))) (setf group-comment (gethash group-id group-comment-table)))))) (if (listp (cadr rule-definition)) ; is mapping-rule (define-mapping-rule cg rule-definition id ;:rule-class rule-class :previous-version previous-version :comment comment :group-comment group-comment :insert-before insert-before) (destructuring-bind (domain operator target . constraints) rule-definition (let ((expanded-constraints (expand-constraints cg constraints))) (assert (null (cdr expanded-constraints))) ;; seems we don't use templates (ecase operator ((=!! =!!h =! =!h =0 =0h #+ignore-yet =**clb) (make-rule cg domain operator target (car expanded-constraints) id comment previous-version group-comment)) ((=s! =s!h1 =s!h2 =s!h3 =s0 =s0h1 =s0h2 =s0h3 =n! =n0 =n!h =n0h =n!h1 =n0h1 =n!h2 =n0h2 =n!h3 =n0h3) ;; ** TO DO: make sure targets for syntactic rules are atoms (when (consp target) (setf target (car target))) (make-syntactic-rule cg domain operator target constraints id comment previous-version group-comment))))))) (defmethod define-mapping-rule ((cg constraint-grammar) rule-definition id &key comment previous-version group-comment insert-before) (destructuring-bind (target constraints labels &optional %comment) rule-definition (when (atom target) (error "A mapping rule target cannot be a set name: ~s" target)) (when (find-if (lambda (f) (and (stringp f) (char= (char f 0) #\<))) target) (error "A mapping rule target cannot contain word forms: ~s" target)) (let ((expanded-constraints (expand-constraints cg constraints))) (assert (null (cdr expanded-constraints))) ;; seems we don't use templates (make-mapping-rule cg target (car expanded-constraints) labels id (or comment %comment) previous-version group-comment insert-before) #+ignore (mapc (lambda (constraints) (make-mapping-rule cg target constraints labels id (or comment %comment) previous-version group-comment insert-before)) expanded-constraints)))) (defmethod define-morphological-heuristics-rule ((cg constraint-grammar) rule-definition) (destructuring-bind (domain operator target string-conditions string-changes . constraints) rule-definition (assert (and (eq domain '@w) (eq operator '=h))) (let ((expanded-constraints (expand-constraints cg constraints))) (mapc (lambda (constraints) (make-morphological-heuristics-rule cg target string-conditions string-changes constraints)) expanded-constraints)))) #+obsolete (defun define-mapping-rules (rule-definition-list &key (cg *cg*) clearp group-comment) (with-slots (morphosyntactic-mappings %mapping-features) cg (when clearp (clrhash morphosyntactic-mappings) (setf %mapping-features t)) (mapc (lambda (rule-definition) (if (eq (car rule-definition) 'comment) (let ((group-comment (list (cadr rule-definition)))) (define-mapping-rules (cddr rule-definition) :cg cg :group-comment group-comment)) (define-mapping-rule cg rule-definition nil nil group-comment))) (reverse rule-definition-list))) nil) ; return nil because form is often evaluated on toplevel; don't want to see all the rules again ;; new 31.01.2001, changed 8.2.2001 (defun define-rules (rule-definition-list &key (cg *cg*) clearp clear-types) (%define-rules rule-definition-list :cg cg :clearp clearp :clear-types clear-types) (with-slots (rules domain-rules %feature-codes %domain-feature-codes) cg #+ignore (optimize-rule-order disambiguation-constraints) #+disambiguation-rule-tree (maphash (lambda (type tree) ;(declare (ignore type)) (setf (gethash type rules) (optimize-rules-tree tree))) rules) #+disambiguation-rule-tree (maphash (lambda (domain rules) (declare (ignore domain)) (maphash (lambda (type tree) ;(declare (ignore type)) (setf (gethash type rules) (optimize-rules-tree tree))) rules)) domain-rules) nil)) (defun %define-rules (rule-definition-list &key (cg *cg*) clearp clear-types group-comment) (declare (ignore clearp)) (let ((group-comments (make-hash-table))) (with-slots (rules domain-rules %feature-codes %domain-feature-codes) cg (dolist (type clear-types) (remhash type rules) (remhash type %feature-codes) (maphash (lambda (domain rule-table) (declare (ignore domain)) (remhash type rule-table)) domain-rules) (clrhash %domain-feature-codes)) (mapc (lambda (rule-definition) (case (car rule-definition) (comment ;; this is obsolete in the generated files (let ((group-comment (list (cadr rule-definition)))) (%define-rules (cddr rule-definition) :cg cg :group-comment group-comment))) (group-comment ;; is no rule definition in this case of course ;;(format t "~%group-comment: ~a" (cadr rule-definition)) (if (gethash (cadr rule-definition) group-comments) (setf (car (gethash (cadr rule-definition) group-comments)) (caddr rule-definition)) (setf (gethash (cadr rule-definition) group-comments) ;; LIST containing comment, ;; rules having that comment are pushed into the list's cdr (cddr rule-definition)))) (otherwise (define-rule cg rule-definition nil :group-comment group-comment :group-comment-table group-comments)))) (reverse rule-definition-list))))) (defmethod optimize-rules-tree ((obj t)) nil) ;(reduce #'intersection '((1 2 3 4) (1 3 5 6) (0 1 2 3 4 5))) ;(reduce #'intersection '()) ;; to do: rule-target -> (car rule-target) (defun find-maximal-partition (sets) (let ((partition-list ())) (dolist (set sets) (dolist (elt set) (pushnew set (getf partition-list elt)))) (let ((max-partitions ())) (loop with length = 0 for (elt partition) on partition-list by #'cddr do (cond ((> (length partition) length) (setf length (length partition) max-partitions (list (cons (list elt) partition)))) ((= (length partition) length) (push (cons (list elt) partition) max-partitions)) (t nil))) (cond ((null (cdr max-partitions)) (values (caar max-partitions) (cdar max-partitions))) (t (let ((compressed-partition-list ())) (dolist (partition max-partitions) (unless (loop for c-partition in compressed-partition-list when (equal (cdr partition) (cdr c-partition)) do (setf (car c-partition) (append (car c-partition) (car partition))) (return t)) (push partition compressed-partition-list))) (let ((elts+max-partition nil)) (loop with length = 0 for elts+partition in compressed-partition-list when (> (length (car elts+partition)) length) do (setf length (length (car elts+partition)) elts+max-partition elts+partition)) (values (car elts+max-partition) (cdr elts+max-partition))))))))) #+ignore (((1) #2=(1 7) #1=(1 2 7)) ((7) #2# #1#) ((2) (2 6 3) #1#)) ;(find-maximal-partition '((1 2 7) (2 6 3) (1 7))) ;(find-maximal-partition '((1 2 5 7) (2 5 6 3) (1 5 7))) ;(build-minimal-tree '((a 1 2 5 7) (b 2 5 6 3) (c 1 5 f 7))) ;(build-minimal-tree '((a 1 2 7) (b 2 6 3) (c 1 f 7))) (defun build-minimal-tree (sets &key (node-test #'integerp)) (when sets (multiple-value-bind (elements max-sets) (find-maximal-partition sets) (labels ((build-sub (elts) (let ((node (find-if node-test elts))) (cond ((and elts (not node)) (list elts)) (node (let ((rest (remove node elts))) (if (find-if node-test rest) (list node (build-sub rest)) (cons node (build-sub rest))))) (t (build-minimal-tree (remove-if #'null (mapcar (lambda (set) (set-difference set elements)) max-sets)))))))) (cons (if (find-if node-test elements) (build-sub elements) elements) (build-minimal-tree (set-difference sets max-sets))))))) (defun tree-and-stem (minimal-tree &key (node-test #'integerp)) (cond ((null minimal-tree) nil) ((cdr minimal-tree) minimal-tree) (t (multiple-value-bind (tree stem) (tree-and-stem (cdar minimal-tree)) (if (funcall node-test (caar minimal-tree)) (values tree (cons (caar minimal-tree) stem)) (values (car minimal-tree) stem)))))) ;(tree-and-stem (build-minimal-tree '((0 a 1 2 5 7) (b 2 5 6 0 3) (c 1 5 0 f 7)))) ;(tree-and-stem '((8 (12 (3 (')))))) ;(build-minimal-tree '((0) (b ) (c ))) ;(tree-and-stem (build-minimal-tree nil)) (defmethod optimize-rules-tree ((tree constraint-node)) (labels ((optimize-node (node) (with-slots (child-constraints rules) node (multiple-value-bind (rule-subtree common-rule-features) (tree-and-stem (build-minimal-tree (mapcar (lambda (rule) (cons rule (mapcar #'feature-code (car (rule-target rule))))) rules))) (multiple-value-bind (children-subtree common-child-features) (tree-and-stem (build-minimal-tree (mapcar #'optimize-node child-constraints))) (labels ((remove-one-element-lists (list) ; *** change name! (if (integerp (car list)) (list list) ;list #+ignore (mapcar (lambda (child) (cond ((atom child) child) ((integerp (car child)) child) (t (car child)))) list)))) (cond ((and rules child-constraints) (let ((common-features (intersection common-rule-features common-child-features))) (labels ((add-stem (stem tree) (if stem (add-stem (cdr stem) (list (car stem) tree)) tree))) (setf rules (remove-one-element-lists (add-stem (set-difference common-rule-features common-features) rule-subtree)) child-constraints (remove-one-element-lists (add-stem (set-difference common-child-features common-features) children-subtree))) (cons node common-features)))) (rules (setf rules (remove-one-element-lists rule-subtree)) (cons node common-rule-features)) (child-constraints (setf child-constraints (remove-one-element-lists children-subtree)) (cons node common-child-features)) (t (list node))))))))) (car (optimize-node tree)))) (defun define-morphological-heuristics-rules (rule-definition-list &optional (cg *cg*)) (with-slots (morphological-heuristics) cg (setf morphological-heuristics ()) (mapc (lambda (rule-definition) (define-morphological-heuristics-rule cg rule-definition)) (reverse rule-definition-list))) nil) (defun stored-rules-equal-p (rule1 rule2 &key (compare-comments-p t)) (cond ((atom rule1) (when (atom rule2) (and (or (not compare-comments-p) (equal (rule-comment rule1) (rule-comment rule2))) (rules-equal-p rule1 rule2)))) ((atom rule2) nil) (t (and (equal (car rule1) (car rule2)) (and (or (not compare-comments-p) (equal (rule-comment (cdr rule1)) (rule-comment (cdr rule2)))) (rules-equal-p (cdr rule1) (cdr rule2))))))) #+old?? (defmethod add-rule-to-tree ((cg constraint-grammar) rule tree) (let ((constraints (rule-constraints rule))) (labels ((add (constraints tree) (let ((constraint (car constraints))) (if (null constraint) (pushnew rule (constraint-rules tree)) (let ((sub-tree (find constraint (child-constraints tree) :test #'equal :key #'constraint))) (child-constraints tree) (when (null sub-tree) (setf sub-tree (make-instance 'constraint-node :constraint constraint :parent tree :rules (when (null (cdr constraints)) (list rule)))) (child-constraints tree) (push sub-tree (child-constraints tree))) (add (cdr constraints) sub-tree)))))) (add constraints tree)))) ;; new 22.01.2001 #+does-not-work-yet (defun add-subtree (tree sub-tree) (push sub-tree (child-constraints tree)) #+test(let* ((new-constraint (constraint sub-tree)) (st-not-p (eq (car new-constraint) 'NOT))) (multiple-value-bind (st-pos st-abs-p st-careful-p st-scan-p st-backtrack-p st-left-link-p st-right-link-p) (parse-position-operator (when st-not-p (cadr new-constraint) (car new-constraint))) (labels ((insertion-pos (constraints) (when constraints (let* ((constraint (constraint (car constraints))) (not-p (eq (car constraint) 'NOT))) (multiple-value-bind (pos abs-p careful-p scan-p backtrack-p left-link-p right-link-p) (parse-position-operator (when not-p (cadr constraint) (car constraint))) (cond #|((and scan-p (not st-scan-p)) constraints) ((and (not scan-p) st-scan-p) (insertion-pos (cdr constraints))) ((and not-p (not st-not-p)) constraints) ((and (not not-p) st-not-p) (insertion-pos (cdr constraints))) ((and careful-p (not st-careful-p)) constraints) ((and (not careful-p) st-careful-p) (insertion-pos (cdr constraints)))|# ((null pos) constraints) ((null st-pos) (insertion-pos (cdr constraints))) ((< pos st-pos) constraints) ((> pos st-pos) (insertion-pos (cdr constraints))) (t constraints))))))) (print new-constraint) (print (child-constraints tree)) (let ((rest (insertion-pos (child-constraints tree)))) (cond ((eq rest (child-constraints tree)) (push sub-tree (child-constraints tree))) ((null rest) (setf (cdr (last (child-constraints tree))) (list sub-tree))) (t (labels ((walk (c-list) (if (eq (cdr c-list) rest) (push sub-tree (cdr c-list)) (walk (cdr c-list))))) (walk (child-constraints tree)))))) (print (child-constraints tree)) (terpri) )))) #+test (defun add-subtree (tree st-pos) (labels ((insertion-pos (constraints) (when constraints (let* ((pos (car constraints))) (cond ((null pos) constraints) ((null st-pos) (insertion-pos (cdr constraints))) ((< pos st-pos) constraints) ((> pos st-pos) (insertion-pos (cdr constraints))) (t constraints)))))) (let ((rest (insertion-pos (cdr tree)))) (cond ((eq rest (cdr tree)) (push st-pos (cdr tree))) ((null rest) (setf (cdr (last (cdr tree))) (list st-pos))) (t (labels ((walk (c-list) (if (eq (cdr c-list) rest) (push st-pos (cdr c-list)) (walk (cdr c-list))))) (walk (cdr tree)))))) tree)) #+test (let ((tree '(a 2)) (list '(2 4 5 8 2 4 4 7 1 0))) (dolist (n list) (add-subtree tree n)) tree) (defmethod add-rule-to-tree ((cg constraint-grammar) rule tree) (let ((constraints (rule-constraints rule))) (labels ((add (constraints tree) (let ((constraint (car constraints))) (if (null constraint) (pushnew rule (constraint-rules tree)) (let ((sub-tree (find constraint (child-constraints tree) :test #'equal :key #'constraint))) (when (null sub-tree) (setf sub-tree (make-instance 'constraint-node :constraint constraint :parent tree :rules (when (null (cdr constraints)) (list rule)))) #+test(add-subtree tree sub-tree) (push sub-tree (child-constraints tree))) (add (cdr constraints) sub-tree)))))) (add constraints tree)))) ;; inverse of previous method (defmethod remove-rule-from-tree ((cg constraint-grammar) rule tree) (let ((constraints (rule-constraints rule))) (labels ((remove-rule (constraints tree) (let ((constraint (car constraints))) (if (null constraint) (setf (constraint-rules tree) (delete rule (constraint-rules tree))) (let ((sub-tree (find constraint (child-constraints tree) :test #'equal :key #'constraint))) (remove-rule (cdr constraints) sub-tree) (when (and (null (constraint-rules sub-tree)) (null (child-constraints sub-tree))) (setf (child-constraints tree) (delete sub-tree (child-constraints tree))))))))) (remove-rule constraints tree)))) (defmethod encode-constraints ((cg constraint-grammar) constraints) (print constraints)) (defmethod operator-to-rule-class ((cg constraint-grammar) operator) (values (ecase operator ((=!! =!!h) 'strong-select-rule) ((=! =!h) 'select-rule) ((=0 =0h) 'discard-rule) ((=s! =s!h1 =s!h2 =s!h3) 'syntactic-select-rule) ((=s0 =s0h1 =s0h2 =s0h3) 'syntactic-discard-rule) ((=n! =n!h =n!h1 =n!h2 =n!h3) 'named-entity-select-rule) ((=n0 =n0h =n0h1 =n0h2 =n0h3) 'named-entity-discard-rule) (=**clb 'clb-rule)) (ecase operator ((=!! =! =0 =s! =s0 =**clb =n! =n0) 0) ((=!!h =!h =0h =s0h1 =s!h1 =n0h =n!h =n0h1 =n!h1) 1) ((=s0h2 =s!h2 =n0h2 =n!h2) 2) ((=s0h3 =s!h3 =n0h3 =n!h3) 3)))) ;; new 31.01.2001 #-disambiguation-rule-tree (defmethod make-rule ((cg constraint-grammar) domain operator target constraints id comment &optional rule-previous-version group-comment) (when (stringp domain) (setf domain (string-trim "<>" domain)) (remhash domain (%domain-feature-codes cg))) ;; has to be recalculated (labels ((base-form-error? (feature features) ; (9.1.) (when (stringp feature) (error "The first feature of a target may not be a base form: ~s" features)))) (with-slots (rules domain-rules rule-array group-comments language) cg (multiple-value-bind (rule-class heuristic-level) (operator-to-rule-class cg operator) (let* ((resolved-target (resolve-set cg target)) (boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints)) (%id (if (and id (< id (fill-pointer rule-array))) id (fill-pointer rule-array))) (rule (make-instance rule-class :language language :type operator :domain (unless (eq domain '@w) domain) :target resolved-target :constraints ; constraints (order-constraints constraints) ;; 12.12.2002 :boundary-mode boundary-mode :comment comment :heuristic-level heuristic-level ;; *** still needed?? :previous-version rule-previous-version :id %id))) (when group-comment (push (rule-id rule) (cdr group-comment))) ;; collect rules with same group ;; store rule in rule array ;; *** OBS: rule should only be stored if it is not already in one of the ;; rule tables. Isn't very important though. *** ??? (cond ((eq %id id) ; id exists already (setf (aref rule-array id) rule) (when group-comment (setf (aref group-comments id) group-comment))) (t (vector-push-extend rule rule-array) (vector-push-extend group-comment group-comments))) ;; store rule both with the primary feature of the resolved target ;; and the unresolved target as keys (let ((table (if (eq domain '@w) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))) (let ((rules (or (gethash domain domain-rules) (setf (gethash domain domain-rules) (make-hash-table :size 5))))) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5)))))) (found-rule nil)) (dolist (features resolved-target) (cond ((consp features) ; a feature set (base-form-error? (car features) features) (let ((old-cdr+rule (find (cons (cdr features) rule) (gethash (car features) table) :test #'stored-rules-equal-p))) (if old-cdr+rule (setf found-rule (cdr old-cdr+rule)) ;; new 20.06.2001; put new version at same place as old version (%add-features+rule cg rule table (car features) (cdr features) t) #+ignore (pushnew (cons (cdr features) rule) (gethash (car features) table) :test #'stored-rules-equal-p)))) (t (base-form-error? features features) (let ((old-cdr+rule (find (cons nil rule) (gethash features table) :test #'stored-rules-equal-p))) (if old-cdr+rule (setf found-rule (cdr old-cdr+rule)) (%add-features+rule cg rule table features) #+ignore (pushnew (cons nil rule) (gethash features table) :test #'stored-rules-equal-p)))))) (unless (consp target) (base-form-error? target target) ;; new 20.06.2001; put new version at same place as old version (cond ((and rule-previous-version (find rule-previous-version (gethash target table))) (setf (gethash target table) (nsubstitute rule rule-previous-version (gethash target table)))) (t (pushnew rule (gethash target table) :test #'stored-rules-equal-p)))) ;; return old rule if found found-rule)))))) #+old (defun %add-features+rule (rule table feature &optional rest-features) (pushnew (cons rest-features rule) (gethash feature table) :test #'stored-rules-equal-p)) #+ignore (defmacro insert (value place predicate &optional (key #'identity)) (multiple-value-bind (dummies vals store-var setter getter) (get-setf-method place) (let ((valvar (gensym))) `(let* ((,valvar ,value) ,@(mapcar #'list dummies vals) (,(car store-var) (cons ,valvar ,getter))) ,@dummies ,(car store-var) ,setter)))) ;; move to u! use get-setf-method (defmacro insert (elt place predicate &key (key #'identity)) (let ((list (gensym)) (head (gensym))) `(labels ((sub-insert (,head ,list) (cond ((or (null ,list) (funcall ,predicate (funcall ,key ,elt) (funcall ,key (car ,list)))) (if (eq ,list ,place) (setf ,place (cons ,elt ,list)) (setf (cdr ,head) (cons ,elt ,list)))) (t (sub-insert ,list (cdr ,list)))))) (sub-insert nil ,place) ,place))) (defmethod %add-features+rule ((cg constraint-grammar) rule table feature &optional rest-features replace-prev-version-if-possible-p) (let* ((*cg* cg) (*tagger* (multi-tagger cg)) (prev-features+rule (and replace-prev-version-if-possible-p (find (rule-id rule) (gethash feature table) :key (lambda (f+r) (rule-id (cdr f+r))))))) ;;(print (list feature (gethash feature table) prev-features+rule)) (if prev-features+rule (setf (gethash feature table) (nsubstitute (cons rest-features rule) prev-features+rule (gethash feature table))) (if (gethash feature table) (insert (cons rest-features rule) (gethash feature table) #'target< :key #'car) (setf (gethash feature table) (list (cons rest-features rule))))) ;;(print (list feature (gethash feature table))) #+ignore (pushnew (cons rest-features rule) (gethash feature table) :test #'stored-rules-equal-p))) (defun feature< (feature1 feature2) (with-slots (feature-precedence) *cg* (let* ((c1 (feature-code feature1)) (c2 (feature-code feature2)) (pos1 (position c1 feature-precedence)) (pos2 (position c2 feature-precedence))) (cond ((and pos1 pos2) (< pos1 pos2)) (pos1 t) (pos2 nil) (t (< c1 c2)))))) (defun target< (target1 target2) (let ((f1 (car target1)) (f2 (car target2))) (cond ((and f1 f2) (or (feature< f1 f2) (and (eq f1 f2) (target< (cdr target1) (cdr target2))))) (f1 t) (f2 nil) (t nil)))) #+test (let* ((*cg* (gethash "nbo-feb" *cg-table*)) (*tagger* (multi-tagger *cg*))) (sort '((det) (det adj) (adj det) (verb subst) (verb) (verb det pron)) #'target<)) ;(defun parse-position-operator (op) ; "Returns values POS ABS-P CAREFUL-P SCAN-P BACKTRACK-P LEFT-LINK-P RIGHT-LINK-P" ;(optimize-constraints '((-2 subst) (-1 komma/konj) (not -1 clb) (1 komma) (2 subst))) ;; does not help (defun optimize-constraints (constraints) constraints #+ignore (let* ((rest (member-if (lambda (constraint) (let ((c (if (eq (car constraint) 'NOT) (cadr constraint) (car constraint)))) (multiple-value-bind (pos abs-p careful-p scan-p backtrack-p left-link-p right-link-p) (parse-position-operator c) (declare (ignore pos abs-p careful-p scan-p backtrack-p)) (or left-link-p right-link-p)))) constraints)) (main-constraints (if rest (loop for cs on constraints until (eq cs rest) collect (car cs)) (copy-seq constraints)))) (append (sort main-constraints (lambda (c1 c2) (let ((neg1 (eq (car c1) 'NOT)) (neg2 (eq (car c2) 'NOT))) (multiple-value-bind (pos1 abs-p1 careful-p1 scan-p1 backtrack-p1) (parse-position-operator (if neg1 (cadr c1) (car c1))) (declare (ignore abs-p1 backtrack-p1)) (multiple-value-bind (pos2 abs-p2 careful-p2 scan-p2 backtrack-p2) (parse-position-operator (if neg2 (cadr c2) (car c2))) (declare (ignore abs-p2 backtrack-p2)) #-ignore (or (and careful-p2 (not careful-p1)) (< pos1 pos2) (and (= pos1 pos2) (or (and neg1 (not neg2)) (and (not scan-p1) scan-p2)))) #+ignore ; 25479 (or (and (not scan-p1) scan-p2) (and careful-p2 (not careful-p1)) (< pos1 pos2) (and (= pos1 pos2) (not neg1) neg2)) #+ignore ; 23692 (or (and (not scan-p1) scan-p2) (and careful-p2 (not careful-p1)) (> pos1 pos2) (and (= pos1 pos2) neg2 (not neg1)))))))) rest))) ;; new 25.2.2001 #+disambiguation-rule-tree (defmethod make-rule ((cg constraint-grammar) domain operator target constraints id comment &optional rule-previous-version group-comment) (labels ((base-form-error? (feature features) ; (9.1.) (when (stringp feature) (error "The first feature of a target may not be a base form: ~s" features)))) (multiple-value-bind (rule-class heuristic-level) (operator-to-rule-class cg operator) (let ((domain (if (eq domain '@w) domain (string-trim "<>" domain))) (resolved-target (resolve-set cg target))) (when (stringp domain) (remhash domain (%domain-feature-codes cg))) ;; has to be recalculated (with-slots (rules domain-rules rule-array group-comments language) cg (dolist (feature (car resolved-target)) (let* ((boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints)) (%id (if (and id (< id (fill-pointer rule-array))) id (fill-pointer rule-array))) (rule (make-instance rule-class :language language :type operator :domain (unless (eq domain '@w) domain) :target resolved-target :constraints (optimize-constraints constraints) :boundary-mode boundary-mode :comment comment :heuristic-level heuristic-level ;; *** still needed?? :previous-version rule-previous-version :id %id))) (cond ((eq %id id) ; id exists already (setf (aref rule-array id) rule) (when group-comment (setf (aref group-comments id) group-comment))) (t (vector-push-extend rule rule-array) (vector-push-extend group-comment group-comments))) (if (typep rule 'syntactic-disambiguation-rule) ; *** better done via class dispatch!! (let* ((table (if (eq domain '@w) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))) (let ((rules (or (gethash domain domain-rules) (setf (gethash domain domain-rules) (make-hash-table :size 5))))) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))))))) (when group-comment (push rule (cdr group-comment))) ;; collect rules with same group (add-rule-to-tree cg rule (or (gethash feature table) (setf (gethash feature table) (make-instance 'constraint-node))))) (let* ((node (if (eq domain '@w) (or (gethash operator rules) (setf (gethash operator rules) (make-instance 'constraint-node))) (let ((rules (or (gethash domain domain-rules) (setf (gethash domain domain-rules) (make-hash-table :size 5))))) (or (gethash operator rules) (setf (gethash operator rules) (make-instance 'constraint-node))))))) (when group-comment (push rule (cdr group-comment))) ;; collect rules with same group (add-rule-to-tree cg rule node)))) nil)))))) #+(and old disambiguation-rule-tree) (defmethod make-rule ((cg constraint-grammar) domain operator target constraints id comment &optional rule-previous-version group-comment) (labels ((base-form-error? (feature features) ; (9.1.) (when (stringp feature) (error "The first feature of a target may not be a base form: ~s" features)))) (multiple-value-bind (rule-class heuristic-level) (operator-to-rule-class cg operator) (let ((domain (if (eq domain '@w) domain (string-trim "<>" domain))) (resolved-target (resolve-set cg target))) (with-slots (rules domain-rules rule-array group-comments language) cg (dolist (feature (car resolved-target)) (let* ((boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints)) (%id (if (and id (< id (fill-pointer rule-array))) id (fill-pointer rule-array))) (rule (make-instance rule-class :language language :type operator :domain (unless (eq domain '@w) domain) :target resolved-target :constraints constraints ;(optimize-constraints constraints) :boundary-mode boundary-mode :comment comment :heuristic-level heuristic-level ;; *** still needed?? :previous-version rule-previous-version :id %id))) (cond ((eq %id id) ; id exists already (setf (aref rule-array id) rule) (when group-comment (setf (aref group-comments id) group-comment))) (t (vector-push-extend rule rule-array) (vector-push-extend group-comment group-comments))) (let* ((table (if (eq domain '@w) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))) (let ((rules (or (gethash domain domain-rules) (setf (gethash domain domain-rules) (make-hash-table :size 5))))) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))))))) (when group-comment (push rule (cdr group-comment))) ;; collect rules with same group (add-rule-to-tree cg rule (or (gethash feature table) (setf (gethash feature table) (make-instance 'constraint-node)))))) nil)))))) (defmethod make-syntactic-rule ((cg constraint-grammar) domain operator target constraints id comment rule-previous-version group-comment) "The (resolved) TARGET is a disjunction of syntactic features. Store the rule under each feature." (assert (atom target)) (unless (consp target) (assert (or (@-set-p target) (&-set-p target)))) (multiple-value-bind (rule-class heuristic-level) (operator-to-rule-class cg operator) (let ((domain (if (eq domain '@w) domain (string-trim "<>" domain))) (resolved-target (list target) #+ignore (if (consp target) target (resolve-set cg target)))) (with-slots (rules domain-rules rule-array group-comments language) cg (dolist (s-feature resolved-target) ;;(print (list :s-feature s-feature)) (let* ((boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints)) (%id (if (and id (< id (fill-pointer rule-array))) id (fill-pointer rule-array))) (rule (make-instance rule-class :heuristic-level heuristic-level ;; ** needed?? :language language :type operator :domain (unless (eq domain '@w) domain) :target s-feature :constraints #+test constraints (order-constraints constraints) ;; 12.12.2002 :boundary-mode boundary-mode :comment comment :previous-version rule-previous-version :id %id))) (cond ((eq %id id) ; id exists already (setf (aref rule-array id) rule) (when group-comment (setf (aref group-comments id) group-comment))) (t (vector-push-extend rule rule-array) (vector-push-extend group-comment group-comments))) (let* ((table (if (eq domain '@w) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))) (let ((rules (or (gethash domain domain-rules) (setf (gethash domain domain-rules) (make-hash-table :size 5))))) (or (gethash operator rules) (setf (gethash operator rules) (make-hash-table :size 5))))))) (when group-comment (push (rule-id rule) (cdr group-comment))) ;; collect rules with same group (add-rule-to-tree cg rule (or (gethash s-feature table) (setf (gethash s-feature table) (make-instance 'constraint-node)))))) nil))))) ;; somewhat hacky (defmethod mapping-rule-class+accessor ((cg constraint-grammar) labels) (declare (ignore labels)) (values 'mapping-rule #'morphosyntactic-mappings #'syntactic-functions)) #+copy (defmethod mapping-rule-class+accessor ((cg ne-constraint-grammar) labels) (print (cons :labels labels)) (if (gethash (car labels) (syntactic-functions cg)) (values 'mapping-rule #'morphosyntactic-mappings #'syntactic-functions) (values 'named-entity-mapping-rule #'named-entity-mappings #'named-entity-tags))) (defun %nthcdr (n list) (nthcdr n list)) (defun (setf %nthcdr) (value index list) (if (= index 0) value (setf (cdr (nthcdr (1- index) list)) value))) (defmethod make-mapping-rule ((cg constraint-grammar) target constraints labels id comment &optional rule-previous-version group-comment insert-before) (with-slots (rule-array group-comments language) cg (multiple-value-bind (rule-class rule-table-accessor tags-accessor) (mapping-rule-class+accessor cg labels) (when (find-if-not (lambda (label) (gethash label (funcall tags-accessor cg))) labels) (error "All labels must be declared as syntactic functions: ~s" labels)) (let* ((mappings (funcall rule-table-accessor cg)) (boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) ;; not used in "norsk-v2" (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints)) (%id (if (and id (< id (fill-pointer rule-array))) id (fill-pointer rule-array))) (rule (make-instance rule-class :language language :target target :constraints constraints :labels labels :boundary-mode boundary-mode :comment comment :previous-version rule-previous-version :id %id))) (prog1 (cond (insert-before (let ((rule-pos (position insert-before (gethash (car target) mappings) :key (lambda (pair) (rule-id (cdr pair)))))) #+test (assert (not (null rule-pos))) (if (or (null rule-pos) (zerop rule-pos)) (push (cons (cdr target) rule) (gethash (car target) mappings)) (push (cons (cdr target) rule) (%nthcdr rule-pos (gethash (car target) mappings))))) nil) (t (let ((old-cdr+rule (find (cons (cdr target) rule) (gethash (car target) mappings) :test #'stored-rules-equal-p))) (if old-cdr+rule (cdr old-cdr+rule) ;; return old rule (progn (push (cons (cdr target) rule) (gethash (car target) mappings)) nil))))) ;; store rule in rule array ;; *** OBS: rule should only be stored if it is not already in one of the ;; rule tables. Isn't very important though. (cond ((eq %id id) ; id exists already (setf (aref rule-array id) rule) (when group-comment (setf (aref group-comments id) group-comment))) (t (vector-push-extend rule rule-array) (vector-push-extend group-comment group-comments))) (when group-comment (push (rule-id rule) (cdr group-comment))) ;; collect rules with same group ))))) (defmethod update-rule-constraints ((cg constraint-grammar) (rule rule) constraints) (let* ((boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints))) #+ignore-yet(backup-old-rule rule) (setf (rule-constraints rule) constraints (boundary-mode rule) boundary-mode))) (defmethod make-morphological-heuristics-rule ((cg constraint-grammar) target string-conditions string-changes constraints) (assert (and (eq (car string-conditions) '@1) ; silly convention! (eq (car string-changes) '@2))) (with-slots (morphological-heuristics language) cg (let* ((boundary-mode (when (find (car (last constraints)) '(**clb **clb-c)) (car (last constraints)))) (constraints (if boundary-mode (butlast constraints) constraints)) (rule (make-instance 'morphological-heuristics-rule ;:cg cg :language language :target target :constraints constraints :boundary-mode boundary-mode :string-conditions (when (cadr string-conditions) (cdr string-conditions)) :string-changes (when (cadr string-changes) (cdr string-changes))))) (pushnew rule morphological-heuristics :test #'stored-rules-equal-p)))) (defparameter *show-used-rules* t) (defparameter *tried-rules-count* 0) (defparameter *used-rules-count* 0) ;; Record the rule used. (defmethod apply-rule :around ((rule rule) (sentence array) position reading) (declare (ignore reading)) (when (call-next-method) (when *show-used-rules* (let ((word-list (aref sentence position))) (setf (cdar word-list) (cons (concat (write-to-string (language *tagger*)) (write-to-string (rule-id rule)) (write-to-string (rule-type rule))) (cdar word-list))))) t)) (defmethod rule-string ((obj t)) (format nil "~a" obj)) (defmethod rule-string ((rule rule)) (concat (case (language *tagger*) (:nbo "b") (:nny "n") (t "")) (write-to-string (rule-id rule)) (write-to-string (rule-type rule)))) (defmethod apply-rule :around ((rule rule) (sentence sentence) position reading) (declare (ignore reading)) (incf *tried-rules-count*) (when (call-next-method) (incf *used-rules-count*) (when *show-used-rules* (push rule #+ignore (rule-string rule) (token-used-rules (nth-token position sentence)))) t)) ;; debug (defmethod apply-rule ((rule-id integer) sentence position reading) (apply-rule (aref (rule-array *cg*) rule-id) sentence position reading)) ;; ******** these need special treatment!! #-disambiguation-rule-tree (defmethod apply-rule ((rule strong-select-rule) sentence position reading) (cond ((check-constraints *cg* sentence position (rule-constraints rule) (boundary-mode rule)) (when *record-discarded-correct-readings-p* (let ((reading (nth reading (cohort sentence position)))) (loop for cohort in (cohort sentence position) unless (eq cohort reading) do (record-discarded-correct-readings sentence position cohort)))) (setf (cohort sentence position) (list (nth reading (cohort sentence position))))) (t (when *record-discarded-correct-readings-p* (record-discarded-correct-readings sentence position (nth reading (cohort sentence position)))) (setf (nth reading (cohort sentence position)) nil))) ;; this rule does always have some inpact t) (defmethod record-discarded-correct-readings ((sentence sentence) position reading) (with-slots (sentence-array) sentence (let* ((token (aref sentence-array position)) (features (cdr reading))) (when (and features (has-feature-p features ')) (setf (correct-reading-discarded-p token) t))))) ;;; *** this is buggy!!! should be applied regardless of constraints #+disambiguation-rule-tree (defmethod apply-rule ((rule strong-select-rule) sentence position reading) (if (check-constraints *cg* sentence position (rule-constraints rule) (boundary-mode rule)) (setf (cohort sentence position) (list (nth reading (cohort sentence position)))) (when *record-discarded-correct-readings-p* (record-discarded-correct-readings sentence position reading)) (setf (nth reading (cohort sentence position)) nil)) ;; this rule does always have some inpact t) #-disambiguation-rule-tree (defmethod apply-rule ((rule select-rule) sentence position reading) (declare (ignore reading)) (when (check-constraints *cg* sentence position (rule-constraints rule) (boundary-mode rule)) (let ((at-least-one-reading-discarded-p nil)) (setf (cohort sentence position) (loop for reading in (cohort sentence position) if (and reading (loop for features in (rule-target rule) thereis (feature-subset-p *cg* features reading))) collect reading else collect (when reading (setf at-least-one-reading-discarded-p t) (when *record-discarded-correct-readings-p* (record-discarded-correct-readings sentence position reading)) #+verbose (format t "~%select-discarded (~d): ~s" (rule-id rule) reading) nil) ;; needed because (the position of the current) reading must not change )) at-least-one-reading-discarded-p))) #+disambiguation-rule-tree (defmethod apply-rule ((rule select-rule) sentence position reading) (declare (ignore reading)) (let ((at-least-one-reading-discarded-p nil)) (loop for reading+rest on (cohort sentence position) do (unless (and (car reading+rest) (loop for features in (rule-target rule) thereis (feature-subset-p *cg* features (car reading+rest)))) (when (car reading+rest) (setf at-least-one-reading-discarded-p t) (when *record-discarded-correct-readings-p* (record-discarded-correct-readings sentence position (cdr reading+test)))) (setf (car reading+rest) nil))) at-least-one-reading-discarded-p)) #+(and disambiguation-rule-tree old) (defmethod apply-rule ((rule select-rule) sentence position reading) (declare (ignore reading)) (let ((at-least-one-reading-discarded-p nil)) (setf (cohort sentence position) (loop for reading in (cohort sentence position) if (and reading (loop for features in (rule-target rule) thereis (feature-subset-p *cg* features reading))) collect reading else collect (when reading (setf at-least-one-reading-discarded-p t) (when *record-discarded-correct-readings-p* (record-discarded-correct-readings sentence position reading)) nil) ;; needed because (the position of the current) reading must not change )) at-least-one-reading-discarded-p)) #-disambiguation-rule-tree (defmethod apply-rule ((rule discard-rule) sentence position reading) (declare (ignore reading)) (when ;; make sure that we won't discard all readings if rule applies (loop for reading in (cohort sentence position) thereis (and reading (not (loop for features in (rule-target rule) thereis (feature-subset-p *cg* features reading))))) (when (check-constraints *cg* sentence position (rule-constraints rule) (boundary-mode rule)) (dotimes (n (length (cohort sentence position))) (let ((r (nth n (cohort sentence position)))) (when (and r (loop for features in (rule-target rule) thereis (feature-subset-p *cg* features r))) (when *record-discarded-correct-readings-p* (record-discarded-correct-readings sentence position r)) (setf (nth n (cohort sentence position)) nil)))) t))) #+disambiguation-rule-tree (defmethod apply-rule ((rule discard-rule) sentence position reading) (declare (ignore reading)) (dotimes (n (length (cohort sentence position))) (let ((r (nth n (cohort sentence position)))) (when (and r (loop for features in (rule-target rule) thereis (feature-subset-p *cg* features r))) (setf (nth n (cohort sentence position)) nil)))) t) (defmethod apply-rule ((rule clb-rule) sentence position reading) (when (check-constraints *cg* sentence position (rule-constraints rule) (boundary-mode rule)) (setf (nth reading (cohort sentence position)) (cons 'clb (cdr (nth reading (cohort sentence position))))) t)) ;; (8.) (defmethod apply-rule ((rule mapping-rule) sentence position reading) (let ((reading (nth reading (cohort sentence position)))) ;;(let ((*print-constraints-p* (= (rule-id rule) 65))) ;; (when *print-constraints-p* (print (list :reading! reading))) (when #-rule-tree-xx(check-constraints *cg* sentence position (rule-constraints rule) (boundary-mode rule) nil nil (list reading) ;;nil ;; this is wrong #+bug reading) #+rule-tree-xx t (dolist (label (rule-labels rule)) (set-feature (cdr reading) label)) t)));;) ;; (12.) (defmethod apply-rule ((rule syntactic-select-rule) sentence position reading) (declare (ignore sentence position)) (dolist (code (syntactic-function-codes *cg*)) (setf (sbit (cdr reading) code) (if (= code (feature-code (rule-target rule))) 1 0))) t) ;; (12.) (defmethod apply-rule ((rule syntactic-discard-rule) sentence position reading) (declare (ignore sentence position)) (setf (sbit (cdr reading) (feature-code (rule-target rule))) 0) t) (defun check-string-condition (condition word) (if (eq (car condition) 'NOT) (not (check-string-condition (cdr condition) word)) (destructuring-bind (op str) condition (let ((end (length word)) (len (length str))) (ecase op (* (search str word :start2 0 :end2 end)) (^ (and (>= end (length str)) (string= str word :start2 0 :end2 len))) (~ (and (> end 2) (search str word :start2 1 :end2 (1- end)))) ($ (and (>= end (length str)) (string= str word :start2 (- end len) :end2 end)))))))) (defun check-string-conditions (conditions word) (loop for condition in conditions always (check-string-condition condition word))) ;; (14.) (defmethod apply-rule ((rule morphological-heuristics-rule) sentence position ignore) (declare (ignore ignore)) (with-slots (target constraints string-conditions string-changes) rule (let ((word (sentence-word sentence position) ;; *** ??? why not CAAR? #+old (car (aref sentence position)))) (when (and (check-string-conditions string-conditions word) (check-constraints *cg* sentence position constraints (boundary-mode rule))) (let ((base-form word #+old(subseq word 1 (1- (length word))))) (destructuring-bind (&optional strip add) string-changes (when strip ; what if BASE-FORM does not end in STRIP? (let ((pos (- (length base-form) (length strip)))) (when (and (> pos 0) (string= base-form strip :start1 pos)) (setf base-form (subseq base-form 0 pos))))) (when add (setf base-form (concatenate 'string base-form add)))) (setf (cohort sentence position) #+old(cdr (aref sentence position)) ;; there might happen destructive changes in target! (mapcar (lambda (reading) (cons base-form (copy-seq reading))) target)) t))))) (defmethod apply-rule :after ((rule rule) sentence position reading) (declare (ignore position reading sentence)) (incf (rule-frequency rule))) ;; optimizing rule order (19. p.58) ;; *** Still has to be invoked manually. ******************* (defmethod optimize-rule-order ((cg constraint-grammar)) (with-slots (rules) cg (dolist (type '(=! =!! =0 =!h =!!h =0h)) (when-let (table (gethash type rules)) (maphash (lambda (feature rule-list) (setf (gethash feature table) (sort rule-list #'> :key (lambda (fs+rule) (rule-frequency (cdr fs+rule)))))) table))) rules)) ;(optimize-rule-order *nbo-cg*) #+old (defmethod optimize-rule-order ((cg constraint-grammar)) (with-slots (disambiguation-constraints heuristic-disambiguation-constraints syntactic-constraints heuristic-syntactic-constraints morphosyntactic-mappings) cg (dolist (table (list disambiguation-constraints heuristic-disambiguation-constraints syntactic-constraints (aref heuristic-syntactic-constraints 0) (aref heuristic-syntactic-constraints 1) (aref heuristic-syntactic-constraints 2) (aref heuristic-syntactic-constraints 3) morphosyntactic-mappings)) (optimize-rule-order table)))) ;; tull? (defun min-context (rule) (reduce #'min (rule-constraints rule) :key (lambda (constraint) (or (parse-position-operator (if (eq 'not (car constraint)) (cadr constraint) (car constraint))) 1000)) :initial-value 1000)) #+old (defmethod optimize-rule-order ((table hash-table)) (maphash (lambda (key rule-list) (setf (gethash key table) (stable-sort rule-list (lambda (r1 r2) (let* ((rl1 (if (atom r1) (cons nil r1) r1)) (rl2 (if (atom r2) (cons nil r2) r2)) (r1 (cdr rl1)) (r2 (cdr rl2)) ;(l1 (car rl1)) ;(l2 (car rl2)) ) (cond ;; try rules with non-null domain first ((and (rule-domain r1) (not (rule-domain r2))) t) ((and (not (rule-domain r1)) (rule-domain r2)) nil) #+ignore ((not (eq (rule-type r1) (rule-type r2))) (or (member (rule-type r1) '(=0 =s0)) (and (member (rule-type r1) '(=! =!! =s!)) (not (member (rule-type r2) '(=0 =s0)))))) #+ign ((< (length l1) (length l2)) nil) #+ign ((> (length l1) (length l2)) t) #+ign ((< (min-context r1) (min-context r2)) t) #+ign ((> (min-context r1) (min-context r2)) nil) #+ignore (t (print (< (rule-frequency r1) (rule-frequency r2)))) #-ignore ((eq (rule-type r1) (rule-type r2)) (> (rule-frequency r1) (rule-frequency r2))) (t ;; discard < select (or (member (rule-type r1) '(=0 =s0)) (and (member (rule-type r1) '(=! =!! =s!)) (not (member (rule-type r2) '(=0 =s0))))) t) )))))) table)) #+test (optimize-rule-order *cg*) #+new (defmethod optimize-rule-order ((table hash-table)) (maphash (lambda (key rule-list) (setf (gethash key table) (stable-sort rule-list (lambda (r1 r2) (let* ((rl1 (if (atom r1) (cons nil r1) r1)) (rl2 (if (atom r2) (cons nil r2) r2)) (r1 (cdr rl1)) (r2 (cdr rl2)) (l1 (car rl1)) (l2 (car rl2))) (cond ;; try rules with non-null domain first ((and (rule-domain r1) (not (rule-domain r2))) t) ((and (not (rule-domain r1)) (rule-domain r2)) nil) #+ignore ((not (eq (rule-type r1) (rule-type r2))) (or (member (rule-type r1) '(=0 =s0)) (and (member (rule-type r1) '(=! =!! =s!)) (not (member (rule-type r2) '(=0 =s0)))))) ((< (length l1) (length l2)) nil) ((> (length l1) (length l2)) t) ;; new ((< (min-context r1) (min-context r2)) t) ((> (min-context r1) (min-context r2)) nil) ((not (eq (rule-type r1) (rule-type r2))) ;; discard < select (or (member (rule-type r1) '(=0 =s0)) (and (member (rule-type r1) '(=! =!! =s!)) (not (member (rule-type r2) '(=0 =s0)))))) ((< (length (rule-constraints r1)) (length (rule-constraints r2))) t) ((> (length (rule-constraints r1)) (length (rule-constraints r2))) nil))))))) table)) ;; adj < subst: 19 ;; det < adj: 29 ;; subst < verb: 321 ;; verb < subst: 209 ;; test (defun sort-cohort (cohort cg) (labels ((precedes-p (x y &optional (precedence-list (feature-precedence cg))) (let ((code (car precedence-list))) (or (null x) (null y) (null code) (has-feature-code-p x code) (and (not (has-feature-code-p y code)) (precedes-p x y (cdr precedence-list))))))) (stable-sort cohort #'precedes-p :key #'cdr))) (defun sort-cohorts (sentence) (loop for i from 1 to (1- (sentence-length sentence)) do (setf (cohort sentence i) (sort-cohort (cohort sentence i) (constraint-grammar sentence)))) sentence) (defmethod feature-subset-p ((cg constraint-grammar) primitives feature-list) "Checks if all the primitives occur in FEATURE-LIST." (has-features-p (cdr feature-list) primitives)) (defmethod code-vector-sort-array ((cg constraint-grammar)) (code-vector-sort-array (multi-tagger cg))) (defmethod feature-vector ((cg constraint-grammar)) (feature-vector (multi-tagger cg))) #+obsolete (defmethod disambiguation-domain-feature-codes ((cg constraint-grammar) domain heuristic-rules-p) (let ((features (if heuristic-rules-p (%heuristic-disambiguation-domain-features cg) (%disambiguation-domain-features cg)))) (multiple-value-bind (feature-codes exists-p) (gethash domain features) (if exists-p feature-codes (setf (gethash domain features) (when-let (domain-table (gethash domain (if heuristic-rules-p (heuristic-disambiguation-constraints cg) (disambiguation-constraints cg)))) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature domain-table) collect (feature-code feature)))))))) #+obsolete (defmethod disambiguation-domain-feature-codes ((cg constraint-grammar) domain heuristic-rules-p) (let ((features (if heuristic-rules-p (%heuristic-disambiguation-domain-features cg) (%disambiguation-domain-features cg)))) (multiple-value-bind (feature-codes exists-p) (gethash domain features) (if exists-p feature-codes (setf (gethash domain features) (when-let (domain-table (gethash domain (domain-rules cg))) (collecting (dolist (type (if heuristic-rules-p '(=!h =!!h =0h) '(=! =!! =0))) (when-let (table (gethash type domain-table)) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature table) do (collect (feature-code feature)))))))))))) #+test (rule-type-feature-codes (gethash "nbo-feb02" *cg-table*) '=0) #-disambiguation-rule-tree (defmethod rule-type-feature-codes ((cg constraint-grammar) type &optional domain) (with-slots (rules %feature-codes domain-rules %domain-feature-codes) cg (if domain (multiple-value-bind (domain-table exists-p) (gethash domain %domain-feature-codes) (if exists-p (gethash type domain-table) (multiple-value-bind (domain-table exists-p) ;; find out if domain is used (gethash domain domain-rules) (declare (ignore domain-table)) (when exists-p (let ((domain-table (make-hash-table))) (setf (gethash domain %domain-feature-codes) domain-table) (dolist (type (collecting (maphash (lambda (type val) (declare (ignore val)) (collect type)) (gethash domain domain-rules)))) (setf (gethash type domain-table) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (when-let (type-rules (gethash type (gethash domain domain-rules))) (gethash feature type-rules)) collect (feature-code feature))))))))) (multiple-value-bind (feature-codes exists-p) (gethash type %feature-codes) (if exists-p feature-codes (setf (gethash type %feature-codes) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature (gethash type (rules cg))) collect (feature-code feature)))))))) #+obsolete (defmethod disambiguation-select-feature-codes ((cg constraint-grammar) heuristic-rules-p) (macrolet ((slot () `(if heuristic-rules-p '%heuristic-disambiguation-select-features '%disambiguation-select-features))) (when (eq (slot-value cg (slot)) t) (setf (slot-value cg (slot)) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature (gethash (if heuristic-rules-p '=!h '=!) (rules cg))) collect (feature-code feature)))) (slot-value cg (slot)))) #+obsolete (defmethod disambiguation-discard-feature-codes ((cg constraint-grammar) heuristic-rules-p) (macrolet ((slot () `(if heuristic-rules-p '%heuristic-disambiguation-discard-features '%disambiguation-discard-features))) (when (eq (slot-value cg (slot)) t) (setf (slot-value cg (slot)) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature (gethash (if heuristic-rules-p '=0h '=0) (rules cg))) collect (feature-code feature)))) (slot-value cg (slot)))) #+obsolete (defmethod disambiguation-select-feature-codes ((cg constraint-grammar) heuristic-rules-p) (macrolet ((slot () `(if heuristic-rules-p '%heuristic-disambiguation-select-features '%disambiguation-select-features))) (when (eq (slot-value cg (slot)) t) (setf (slot-value cg (slot)) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature (gethash '=! (if heuristic-rules-p (heuristic-disambiguation-constraints cg) (disambiguation-constraints cg)))) collect (feature-code feature)))) (slot-value cg (slot)))) #+ignore (let ((*cg* *nbo-cg*) (*tagger* *nbo-tagger*)) (disambiguation-discard-feature-codes *cg* t)) #+obsolete (defmethod disambiguation-discard-feature-codes ((cg constraint-grammar) heuristic-rules-p) (macrolet ((slot () `(if heuristic-rules-p '%heuristic-disambiguation-discard-features '%disambiguation-discard-features))) (when (eq (slot-value cg (slot)) t) (setf (slot-value cg (slot)) (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature (gethash '=0 (if heuristic-rules-p (heuristic-disambiguation-constraints cg) (disambiguation-constraints cg)))) collect (feature-code feature)))) (slot-value cg (slot)))) (defmethod mapping-feature-codes ((cg constraint-grammar)) (with-slots (%mapping-features morphosyntactic-mappings) cg (when (eq %mapping-features t) (setf %mapping-features (loop for pos across (code-vector-sort-array cg) for feature = (aref (feature-vector cg) pos) when (gethash feature morphosyntactic-mappings) collect (feature-code feature)))) %mapping-features)) #-disambiguation-rule-tree (defmacro do-disambiguation-rules ((rule reading word-form cg heuristic-rules-p rule-type) &body body) "Loops over the RULEs applicable for the given reading executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (domain (gensym)) (%rule (gensym)) (code (gensym)) (type (gensym)) (stripped-type (gensym))) `(let ((,constraints-table (rules ,cg)) (,type ,rule-type) (,domain nil)) (declare (dynamic-extent ,domain)) (when-let (,stripped-type (type-strip-domain ,type)) (setf ,domain (string-downcase ,word-form)) (let ((,rules-table (gethash ,domain (domain-rules ,cg)))) #+debug (when ,rules-table (print (list ,domain (collecting (maphash (lambda (k v) (collect (cons k v))) ,rules-table))))) (setf ,constraints-table ,rules-table ,type ,stripped-type))) (when ,constraints-table (when-let (,rules-table (gethash (type+level-to-type ,type (if ,heuristic-rules-p 1 0)) ,constraints-table)) (loop for ,code in (rule-type-feature-codes ,cg (type+level-to-type ,type (if ,heuristic-rules-p 1 0)) ,domain) when (has-feature-code-p (cdr ,reading) ,code) do (dolist (,%rule (gethash (code-feature ,code) ,rules-table)) (let ((,rule (cond ((atom ,%rule) ,%rule) ((feature-subset-p ,cg (car ,%rule) ,reading) (cdr ,%rule)) (t nil)))) (when ,rule ,@body))))))))) #+disambiguation-rule-tree (defmacro do-disambiguation-rules ((rule reading word-form sentence position cg heuristic-nivau rule-type) &body body) "Loops over the rules applicable for the given READING executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (rule-tree (gensym)) (type (gensym)) (level-type (gensym)) (stripped-type (gensym)) (domain (gensym))) `(let ((,constraints-table (rules ,cg)) (,type ,rule-type) (,domain nil)) (declare (dynamic-extent ,domain)) (when-let (,stripped-type (type-strip-domain ,type)) (setf ,domain (string-downcase ,word-form)) (let ((,rules-table (gethash ,domain (domain-rules ,cg)))) (setf ,constraints-table ,rules-table ,type ,stripped-type))) (when ,constraints-table (let ((,level-type (type+level-to-type ,type ,heuristic-nivau))) (when-let (,rule-tree (gethash ,level-type ,constraints-table)) (block feature-rules (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position (cdr ,reading)) ,@body)))))))) #+(and disambiguation-rule-tree old) (defmacro do-disambiguation-rules ((rule reading word-form sentence position cg heuristic-nivau rule-type) &body body) "Loops over the rules applicable for the given READING executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (rule-tree (gensym)) (code (gensym)) (type (gensym)) (level-type (gensym)) (stripped-type (gensym)) (domain (gensym))) `(let ((,constraints-table (rules ,cg)) (,type ,rule-type) (,domain nil)) (declare (dynamic-extent ,domain)) (when-let (,stripped-type (type-strip-domain ,type)) (setf ,domain (string-downcase ,word-form)) (let ((,rules-table (gethash ,domain (domain-rules ,cg)))) (setf ,constraints-table ,rules-table ,type ,stripped-type))) (when ,constraints-table (let ((,level-type (type+level-to-type ,type ,heuristic-nivau))) (when-let (,rules-table (gethash ,level-type ,constraints-table)) (loop for ,code in (rule-type-feature-codes ,cg ,level-type ,domain) when (has-feature-code-p (cdr ,reading) ,code) do (block feature-rules ;;(Print ,code) (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position ,reading) ;;(print (list "trying:" ,rule ,sentence ,position)) (when (feature-subset-p ,cg (cdar (rule-target ,rule)) ,reading) ;;(print (list ,rule ,sentence ,position)) ,@body))))))))))) ;; *nbo-cg* (defun type+level-to-type (type &optional (heuristic-level 0)) (case type (:select (case heuristic-level (0 '=!) (1 '=!h))) (:strong-select (case heuristic-level (0 '=!!) (1 '=!!h))) (:discard (case heuristic-level (0 '=0) (1 '=0h))) (:syntactic-select (ecase heuristic-level (0 '=s!) (1 '=s!h1) (2 '=s!h2) (3 '=s!h3))) (:syntactic-discard (ecase heuristic-level (0 '=s0) (1 '=s0h1) (2 '=s0h2) (3 '=s0h3))) (:named-entity-select (ecase heuristic-level (0 '=n!) (1 '=n!h1) (2 '=n!h2) (3 '=n!h3))) (:named-entity-discard (ecase heuristic-level (0 '=n0) (1 '=n0h1) (2 '=n0h2) (3 '=n0h3))) (:syntactic-map (case heuristic-level (0 '=m))) (:named-entity-map (case heuristic-level (0 '=nm))) (otherwise type))) (defmethod cohort-length ((token list)) (1- (length token))) (defmethod cohort-length ((token token)) (length (token-features token))) (defmethod token-cohort ((token list)) (cdr token)) (defmethod (setf token-cohort) (value (token list)) (setf (cdr token) value)) (defmethod token-cohort ((token token)) (token-features token)) (defmethod (setf token-cohort) (value (token token)) (setf (token-features token) value)) ;; *** preliminary, not thread safe! (defparameter *constraints-memo-table/check-all* (make-hash-table)) (defparameter *constraints-memo-table* (make-hash-table)) (defparameter *memo-counter* 0) #+copy (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional (heuristic-level 0) (codes (syntactic-function-codes cg)) (types '(:domains-syntactic-select :domains-syntactic-discard :syntactic-select :syntactic-discard))) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position)) (label-discarded-p nil)) (dotimes (i cohort-length) (let* ((reading (nth i cohort)) (features (cdr reading))) ;; disambiguation is needed only if there is more than one syntactic label (when (and reading (syntactic-functions-ambiguous-p (cdr reading) codes)) (block reading (dolist (type types) (do-syntactic-rules (rule word-form sentence position features cg heuristic-level type codes) (apply-rule rule sentence position reading) (setf label-discarded-p t) (when (or (select-rule-p type) (not (syntactic-functions-ambiguous-p (cdr reading) codes))) ;; successfully disambiguated (return-from reading)) (unless (zerop heuristic-level) ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)) (return-from feature-rules))))))) label-discarded-p)) ; *nbo-cg* ; *sentence* #+mcl (defun report-time (form thunk) (let ((initial-consed (total-bytes-allocated))) (multiple-value-prog1 (funcall thunk) (let* ((s *trace-output*) (bytes-consed (- (total-bytes-allocated) initial-consed (if (fixnump initial-consed) 0 16)))) (unless (eql 0 bytes-consed) (format s "~% ~:D bytes of memory allocated." bytes-consed)) (format s "~&"))))) #+mcl (defparameter *total-bytes* 0) #+mcl (defmacro runtime (&body body) (let ((run-start (gensym)) (real-start (gensym)) (initial-consed (gensym))) `(let ((,initial-consed (ccl::total-bytes-allocated)) (,real-start (get-internal-real-time)) (,run-start (get-internal-run-time))) (multiple-value-prog1 ,@body (format *trace-output* "~&Realtime: ~:d ms, runtime: ~:d ms; ~:d bytes allocated" (- (get-internal-real-time) ,real-start) (- (get-internal-run-time) ,run-start) (let ((bytes (- (ccl::total-bytes-allocated) ,initial-consed (if (ccl::fixnump ,initial-consed) 0 16)))) (incf *total-bytes* bytes) bytes)))))) #+test (runtime (let ((*total-bytes* 0)) (d "Dette er en test.") *total-bytes*)) (defmethod apply-disambiguation-rules ((cg constraint-grammar) sentence position &optional heuristic-rules-p) "Applies rules at one sentence position. Returns T if a disambiguation occurred." (declare (optimize speed) (fixnum position)) (let* ((token (nth-token position sentence)) (cohort-length (cohort-length token)) (word-form (word-form sentence position)) ;;(tried-rules ()) (reading-discarded-p nil)) (unless (<= cohort-length 1) (dolist (type '(:domains-select :domains-strong-select :domains-discard :select :strong-select :discard)) (dotimes (i cohort-length) (when-let (reading (nth i (token-cohort token))) (block reading (progn ;runtime (do-disambiguation-rules #+disambiguation-rule-tree (rule reading word-form sentence position cg (if heuristic-rules-p 1 0) type) #-disambiguation-rule-tree (rule reading word-form cg heuristic-rules-p type) (unless nil ;; (find rule tried-rules) (when (apply-rule rule sentence position i) (setf reading-discarded-p t) ;; check ordinary rules for whole sentence if heuristic rule ;; was successful (10.) (when (= (count-if-not #'null (token-cohort token)) 1) (setf (token-cohort token) (delete-if #'null (token-cohort token))) (return-from apply-disambiguation-rules t)) (when heuristic-rules-p (disambiguate-sentence cg sentence position)) ;; after successful application of heuristics rule try ;; ordinary disambiguation if there is ambiguity left (10.) (return-from reading))))))))) reading-discarded-p))) ;; simply maps over all rules in a rule tree and applies FN to the rules (defmethod map-tree-rules ((cg constraint-grammar) rule-tree fn) (labels ((walk (tree) (mapc fn (constraint-rules tree)) (mapc #'walk (child-constraints tree)))) (walk rule-tree))) ;; new 27.02.2001 (defmacro do-tree-rules ((rule cg rule-tree sentence position features &optional boundary-mode) &body body) (let ((constraint (gensym)) (next-constraint (gensym)) (check-all-p (gensym)) (satisfied-p/readings (gensym)) (link-pos (gensym)) (link-code (gensym)) (rules (gensym)) (c/a-p (gensym)) (sub-tree (gensym)) (code (gensym)) (not-c/a-p (gensym)) (c/a-satisfied-p/readings (gensym)) (c/a-link-pos (gensym)) (c/a-link-code (gensym))) `(labels ((walk-constraint (tree link-position link readings) (if (listp tree) ;; must be feature constraint (from rule target) (let ((,code (car tree))) (if (integerp ,code) ;; *** remove this later! (when (has-feature-code-p ,features ,code) (mapc (lambda (sub-tree) (walk-constraint sub-tree link-position link readings)) (cdr tree))) (walk-constraint (car tree) link-position link readings))) ;; is constraint node (let* ((,constraint (constraint tree)) (,rules (constraint-rules tree)) (,c/a-p nil) ; T if constraint has been tested (,not-c/a-p nil) ,c/a-satisfied-p/readings ,c/a-link-pos ,c/a-link-code ,satisfied-p/readings ,link-pos ,link-code) (when ,rules (multiple-value-setq (,satisfied-p/readings ,link-pos ,link-code) (check-constraint ,cg ,sentence ,position ,constraint ,boundary-mode link-position link)) (setf ,not-c/a-p t) (when ,satisfied-p/readings (labels ((walk-rule (,rule) (if (listp ,rule) (let ((,code (car ,rule))) (if (integerp ,code) ;; *** remove this later! (when (has-feature-code-p ,features ,code) (mapc #'walk-rule (cdr ,rule))) (walk-rule ,code))) (progn ,@body)))) (declare (dynamic-extent walk-rule)) (mapc #'walk-rule ,rules)))) (labels ((walk-child (,sub-tree) (if (listp ,sub-tree) (let ((,code (car ,sub-tree))) (if (integerp ,code) ;; *** remove this later! (when (has-feature-code-p ,features ,code) (mapc #'walk-child (cdr ,sub-tree))) (walk-child ,code))) ;; check if this and the next constraint apply to the same position ;; and are simple constraints (let* ((,next-constraint (constraint ,sub-tree)) (,check-all-p (and (integerp (car ,constraint)) (eql (car ,constraint) (car ,next-constraint))))) (cond (,check-all-p (unless ,c/a-p ; compute those only in case we need them (multiple-value-setq (,c/a-satisfied-p/readings ,c/a-link-pos ,c/a-link-code) (check-constraint ,cg ,sentence ,position ,constraint ,boundary-mode link-position link t readings)) (setf ,c/a-p t)) (when ,c/a-satisfied-p/readings (walk-constraint ,sub-tree ,c/a-link-pos ,c/a-link-code ,c/a-satisfied-p/readings))) (t (unless ,not-c/a-p (multiple-value-setq (,satisfied-p/readings ,link-pos ,link-code) (check-constraint ,cg ,sentence ,position ,constraint ,boundary-mode link-position link)) (setf ,not-c/a-p t)) (when ,satisfied-p/readings (walk-constraint ,sub-tree ,link-pos ,link-code nil)))))))) (declare (dynamic-extent walk-child)) (mapc #'walk-child (child-constraints tree))))))) (declare (dynamic-extent walk-constraint)) (walk-constraint ,rule-tree nil nil nil)))) (defmacro do-syntactic-tree-rules ((rule cg rule-tree sentence position &optional boundary-mode) &body body) (let ((constraint (gensym)) (next-constraint (gensym)) (check-all-p (gensym)) (satisfied-p/readings (gensym)) (link-pos (gensym)) (link-code (gensym)) (rules (gensym)) (c/a-p (gensym)) (not-c/a-p (gensym)) (c/a-satisfied-p/readings (gensym)) (c/a-link-pos (gensym)) (c/a-link-code (gensym))) `(labels ((walk (tree link-position link readings) (let* ((,constraint (constraint tree)) (,rules (constraint-rules tree)) (,c/a-p nil) ; T if constraint has been tested (,not-c/a-p nil) ,c/a-satisfied-p/readings ,c/a-link-pos ,c/a-link-code ,satisfied-p/readings ,link-pos ,link-code) (when ,rules (multiple-value-setq (,satisfied-p/readings ,link-pos ,link-code) (check-constraint ,cg ,sentence ,position ,constraint ,boundary-mode link-position link)) (setf ,not-c/a-p t) (when ,satisfied-p/readings (dolist (,rule ,rules) (progn ,@body)))) (dolist (sub-tree (child-constraints tree)) ;; check if this and the next constraint apply to the same position ;; and are simple constraints (let* ((,next-constraint (constraint sub-tree)) (,check-all-p (and (integerp (car ,constraint)) (eql (car ,constraint) (car ,next-constraint))))) (cond (,check-all-p (unless ,c/a-p ; compute those only in case we need them (multiple-value-setq (,c/a-satisfied-p/readings ,c/a-link-pos ,c/a-link-code) (check-constraint ,cg ,sentence ,position ,constraint ,boundary-mode link-position link t readings)) (setf ,c/a-p t)) (when ,c/a-satisfied-p/readings (walk sub-tree ,c/a-link-pos ,c/a-link-code ,c/a-satisfied-p/readings))) (t (unless ,not-c/a-p (multiple-value-setq (,satisfied-p/readings ,link-pos ,link-code) (check-constraint ,cg ,sentence ,position ,constraint ,boundary-mode link-position link)) (setf ,not-c/a-p t)) (when ,satisfied-p/readings (walk sub-tree ,link-pos ,link-code nil))))))))) (walk ,rule-tree nil nil nil)))) (defmacro do-syntactic-rules ((rule word-form sentence position features cg heuristic-level rule-type codes) &body body) "Loops over the rules applicable for the given READING executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (rule-tree (gensym)) (code (gensym)) (type (gensym)) (stripped-type (gensym)) (domain (gensym))) `(let ((,constraints-table (rules ,cg)) (,type ,rule-type) (,domain nil)) (declare (dynamic-extent ,domain)) (when-let (,stripped-type (type-strip-domain ,type)) (setf ,domain (string-downcase ,word-form)) (let ((,rules-table (gethash ,domain (domain-rules ,cg)))) (setf ,constraints-table ,rules-table ,type ,stripped-type))) (when ,constraints-table (when-let (,rules-table (gethash (type+level-to-type ,type ,heuristic-level) ,constraints-table)) (loop for ,code in ,codes when (has-feature-code-p ,features ,code) do (block feature-rules (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-syntactic-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body))))))))) (defun type-strip-domain (type) (case type (:domains-select :select) (:domains-strong-select :strong-select) (:domains-discard :discard) (:domains-syntactic-select :syntactic-select) (:domains-syntactic-discard :syntactic-discard) (:domains-named-entity-select :named-entity-select) (:domains-named-entity-discard :named-entity-discard) (otherwise nil))) #+copy (defun type+level-to-type (type heuristic-level) (ecase type (:select (ecase heuristic-level (0 '=!) (1 '=!h))) (:strong-select (ecase heuristic-level (0 '=!!) (1 '=!!h))) (:discard (ecase heuristic-level (0 '=0) (1 '=0h))) (:syntactic-select (ecase heuristic-level (0 '=s!) (1 '=s!h1) (2 '=s!h2) (3 '=s!h3))) (:syntactic-discard (ecase heuristic-level (0 '=s0) (1 '=s0h1) (2 '=s0h2) (3 '=s0h3))) (:named-entity-select (ecase heuristic-level (0 '=n!) (1 '=n!h1) (2 '=n!h2) (3 '=n!h3))) (:named-entity-discard (ecase heuristic-level (0 '=n0) (1 '=n0h1) (2 '=n0h2) (3 '=n0h3))))) #+old (defmacro do-syntactic-rules ((rule word-form sentence position features cg heuristic-level rule-type &optional table codes) &body body) "Loops over the rules applicable for the given READING executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (rule-tree (gensym)) (function-codes (gensym)) (code (gensym)) (downcase-form (gensym))) `(let ((,constraints-table (or ,table (if ,heuristic-level (aref (heuristic-syntactic-constraints ,cg) ,heuristic-level) (syntactic-constraints ,cg)))) (,function-codes (or ,codes (syntactic-function-codes ,cg)))) ,(ecase rule-type (:domains `(let* ((,downcase-form (string-downcase ,word-form)) (,rules-table (gethash ,downcase-form ,constraints-table))) (declare (dynamic-extent ,downcase-form)) (when ,rules-table (loop for ,code in ,function-codes when (has-feature-code-p ,features ,code) do (block feature-rules (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body))))))) ((=s! =n!) `(let ((,rules-table (gethash ',rule-type ,constraints-table))) (when ,rules-table (loop for ,code in ,function-codes when (has-feature-code-p ,features ,code) do (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body)))))) ((=s0 =n0) `(let ((,rules-table (gethash ',rule-type ,constraints-table))) (when ,rules-table (loop for ,code in ,function-codes when (has-feature-code-p ,features ,code) do (block feature-rules (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body))))))))))) #+old (defmacro do-syntactic-rules ((rule word-form sentence position features cg heuristic-level rule-type &optional table codes) &body body) "Loops over the rules applicable for the given READING executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (rule-tree (gensym)) (function-codes (gensym)) (code (gensym)) (downcase-form (gensym))) `(let ((,constraints-table (or ,table (if ,heuristic-level (aref (heuristic-syntactic-constraints ,cg) ,heuristic-level) (syntactic-constraints ,cg)))) (,function-codes (or ,codes (syntactic-function-codes ,cg)))) ,(ecase rule-type (:domains `(let* ((,downcase-form (string-downcase ,word-form)) (,rules-table (gethash ,downcase-form ,constraints-table))) (declare (dynamic-extent ,downcase-form)) (when ,rules-table (loop for ,code in ,function-codes when (has-feature-code-p ,features ,code) do (block feature-rules (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body))))))) ((=s! =n!) `(let ((,rules-table (gethash ',rule-type ,constraints-table))) (when ,rules-table (loop for ,code in ,function-codes when (has-feature-code-p ,features ,code) ;; do (print ,code) do (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body)))))) ((=s0 =n0) `(let ((,rules-table (gethash ',rule-type ,constraints-table))) (when ,rules-table (loop for ,code in ,function-codes when (has-feature-code-p ,features ,code) do (block feature-rules (when-let (,rule-tree (gethash (code-feature ,code) ,rules-table)) (do-tree-rules (,rule ,cg ,rule-tree ,sentence ,position) ,@body))))))))))) #+old (defmethod syntactic-function-codes ((cg constraint-grammar)) (with-slots (syntactic-functions %syntactic-function-codes) cg (when (eq %syntactic-function-codes t) (setf %syntactic-function-codes ()) (maphash (lambda (syntactic-function value) (declare (ignore value)) (push (feature-code syntactic-function) %syntactic-function-codes)) syntactic-functions)) %syntactic-function-codes)) (defmethod syntactic-function-codes ((cg constraint-grammar)) (with-slots (syntactic-functions %syntactic-function-codes) cg (when (eq %syntactic-function-codes t) (setf %syntactic-function-codes ()) (maphash (lambda (syntactic-function value) (declare (ignore value)) (push (feature-code syntactic-function) %syntactic-function-codes)) syntactic-functions)) %syntactic-function-codes)) (defun syntactic-functions-ambiguous-p (features syntactic-function-codes) (loop with foundp = nil for code in syntactic-function-codes when (= 1 (sbit features code)) do (if foundp (return-from syntactic-functions-ambiguous-p t) (setf foundp t))) nil) (defun select-rule-p (type) (find type '(:select :domains-select :syntactic-select :domains-syntactic-select :named-entity-select :domains-named-entity-select))) ;; 1.2.2001 (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional (heuristic-level 0) (codes (syntactic-function-codes cg)) (types '(:domains-syntactic-select :domains-syntactic-discard :syntactic-select :syntactic-discard))) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position)) (label-discarded-p nil)) (dotimes (i cohort-length) (let* ((reading (nth i cohort)) (features (cdr reading))) ;; disambiguation is needed only if there is more than one syntactic label (when (and reading (syntactic-functions-ambiguous-p (cdr reading) codes)) (block reading (dolist (type types) (do-syntactic-rules (rule word-form sentence position features cg heuristic-level type codes) (apply-rule rule sentence position reading) (setf label-discarded-p t) (when (or (select-rule-p type) (not (syntactic-functions-ambiguous-p (cdr reading) codes))) ;; successfully disambiguated (return-from reading)) (unless (zerop heuristic-level) ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)) (return-from feature-rules))))))) label-discarded-p)) ;; new 4.11.2000 #+old (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional heuristic-level) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position)) (label-discarded-p nil) (syntactic-function-codes (syntactic-function-codes cg))) (dotimes (i cohort-length) (let* ((reading (nth i cohort)) (features (cdr reading))) ;; disambiguation is needed only if there is more than one syntactic label (when (and reading (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes)) (block reading (do-syntactic-rules (rule word-form sentence position features cg heuristic-level :domains) (apply-rule rule sentence position reading) (setf label-discarded-p t) (unless (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)) (return-from feature-rules)) (do-syntactic-rules (rule word-form sentence position features cg heuristic-level =s!) (apply-rule rule sentence position reading) (setf label-discarded-p t) (return-from reading)) (do-syntactic-rules (rule word-form sentence position features cg heuristic-level =s0) (apply-rule rule sentence position reading) (setf label-discarded-p t) (unless (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)) (return-from feature-rules)))))) label-discarded-p)) ;; new 3.11.2000 #+old (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional heuristic-level) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position)) (label-discarded-p nil) (syntactic-function-codes (syntactic-function-codes cg))) (dotimes (i cohort-length) (let ((reading (nth i cohort))) ;; disambiguation is needed only if there is more than one syntactic label (when (and reading (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes)) (block reading (do-syntactic-rules (rule code word-form #+rule-tree sentence #+rule-tree position reading cg heuristic-level :domains) (when (apply-rule rule sentence position reading) (setf label-discarded-p t) (unless (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)))) (do-syntactic-rules (rule code word-form #+rule-tree sentence #+rule-tree position reading cg heuristic-level :select) (when (apply-rule rule sentence position reading) (setf label-discarded-p t) (unless (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)))) (do-syntactic-rules (rule code word-form #+rule-tree sentence #+rule-tree position reading cg heuristic-level :discard) ;(setf (sbit (cdr reading) code) 0) (when (apply-rule rule sentence position reading) (setf label-discarded-p t) (unless (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)) (return-from feature-rules))))))) label-discarded-p)) #+old (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional heuristic-level) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position)) (label-discarded-p nil) (syntactic-function-codes (syntactic-function-codes cg))) (dotimes (i cohort-length) (let ((reading (nth i cohort))) ;; disambiguation is needed only if there is more than one syntactic label (when (and reading (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes)) (block reading (do-syntactic-rules (rule word-form reading cg heuristic-level) (when (apply-rule rule sentence position reading) (setf label-discarded-p t) (unless (syntactic-functions-ambiguous-p (cdr reading) syntactic-function-codes) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)))))))) label-discarded-p)) #+old-old (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional heuristic-level) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position)) (label-discarded-p nil)) (dotimes (i cohort-length) (let* ((reading (nth i cohort)) (reading-tail (last reading)) (last-feature (car reading-tail)) (syntactic-labels (when (and (consp last-feature) #-ignore(cdr last-feature) #+ignore(> (length last-feature) 1)) last-feature))) ;; disambiguation is needed only if there is more than one syntactic label (when syntactic-labels (block reading (do-syntactic-rules (rule word-form syntactic-labels cg heuristic-level) (when (apply-rule rule sentence position reading-tail) (setf label-discarded-p t) (when (not (cdar reading-tail)) #+ignore(= (length (car reading-tail)) 1) ;; successfully disambiguated (return-from reading)) (when heuristic-level ; not disambiguated: try the ordinary rules (13.) (s-disambiguate-sentence cg sentence position)))))))) label-discarded-p)) #+old (defmethod apply-syntactic-disambiguation-rules ((cg constraint-grammar) sentence position &optional heuristic-rules-p) (let* ((cohort (cohort sentence position)) (cohort-length (length cohort)) (word-form (word-form sentence position))) (dotimes (i cohort-length) (let* ((reading (nth i cohort)) (reading-tail (last reading)) (last-feature (car reading-tail)) (syntactic-labels (when (and (consp last-feature) (> (length last-feature) 1)) last-feature))) ;; disambiguation is needed only if there is more than one syntactic label (when syntactic-labels (block reading (do-syntactic-rules (rule word-form syntactic-labels cg heuristic-rules-p) (cond ((apply-rule rule sentence position reading-tail) ;(print rule) ;; successfully disambiguated (return-from reading)) ;; *** fix: try ordinary rules on *all* words from POSITION onwards! (heuristic-rules-p ; not disambiguated: try the ordinary rules (13.) (block ordinary-rules (do-syntactic-rules (rule word-form syntactic-labels cg nil) (when (apply-rule rule sentence position reading-tail) ;; successfully disambiguated (return-from ordinary-rules))))))))))))) ;; apply rules until nothing changes. (defmethod disambiguate-sentence ((cg constraint-grammar) sentence &optional (start-position 0)) (declare (ignore start-position)) ;;(sort-cohorts sentence) ; do this only once! *** (let ((count 0)) (loop with ; repeat-pos = start-position and repeat-p = nil do (loop for pos from 0 #+ignore repeat-pos to (1- (sentence-length sentence)) do (loop while (apply-disambiguation-rules cg sentence pos nil) do (setf ; repeat-pos pos repeat-p t))) while repeat-p do (setf repeat-p nil) (incf count)) sentence)) (defmethod h-disambiguate-sentence ((cg constraint-grammar) sentence) (dotimes (pos (sentence-length sentence)) (apply-disambiguation-rules cg sentence pos t)) sentence) (defmethod s-disambiguate-sentence ((cg constraint-grammar) sentence &optional (start-position 0)) (when *debug-mem* (print (list :s-disambiguate-sentence sentence))) (loop with repeat-pos = start-position and repeat-p = nil do (loop for pos from repeat-pos to (1- (sentence-length sentence)) when (apply-syntactic-disambiguation-rules cg sentence pos) do (setf repeat-pos pos repeat-p nil)) ;; set REPEAT-P to T for iteration! while repeat-p do (setf repeat-p nil)) sentence) #+old (defmethod s-disambiguate-sentence ((cg constraint-grammar) sentence &optional (start-position 0)) (dotimes (pos (length sentence)) (apply-syntactic-disambiguation-rules cg sentence pos)) sentence) (defmethod h-s-disambiguate-sentence ((cg constraint-grammar) sentence &optional (level 1)) (dotimes (pos (sentence-length sentence)) (apply-syntactic-disambiguation-rules cg sentence pos level)) sentence) (defmethod h-s-disambiguate-sentence-3 ((cg constraint-grammar) sentence) (h-s-disambiguate-sentence cg sentence 3)) #+old (defmacro do-mapping-rules ((rule reading cg) &body body) "Loops over the RULEs applicable for the given READING executing BODY." (let ((rule-list (gensym)) (%rule (gensym))) `(dolist (feature ,reading) (let ((,rule-list (gethash feature (morphosyntactic-mappings ,cg)))) (dolist (,%rule ,rule-list) (let ((,rule (when (feature-subset-p ,cg (car ,%rule) ,reading) (cdr ,%rule)))) (when ,rule ,@body))))))) #+copy (defmacro do-disambiguation-rules ((rule reading word-form cg heuristic-rules-p rule-type) &body body) "Loops over the RULEs applicable for the given reading executing BODY." (let ((constraints-table (gensym)) (rules-table (gensym)) (domain (gensym)) (%rule (gensym)) (code (gensym)) (type (gensym)) (stripped-type (gensym))) `(let ((,constraints-table (rules ,cg)) (,type ,rule-type) (,domain nil)) (declare (dynamic-extent ,domain)) (when-let (,stripped-type (type-strip-domain ,type)) (setf ,domain (string-downcase ,word-form)) (let ((,rules-table (gethash ,domain (domain-rules ,cg)))) (setf ,constraints-table ,rules-table ,type ,stripped-type))) (when ,constraints-table (when-let (,rules-table (gethash (type+level-to-type ,type (if ,heuristic-rules-p 1 0)) ,constraints-table)) (loop for ,code in (rule-type-feature-codes ,cg (type+level-to-type ,type (if ,heuristic-rules-p 1 0)) ,domain) when (has-feature-code-p (cdr ,reading) ,code) do (dolist (,%rule (gethash (code-feature ,code) ,rules-table)) (let ((,rule (cond ((atom ,%rule) ,%rule) ((feature-subset-p ,cg (car ,%rule) ,reading) (cdr ,%rule)) (t nil)))) (when ,rule ,@body))))))))) #+old--rule-tree-xx (defmacro do-mapping-rules ((rule reading word-form cg) &body body) "Loops over the RULEs applicable for the given READING executing BODY." (let ((%rule (gensym)) (domain (gensym))) `(with-slots (morphosyntactic-mappings) ,cg (let ((,domain (string-downcase ,word-form))) (declare (dynamic-extent ,domain)) (dolist (,%rule (gethash ,domain morphosyntactic-mappings)) (let ((,rule (cdr ,%rule))) (when ,rule ,@body))) (loop for code in (mapping-feature-codes ,cg) when (has-feature-code-p (cdr ,reading) code) do (dolist (,%rule (gethash (code-feature code) morphosyntactic-mappings)) (let ((,rule (when (feature-subset-p ,cg (car ,%rule) ,reading) (cdr ,%rule)))) (when ,rule ,@body)))))))) ;; list all features used in mapping rules #+test (let* ((*cg* *nbo-cg*) (*tagger* (multi-tagger *cg*)) (bv (make-array (code-vector-length) :element-type 'bit :initial-element 0))) (maphash (lambda (key rules) (declare (ignore key)) (dolist (target.rule rules) (let ((rule (cdr target.rule))) (rule-used-features rule bv)))) (morphosyntactic-mappings *nbo-cg*)) (print (loop for i from 0 for bit across bv when (= bit 1) collect (code-feature i)))) #+test (defmethod rule-used-features ((rule mapping-rule) bv) (with-slots (target constraints labels) rule (with-slots (encoded-set-declarations) *cg* (dolist (f labels) (let ((code (gethash f (feature-table (multi-tagger *cg*))))) (when code (setf (bit bv code) 1)))) (dolist (f target) (unless (stringp f) (let ((code (gethash f (feature-table (multi-tagger *cg*))))) (when code (setf (bit bv code) 1))))) (dolist (constraint constraints) (let ((set-exp (if (eq (car constraint) 'not) (caddr constraint) (cadr constraint)))) (labels ((set-feature (set-exp) (cond ((integerp set-exp) (setf (bit bv set-exp) 1)) ((consp set-exp) (mapc #'set-feature set-exp)) (t nil)))) (set-feature (gethash set-exp encoded-set-declarations)))))))) #+test (print (rule-used-features *rule*)) #-rule-tree-xx (defmacro do-mapping-rules ((rule reading word-form cg) &body body) "Loops over the RULEs applicable for the given READING executing BODY." (let ((%rule (gensym)) (domain (gensym))) `(with-slots (morphosyntactic-mappings) ,cg (let ((,domain (string-downcase ,word-form))) (declare (dynamic-extent ,domain)) (dolist (,%rule (gethash ,domain morphosyntactic-mappings)) (let ((,rule (when (feature-subset-p ,cg (car ,%rule) ,reading) (cdr ,%rule)))) (when ,rule ,@body))) (loop for code in (mapping-feature-codes ,cg) when (has-feature-code-p (cdr ,reading) code) do (dolist (,%rule (gethash (code-feature code) morphosyntactic-mappings)) (let ((,rule (when (feature-subset-p ,cg (car ,%rule) ,reading) (cdr ,%rule)))) (when ,rule ,@body)))))))) (defmethod apply-mapping-rules ((cg constraint-grammar) sentence position) (when position (let* ((cohort (cohort sentence position)) (cohort-length (length cohort))) (dotimes (i cohort-length) (let ((word-form (word-form sentence position)) (reading (nth i cohort))) (when reading (block reading (do-mapping-rules (rule #+rule-tree-xx sentence #+rule-tree-xx position reading (when (stringp word-form) word-form) cg) (when (apply-rule rule sentence position i) (return-from reading)))))))))) (defmethod apply-morphological-heuristics-rules ((cg constraint-grammar) sentence position) ;; applies only to unanalized words (when (null (cohort sentence position)) (dolist (rule (morphological-heuristics cg)) (when (apply-rule rule sentence position nil) (return-from apply-morphological-heuristics-rules))))) (defmethod analize-unknown-words ((cg constraint-grammar) sentence) (dotimes (pos (sentence-length sentence)) (apply-morphological-heuristics-rules cg sentence pos)) sentence) (defmethod map-sentence ((cg constraint-grammar) sentence) (dotimes (pos (sentence-length sentence)) (apply-mapping-rules cg sentence pos)) sentence) #+test (defun disambiguate (sentence &optional (operations nil op-supplied-p) cg) (let* ((cg (or cg *cg*)) (operations (if op-supplied-p operations (parse-operations cg)))) (cond ((null operations) sentence) ((listp sentence) (disambiguate (collecting (dolist (s sentence) (let ((res (funcall (car operations) cg s))) (if (listp res) (collect-append res) (collect res))))) (cdr operations) cg)) (t (disambiguate (funcall (car operations) cg sentence) (cdr operations) cg))))) (defmethod disambiguate ((sentence regexp-sentence) &optional (operations nil op-supplied-p) cg) #+debug(print (cons sentence (length operations))) (let* ((cg (or cg *cg*)) (operations (if op-supplied-p operations (parse-operations cg)))) (when (null (pending-operations sentence)) (setf (pending-operations sentence) operations)) (cond ((null operations) sentence) ((eq operations (pending-operations sentence)) (setf (pending-operations sentence) (cdr (pending-operations sentence))) #+debug (print (list :op operations)) (when-let (sentence (funcall (car operations) cg sentence)) (disambiguate sentence (cdr operations) cg))) (t ;; delayed sentence (disambiguate sentence (pending-operations sentence) ;;(cdr operations) cg))))) (defmethod disambiguate ((sentence sentence) &optional (operations nil op-supplied-p) cg) #+debug(print (list sentence operations)) (let* ((cg (or cg *cg*)) (operations (if op-supplied-p operations (parse-operations cg)))) (if operations (disambiguate (funcall (car operations) cg sentence) (cdr operations) cg) sentence))) (defmethod initialize-sentence-array ((sentence sentence) &key (sort-cohorts-p t)) (with-slots (sentence-array) sentence (setf (fill-pointer sentence-array) 1) (labels ((push-token (token) (when token (when (and (stringp (effective-token-value token)) (not (or (insignificant-token-p token) (insignificant-p token)))) (vector-push-extend token sentence-array)) (push-token (token-next token))))) (push-token (first-token sentence)))) (when sort-cohorts-p (sort-cohorts sentence)) sentence) (defun disambiguate-from-string (string &key (stream *standard-output*) cg compare-cg (print-function #'print-sentence) (tagging-niveau :morphological-disambiguation) context-size token-print-fn total-disambiguate-p feature-filter #+ignore(platform $encoding)) #+pcl(setf string (delete (code-char 129) string)) (with-input-from-string (in-stream string) (disambiguate-stream *tokenizer* in-stream ;; stream :cg cg :compare-cg compare-cg :print-function ;; print-function (lambda (s &rest rest) (apply print-function s :stream stream rest)) :tagging-niveau tagging-niveau :total-disambiguate-p total-disambiguate-p :feature-filter feature-filter :context-size context-size :token-print-fn token-print-fn ;;:platform platform ))) (defmethod apply-feature-filter ((sentence sentence) (f t)) nil) (defmethod apply-feature-filter ((token token) (f t)) nil) (defmethod apply-feature-filter ((sentence sentence) (feature-filter cons)) (let ((feature-filter (code-from-features feature-filter))) (map-tokens sentence (lambda (token) (when (stringp (token-value token)) (loop for reading in (token-features token) when (cdr reading) do (feature-intersection (cdr reading) feature-filter)))) :all-p t)) sentence) (defmethod apply-feature-filter ((token token) (feature-filter cons)) (let ((feature-filter (code-from-features feature-filter))) (when (stringp (token-value token)) (loop for reading in (token-features token) when (cdr reading) do (feature-intersection (cdr reading) feature-filter))))) (defmethod apply-feature-filter ((sentence sentence) (feature-filter feature-filter)) (map-tokens sentence (lambda (token) (setf (token-features token) (filter-features feature-filter (token-features token)))) :all-p t) (with-slots (feature-vector) sentence (setf feature-vector (map-filter-features feature-filter)) #+debug(describe sentence))) (defmethod apply-feature-filter ((token token) (feature-filter feature-filter)) (setf (token-features token) (filter-features feature-filter (token-features token)))) #+old (defmethod clone-sentence ((sentence document-sentence) &key focus-token) (let ((cloned-focus-token nil) (cloned-sentence (get-sentence :sentence-class (class-of sentence) :tokenizer (tokenizer sentence) :document (sentence-document sentence) :cg (constraint-grammar sentence)))) (map-tokens sentence (lambda (token) (let ((cloned-token (add-token cloned-sentence (token-value token) :attributes (token-attributes token)))) (when (eq token focus-token) (setf cloned-focus-token cloned-token))))) (values cloned-sentence cloned-focus-token))) (defmethod clone-sentence ((sentence document-sentence) &key focus-token) (let ((cloned-focus-token nil) (cloned-sentence (get-sentence :sentence-class (class-of sentence) :tokenizer (tokenizer sentence) :document (sentence-document sentence) :cg (constraint-grammar sentence)))) (labels ((walk (token concat-token) (cond ((null token) nil) ((cgp::token-expansion token) (do ((ex-token (car (cgp::token-expansion token)) (token-next ex-token))) ((eq ex-token (cdr (cgp::token-expansion token))) (walk ex-token token)) (walk ex-token token)) (walk (token-next token) nil)) (t (let ((cloned-token (add-token cloned-sentence (token-value token) :attributes (token-attributes token)))) (when (eq token focus-token) (setf cloned-focus-token cloned-token))) (unless (or (eq token (last-token sentence)) concat-token) (walk (token-next token) nil)))))) (walk (first-token sentence) nil) (values cloned-sentence cloned-focus-token)))) (defmethod disambiguate-stream ((tokenizer tokenizer) in-stream ;; a character stream &key (print-function #'print-sentence) mapping-function ;; defaults to map-sentences() (tagging-niveau :morphological-disambiguation) total-disambiguate-p feature-filter message-fn (cg *cg*) compare-cg context-size sentence-class token-print-fn (collapse-readings-p t) (apply-feature-filter-p t) (encoding $encoding) &allow-other-keys) #+debug(print (list :niveau tagging-niveau)) (when (and compare-cg (not (eq (multi-tagger cg) (multi-tagger compare-cg)))) (error "CG versions to compare must have identical multi-taggers. \"~a\" and \"~a\" don't." (name cg) (name compare-cg))) (let* ((*cg* cg) (*tagger* (multi-tagger cg)) (*analyser-lexicon* (compound-analyser *tagger*)) (*sentence-start-p-fun* (sentence-start-p-fun tokenizer)) (*sentence-class* (or sentence-class (if (find tagging-niveau '(:named-entity-disambiguation :named-entity-recognition-only :syntactic-named-entity-mapping :syntactic-disambiguation-named-entity-mapping :syntactic-named-entity-disambiguation :syntactic-named-entity-disambiguation-logon :syntactic-disambiguation-regexp :term-extraction :term-ne-extraction :np-recognition :sd-np-recognition)) 'regexp-sentence *sentence-class*))) (*token-class* (if (find tagging-niveau '(:named-entity-disambiguation :named-entity-recognition-only :syntactic-named-entity-mapping :syntactic-disambiguation-named-entity-mapping :syntactic-named-entity-disambiguation :syntactic-named-entity-disambiguation-logon :syntactic-disambiguation-regexp :term-extraction :term-ne-extraction :np-recognition :sd-np-recognition)) 'regexp-token *token-class*)) (*token-memory* (when (or context-size (eq *token-class* 'regexp-token)) (make-instance 'memory :context-size (or context-size *context-size*))))) ;; debug (setf *memory* *token-memory*) #+debug(print (list :context-size context-size :apply-feature-filter-p apply-feature-filter-p :feature-filter feature-filter)) (funcall (or mapping-function #'map-sentences) ;; mapping-function does tokenization and multi-tagging in-stream (lambda (s) ;; function that runs the CG parser on a sentence object (s) (when (and message-fn *sentence-count* (zerop (mod *sentence-count* 1))) (funcall message-fn (format nil "~4d sentences disambiguated." *sentence-count*))) (labels ((sort-disambiguate-ff (cg s) (declare (ignore cg)) (sort-readings-by-ngram-frequency s :total-disambiguate-p total-disambiguate-p :feature-filter (when apply-feature-filter-p feature-filter) :collapse-readings-p collapse-readings-p #+ignore (when feature-filter (code-from-features feature-filter)))) (sort-disambiguate (cg s) (declare (ignore cg)) (sort-readings-by-ngram-frequency s :total-disambiguate-p total-disambiguate-p :collapse-readings-p collapse-readings-p :feature-filter nil)) (filter-features (cg s) (declare (ignore cg)) (when apply-feature-filter-p (apply-feature-filter s feature-filter)))) (ecase tagging-niveau ((:none :multi-tagging) s) (:morphological-disambiguation (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate-ff))) (:syntactic-mapping (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'map-sentence #'sort-disambiguate-ff))) (:syntactic-disambiguation (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence #'sort-disambiguate-ff))) (:named-entity-mapping (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'named-entity-map-sentence #'sort-disambiguate-ff))) (:named-entity-disambiguation (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence ;;#'regexp-merge-sentence (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil)) #'named-entity-map-sentence #'named-entity-disambiguate-sentence (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 2)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 3)) #'named-entity-disambiguate-sentence #'sort-disambiguate-ff))) (:named-entity-recognition-only (disambiguate (initialize-sentence-array s) (list ;;#'regexp-merge-sentence #'sort-disambiguate (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil)) #'named-entity-map-sentence #'named-entity-disambiguate-sentence (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 2)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 3)) #'named-entity-disambiguate-sentence #'filter-features))) (:term-extraction (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :regexp-parser *term-extractor* :use-lc-features-p nil :first-uppercase-p nil :label :te :no-overlap-p nil)) #'filter-features))) (:np-recognition (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :regexp-parser *np-recognizer* :use-lc-features-p nil :first-uppercase-p nil :label :te)) #'filter-features))) (:sd-np-recognition (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :regexp-parser *np-recognizer* :use-lc-features-p nil :first-uppercase-p nil :label :te)) #'filter-features))) (:term-ne-extraction (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :regexp-parser *name-term-extractor* :concatenate-p nil :label :te :no-overlap-p nil)) (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :regexp-parser *term-extractor* :use-lc-features-p nil :concatenate-p nil :first-uppercase-p nil :label :te :no-overlap-p nil)) #'filter-features))) (:syntactic-named-entity-disambiguation (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'regexp-merge-sentence #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence #'sort-disambiguate (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil)) #'named-entity-map-sentence #'named-entity-disambiguate-sentence (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 2)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 3)) #'named-entity-disambiguate-sentence #'filter-features))) (:syntactic-named-entity-disambiguation-logon (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :use-head-features-p t)) #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence #'sort-disambiguate (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil :use-head-features-p t)) #'named-entity-map-sentence #'named-entity-disambiguate-sentence (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 2)) (lambda (cg sentence) (h-named-entity-disambiguate-sentence cg sentence 3)) #'named-entity-disambiguate-sentence #'filter-features))) (:syntactic-named-entity-mapping (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate #'regexp-merge-sentence #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence #'map-sentence (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil)) #'named-entity-map-sentence #'filter-features))) (:syntactic-disambiguation-named-entity-mapping (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate #'regexp-merge-sentence #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil)) #'named-entity-map-sentence #'filter-features))) (:syntactic-disambiguation-regexp (disambiguate (initialize-sentence-array s) (list #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'sort-disambiguate #'regexp-merge-sentence #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #'s-disambiguate-sentence (lambda (cg sentence) (regexp-merge-sentence cg sentence :enqueue-sentence-p nil)) #'filter-features)))) s)) :multitag-p (not (eq tagging-niveau :none)) :sentence-class (if compare-cg 'compare-sentence *sentence-class*) :print-function print-function #+test (lambda (s) (funcall print-function s :stream out-stream :platform platform)) :compare-cg compare-cg :encoding encoding :token-print-fn token-print-fn))) (defmethod disambiguate-stream ((tokenizer-class symbol) in-stream &rest rest &key tokenizer-initargs &allow-other-keys) (let* ((tokenizer (apply #'make-instance tokenizer-class tokenizer-initargs)) (*tokenizer* tokenizer)) (apply #'disambiguate-stream tokenizer in-stream rest))) (defun disambiguate-file (in-file out-file &key (print-function #'print-sentence) (tagging-niveau :morphological-disambiguation) ;;(platform $encoding) ;;(print-rules-p t) (cg *cg*)) (with-open-file (in-stream in-file :direction :input) (with-open-file (out-stream out-file :direction :output :if-exists :supersede :if-does-not-exist :create) (disambiguate-stream *tokenizer* in-stream ;; out-stream :print-function (lambda (s &rest rest) (apply print-function s :stream out-stream rest)) :tagging-niveau tagging-niveau ;; :platform platform :cg cg)))) ;;; lispify input from multitagger #+ignore (defmacro with-stream-lines ((line stream) &body body) `(loop for ,line = (read-line ,stream nil nil) while ,line do ,@body)) #+ignore (defmacro with-file-lines ((line path) &body body) (let ((stream (gensym))) `(with-open-file (,stream ,path) (with-stream-lines (,line ,stream) ,@body)))) ;; lispification... (defun %fix-quote (str) (let ((quote-pos (search "$\"" str))) (if quote-pos (concatenate 'string (subseq str 0 (1+ quote-pos)) "\\" (%fix-quote (subseq str (1+ quote-pos)))) str))) ;; for debugging (defparameter *report-uncorrect-readings-p* t) (defparameter *uncorrect-readings* (make-hash-table)) (defun pprint-sentence (sentence &optional (out-stream t)) (loop for cohort across sentence when (stringp (caar cohort)) do (prin1 (caar cohort) out-stream) (dolist (rule-id (reverse (cdar cohort))) (write-char #\Space out-stream) (write-string rule-id out-stream)) (terpri out-stream) (dolist (reading (cdr cohort)) (when (and reading (not (eq (car reading) '<<<))) (write-char #\Tab out-stream) (dolist (feature reading) (cond ((stringp feature) (format out-stream "~s " feature)) ((integerp feature) (format out-stream "~d " feature)) (t (if (listp feature) (format out-stream "(~{~s~^ ~})" feature) (write-string (string-downcase (string feature)) out-stream)) (write-char #\Space out-stream)))) (write-char #\Newline out-stream))))) ;; debugging (defun find-uncorrect-readings-stream (stream &optional (out-stream *standard-output*)) (declare (ignore out-stream)) (clrhash *uncorrect-readings*) (let ((sentence-count 0) (total-word-count 0)) (declare (optimize speed)) (declare (fixnum total-word-count sentence-count)) (with-stream-sentences (sentence stream) (print sentence) (incf sentence-count) #-ignore (when (= sentence-count 6394) ;; ****** (return-from find-uncorrect-readings-stream total-word-count)) (let ((d-sentence (disambiguate sentence nil)) (word-count 0)) (loop for token across (sentence-array d-sentence) when (stringp (token-value token)) do (incf word-count) (dolist (reading (token-features token)) (when (and reading (not (eq (car reading) '<<<))) (when (and *report-uncorrect-readings-p* (not (find ' reading))) (pushnew (cons word-count (car reading)) (gethash sentence-count *uncorrect-readings*) :test #'equal) #+ignore (format out-stream "~4d: ~a~%" sentence-count reading))))) (incf total-word-count word-count))) total-word-count)) #+test (let ((*cg* *nbo-cg*) (*tagger* *nbo-tagger*)) (with-open-file (stream "projects:cgp;training;delkorpbm-fork.cor" :direction :input) (find-uncorrect-readings-stream stream))) ;; count words with wrong readings #+test (let ((count 0)) (maphash (lambda (key val) (declare (ignore key)) (incf count (length val))) *uncorrect-readings*) count) ;; debugging (defun nth-sentence-stream (n stream) (let ((sentence-count 0)) (declare (optimize speed)) (declare (fixnum sentence-count)) (with-stream-sentences (sentence stream) (incf sentence-count) (when (= n sentence-count) (return-from nth-sentence-stream sentence))))) (defun disambiguate-nth-sentence (n &key pprint (file "projects:cgp;training;delkorp.cor")) (with-open-file (stream file :direction :input) (let ((sentence (nth-sentence-stream n stream))) (if pprint (pprint sentence) (pprint-sentence sentence)) (when pprint (terpri) (terpri)) (format t "----------------------------------------~%") (if pprint (pprint (disambiguate sentence)) (pprint-sentence (disambiguate sentence)))))) #+test (with-open-file (stream "projects:cgp;training;delkorp.dis" :direction :input) (find-uncorrect-readings-stream stream)) #+test (with-open-file (stream "projects:cgp;training;delkorp.dis" :direction :input) (nth-sentence-stream 103 stream)) #+test (time (disambiguate-file "projects:cgp;training;delkorpbm.cor" "projects:cgp;training;delkorpbm-lisp.dis")) #+test (time ;; 40,110 ms ; 30,442 ms ; 33,750 ms (131,402 124,771 with syntax rules, PM7600) (disambiguate-file "projects:cgp;training;benchmark-test.cor" "projects:cgp;training;benchmark-test.dis")) #+test (with-open-file (stream "projects:cgp;training;delkorp.cor" :direction :input) (disambiguate-stream stream)) #+test (with-open-file (stream "projects:cgp;training;test1.cor" :direction :input) (disambiguate-stream stream)) #+obsolete (defun convert-rule-file (in-file out-file) (with-open-file (stream out-file :direction :output :if-exists :supersede :if-does-not-exist :create) (with-file-lines (line in-file) (write-line (convert-rule-line line) stream)))) #+obsolete (defun convert-rule-line (line) (labels ((convert (line) (let* ((start (position-if (lambda (c) (find c " (")) line)) (start (when start (position-if-not (lambda (c) (find c " (")) line :start start))) (end (when start (position-if (lambda (c) (find c " )")) line :start start))) (not-in-quote-p (and start (not (char= (char line start) #\"))))) (when (and start (not (find-if #'lower-case-p line :start start :end end)) (find-if #'upper-case-p line :start start :end end)) (nsubstitute #\% #\" line :start start :end end)) (cond ((null start) line) ((null end) (if not-in-quote-p (concat (subseq line 0 start) (%replace-upper-ascii (subseq line start))) line)) (t (if not-in-quote-p (concat (subseq line 0 start) (subseq line start end) (convert (subseq line end))) (concat (subseq line 0 end) (convert (subseq line end))))))))) (convert line))) ;; expects language as keyword (defun newest-cg (language) (let ((newest-cg nil)) (maphash (lambda (name cg) (declare (ignore name)) (when (and (eq (language cg) language) (or (null newest-cg) (< (change-date newest-cg) (change-date cg)))) (setf newest-cg cg))) *cg-table*) newest-cg)) #+test (convert-rule-file "projects:cgp;rules;norsk.rle" "projects:cgp;rules;norsk.lisp") #+test (convert-rule-file "projects:cgp;rules;nynorsk.rle" "projects:cgp;rules;nynorsk.lisp") #+test (convert-rule-file "projects:cgp;rules;norsk-map_v3.rle" "projects:cgp;rules;norsk-map.lisp") #+test (convert-rule-file "projects:cgp;rules;norsk-syn_v3.rle" "projects:cgp;rules;norsk-syn.lisp") #+test (convert-rule-file "projects:cgp;rules;norsk-map.lisp" "projects:cgp;rules;norsk-map1.lisp") #+test (convert-rule-file "projects:cgp;rules;norsk-syn.lisp" "projects:cgp;rules;norsk-syn1.lisp") #+test (convert-rule-file "projects:cgp;rules;nny.lisp" "projects:cgp;rules;nny1.lisp") #+test (convert-rule-file "/home/paul/lisp/projects/cgp/rules/norsk-map_nn.rle" "projects:cgp;rules;nny-map.lisp") #+test (convert-rule-file "/home/paul/lisp/projects/cgp/rules/norsk-syn_nn.rle" "projects:cgp;rules;nny-syn.lisp") (defmethod count-rules ((cg constraint-grammar)) (let ((morph-count 0) (map-count 0) (synt-count 0) (ne-count 0)) (loop for rule across (rule-array cg) do (ecase (rule-type rule) ((=! =!h =!! =!!h =0 =0h) (incf morph-count)) ((=m) (incf map-count)) ((=s! =s!h1 =s!h2 =s!h3 =s0 =s0h1 =s0h2 =s0h3) (incf synt-count)) ((=nm =n! =n!h =n!h1 =n!h2 =n!h3 =n0 =n0h =n0h1 =n0h2 =n0h3) (incf ne-count)))) (print (list :morph-count morph-count :map-count map-count :synt-count synt-count :ne-count ne-count)))) #+test (count-rules (gethash "nbo-feb02" *cg-table*)) ;;(:MORPH-COUNT 1988 :MAP-COUNT 365 :SYNT-COUNT 813 :NE-COUNT 3) #+test (count-rules (gethash "nny-sept" *cg-table*)) ;;(:MORPH-COUNT 3693 :MAP-COUNT 0 :SYNT-COUNT 0 :NE-COUNT 0) #+test (with-open-file (stream "/home/paul/lisp/projects/cgp/texts/test-andra.txt") (disambiguate-stream 'tokenizer stream :cg *nbo-cg*)) ;;; EOF