;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;; ;; Copyright (C) Paul Meurer 2001-2004. All rights reserved. ;; paul.meurer@aksis.uib.no ;; Aksis, University of Bergen ;; (in-package :cgp) #+moved (defparameter *context-size* 1000) #+test (defparameter *context-size* 1) ;; debug #+moved (defparameter *memory* nil) (defclass token-memory () ((context-size :initform 1000 :initarg :context-size :accessor context-size) (token-queue :initform (make-queue) :accessor token-queue) (fill-count :initform 0 :accessor fill-count) (token-table :initform (make-hash-table :test #'equal) :accessor token-table))) (defclass memory () ((multi-tagger-memory :initform (make-instance 'token-memory) :reader multi-tagger-memory) (regexp-memory :initform (make-instance 'token-memory) :reader regexp-memory) (multi-tagger-delay-queue :initform (make-queue) :accessor multi-tagger-delay-queue) (regexp-delay-queue :initform (make-queue) :accessor regexp-delay-queue) (regexp-delay-count :initform 0 :accessor regexp-delay-count))) (defmethod initialize-instance :after ((memory memory) &key context-size &allow-other-keys) (setf (context-size (multi-tagger-memory memory)) context-size (context-size (regexp-memory memory)) context-size)) (defmethod memoize-token ((memory token-memory) token &key (key :lemma)) (with-slots (context-size token-queue fill-count token-table) memory (u::enqueue token token-queue) (cond ((< (1+ fill-count) context-size) (incf fill-count)) (t (let ((removed-token (pop-queue token-queue))) (when (member key '(:lemma :lemma+value)) (dolist (features (token-features removed-token)) (let* ((word (string-downcase (car features))) (token-list (gethash word token-table))) (if (cdr token-list) (setf (gethash word token-table) (delete removed-token token-list)) (remhash word token-table))))) (when (member key '(:value :lemma+value)) (let* ((word (string-downcase (token-value removed-token))) (token-list (gethash word token-table))) (if (cdr token-list) (setf (gethash word token-table) (delete removed-token token-list)) (remhash word token-table))))))) (when (member key '(:lemma :lemma+value)) (dolist (features (token-features token)) (pushnew token (gethash (string-downcase (car features)) token-table)))) (when (member key '(:value :lemma+value)) (pushnew token (gethash (string-downcase (token-value token)) token-table)))) token) (defmethod mark-match ((sentence regexp-sentence) first last) (map-tokens sentence (lambda (token) (setf (match token) (cond ((eq token first) (if (eq token last) :whole-match :match-start)) ((eq token last) :match-end) (t :match)))) :start first :walk-function (lambda (token) (unless (eq token last) (token-next token))))) (defmethod regexp-merge-sentence ((cg ne-constraint-grammar) (sentence regexp-sentence) &key (enqueue-sentence-p t) (regexp-parser *regexp-parser*) (use-lc-features-p t) use-head-features-p (first-uppercase-p t) (concatenate-p t) (label :rx) (no-overlap-p t)) (when *debug-mem* (print (list :regexp-merge-sentence sentence :enqueue enqueue-sentence-p))) (with-slots (regexp-memory regexp-delay-queue) *token-memory* (with-slots (fill-count context-size token-table) regexp-memory (when (and *debug-mem* (matches sentence)) (print (list :regexp-merge-sentence-before sentence (matches sentence) :use-lc-features-p use-lc-features-p :first-uppercase-p first-uppercase-p))) ;; put tokens into memory (map-tokens sentence (lambda (token) (when token (memoize-token regexp-memory token :key :lemma+value)))) #+debug(Print (list :first-token (first-token sentence))) ;; try to find regexp matches; adds matches to (MATCHES SENTENCE) (match-boolean-list-regexp sentence (first-token sentence) ;; prelim regexp-parser ;;(or (regexp-parser cg) *regexp-parser*) :use-lc-features-p use-lc-features-p :first-uppercase-p first-uppercase-p :no-overlap-p no-overlap-p) (when (and *debug-mem* (matches sentence)) (print (list :regexp-merge-sentence sentence (matches sentence)))) (if concatenate-p (labels ((compare (token match-token last-token) (cond ((null token) nil) ((null match-token) nil) ((not (stringp (token-value token))) (compare (token-next token) match-token last-token)) ((not (stringp (token-value match-token))) (compare token (token-next match-token) last-token)) ((string= (token-value token) (token-value match-token)) (if (eq match-token last-token) token (compare (token-next token) (token-next match-token) last-token)))))) (dolist (match (matches sentence)) (when *debug-mem* (print (list :match-found match))) (destructuring-bind (first-token last-token head used-p head-features) match (unless used-p (setf (cadddr match) t) ;; find equal expressions in memory (backward context search) (let ((initials (gethash (string-downcase (token-value first-token)) token-table))) (dolist (initial initials) (unless (match initial) ;; already matched (let ((last (compare initial first-token last-token))) (when (and last (not (eq initial last))) (when *debug-mem* (print (list :initial initial :sentence (token-chain initial)))) (mark-match sentence initial last) (concatenate-match-tokens (token-chain initial) initial last :head head :head-features head-features :label label :use-head-features-p use-head-features-p)))))) ;; concatenate the tokens (if (eq first-token last-token) (pushnew label (token-used-rules first-token)) (concatenate-match-tokens sentence first-token last-token :head head :head-features head-features :label label :use-head-features-p use-head-features-p))))) (map-tokens sentence (lambda (token) (when *debug-mem* (print (list :mapping token :match (match token)))) ;; find equal expressions in memory (forward context search) (unless (match token) (let ((initials (gethash (string-downcase (token-value token)) token-table))) (dolist (initial initials) #+debug(print (list :initial initial :match (match initial) )) (when (eq (match initial) :match-start) ;; already matched (let* ((match (find initial (matches (token-chain initial)) :key #'car)) (end (cadr match)) (last (compare token initial end))) (when (and last (not (eq token last))) (when *debug-mem* (print (list :conc-map token last))) (mark-match sentence token last) (concatenate-match-tokens (token-chain token) token last :head (caddr match) :head-features (caddr (cddr match)) :label label :use-head-features-p use-head-features-p)))))) ;; find token as head in memory (forward context search) (dolist (feature (token-features token)) (dolist (head-candidate (gethash (string-downcase (car feature)) token-table)) (when (and (match head-candidate) ;; already matched (find head-candidate ;; is head (matches (token-chain head-candidate)) :key #'cddr)) (when *debug-mem* (print (list :prop-head-memory token))) (dolist (feature (token-features token)) (when feature (set-feature (cdr feature) 'prop) (reset-feature (cdr feature) 'appell))))))))) (cond (enqueue-sentence-p (u::enqueue sentence regexp-delay-queue) (incf fill-count (sentence-token-length sentence)) #+debug(print (list :fill-count fill-count :context-size context-size)) (when-let (sentence (and (> fill-count context-size) (pop-queue regexp-delay-queue))) (decf fill-count (sentence-token-length sentence)) ;; +-1?? ;;(print (pending-operations sentence)) sentence)) (t sentence))) sentence)))) (defparameter *sen* nil) #+debug (setf *debug-mem* t) ;; :features not used (yet?) (defmethod concatenate-match-tokens ((sentence regexp-sentence) first-token last-token &key features head head-features label use-head-features-p) (let ((cg (constraint-grammar sentence)) (token (concatenate-tokens first-token last-token :new-p t :label label))) (when *debug-mem* (print (list :concatenating first-token last-token))) (setf (token-features token) (cond (features features) ((and head use-head-features-p) (if head-features (let ((features (or (lc-features head) (token-features head)))) (mapcar (lambda (l.bv) (when l.bv (cons (car l.bv) (let ((bv (copy-seq (cdr l.bv)))) (apply #'add-features bv head-features) bv)))) features)) (or (lc-features head) (token-features head)))) (t (progn (when *debug-mem* (print (list :prop-concat-match token))) nil) (list (cons (token-value token) (apply 'encode-features (collecting (cond (head (collect-append '(prop rx)) (dolist (f '(subst mask fem nøyt gen)) (unless (find-if-not (lambda (fv) (or (null fv) (has-feature-p fv f))) (token-features head) :key #'cdr) (collect f))) ;; add head's syntactic features (maphash (lambda (f val) (declare (ignore val)) (unless (find-if-not (lambda (fv) (or (null fv) (has-feature-p fv f))) (token-features head) :key #'cdr) (collect f))) (syntactic-functions cg)) ;; add head's ne features (mapc (lambda (f) (when (find-if (lambda (fv) (or (null fv) (has-feature-p fv f))) (token-features head) :key #'cdr) (collect f))) '( ))) (t (when *debug-mem* (print (list :concatenate-match-tokens token))) (collect-append '(subst prop rx)))) (block map-tokens (map-tokens sentence (lambda (token) (when (stringp (token-value token)) (maphash (lambda (f val) (declare (ignore val)) (unless (find-if-not (lambda (fv) (or (null fv) (has-feature-p fv f))) (token-features token) :key #'cdr) (collect f))) (suffix-table (multi-tagger cg)))) (when (eq token last-token) (return-from map-tokens))) :start first-token))))))))) (initialize-sentence-array sentence :sort-cohorts-p nil) token)) ;; *memory* ;; *sentence* :eof