;;; -*- Mode: LISP; Package: CGP; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;; ;; Copyright (C) Paul Meurer 1999, 2000. All rights reserved. ;; paul.meurer@hit.uib.no ;; HIT-centre, University of Bergen ;; ;; Version 0.95 ;; ;; Constraint Grammar Parser editing functions ;; ;;------------------------------------------------------------------------------------- ;; TO DO: handle comments properly when reloading a stored grammar ;; ;;------------------------------------------------------------------------------------- (in-package "CGP") (defun %write-constraint-to-string (constraint &key (alternate-case-p t)) (with-output-to-string (stream) (write-char #\( stream) (loop with upcasep = alternate-case-p for (sym . rest) on constraint do (cond ((eq sym 'not) (write-string "NOT" stream)) (upcasep (format stream "~:@(~s~)" sym) (setf upcasep nil)) (t (format stream "~(~s~)" sym) (setf upcasep alternate-case-p))) when rest do (write-char #\Space stream)) (write-char #\) stream))) (defun %feature-to-string-list (f-list language &key (to-platform :sgml) additional-conversions) (declare (ignore language)) (mapcar (lambda (ct) (delete #\| (%write-constraint-to-string ct)) #+ignore (convert-string (delete #\| (%write-constraint-to-string ct)) :mac to-platform additional-conversions)) f-list)) ;; make this nicer! (defun %labels-to-string-list (labels language &key (to-platform :sgml) additional-conversions) (declare (ignore language)) (delete #\| (%write-constraint-to-string labels :alternate-case-p nil)) #+old (convert-string (delete #\| (%write-constraint-to-string labels :alternate-case-p nil)) :mac to-platform additional-conversions)) (defmethod write-rule-body ((rule rule) stream &key language html-p (to-platform :sgml) (end-parenthesis t)) (with-slots (type domain target constraints heuristic-level) rule (let ((*print-pretty* nil)) (if html-p (progn #m #S(format nil "(~s ~s ~a" (or domain '@w) type (delete #\\ (write-to-string (if (consp target) (car target) (list target))))) (dolist (string (%feature-to-string-list constraints language :to-platform nil)) #m (br/) #m #s (format nil " ~a" string))) (format stream "(~s ~s ~a~{~% ~a~}" (or domain '@w) #+old(if domain (convert-string domain $encoding to-platform) '@w) type (delete #\\ (write-to-string (if (consp target) (car target) (list target))) #+old (convert-string (write-to-string (if (consp target) (car target) (list target))) :mac to-platform)) (%feature-to-string-list constraints language :to-platform nil #+old to-platform)))) (when end-parenthesis (write-char #\) stream)))) (defmethod write-rule-body ((rule mapping-rule) stream &key language html-p (to-platform :sgml) (end-parenthesis t)) (with-slots (target constraints labels) rule (let ((*print-pretty* nil)) (if html-p (progn #m #s(format nil "(~a" (delete #\\ (write-to-string target))) #m(br/) (write-string " (" stream) (loop for (string rest) on (%feature-to-string-list constraints language :to-platform nil) do #m #s string when rest do #m(br/) (write-string " " stream)) #m(br/) #m #s(format nil " ~a" (%labels-to-string-list labels language))) (format stream "(~a~% (~{~a~^~% ~})~% ~a" (delete #\\ #+old(convert-string (write-to-string target) :mac to-platform) (write-to-string target)) (%feature-to-string-list constraints language :to-platform to-platform) (%labels-to-string-list labels language :to-platform to-platform)))) (when end-parenthesis (write-char #\) stream)))) #+old (defmethod write-rule-body ((rule mapping-rule) stream &key language html-p (to-platform :sgml) (end-parenthesis t)) (with-slots (target constraints labels) rule (let ((*print-pretty* nil)) (format stream (if html-p "(~a
(~{~a~^
~})
~a" "(~a~% (~{~a~^~% ~})~% ~a") (delete #\\ (convert-string (write-to-string target) :mac to-platform (when html-p '(#\< "<" #\> ">" #\& "&")))) (%feature-to-string-list constraints language :to-platform to-platform :additional-conversions (when html-p '(#\< "<" #\> ">" #\& "&"))) (%labels-to-string-list labels language :to-platform to-platform :additional-conversions (when html-p '(#\< "<" #\> ">" #\& "&"))))) (when end-parenthesis (write-char #\) stream)))) (defmethod rule-table ((cg constraint-grammar) constraint-type domain) (cond ((find constraint-type '(=m :syntactic-map)) (morphosyntactic-mappings cg)) (domain (gethash domain (domain-rules cg))) (t (rules cg)))) (defmethod rule-table ((cg ne-constraint-grammar) constraint-type domain) (declare (ignore domain)) (case constraint-type ((=nm :named-entity--map) (named-entity-mappings cg)) (otherwise (call-next-method)))) (defmethod get-rules ((cg constraint-grammar) &key id type heuristic-niveau domain features rule-table constraint-node keep-groups-together-p) (when (and type (not (eq heuristic-niveau :alle))) (setf type (or (type+level-to-type type (or heuristic-niveau 0)) t))) ;(print (list type heuristic-niveau)) (unless (eq type t) (let ((rules (cond (id (list (aref (rule-array cg) id))) ((eq heuristic-niveau :alle) (reduce #'append (mapcar (lambda (heuristic-niveau) (get-rules cg :type type :heuristic-niveau heuristic-niveau :domain domain :features features)) '(0 1 2 3)))) ((and (null type) (null rule-table) (null constraint-node)) (get-rules cg :type '(=0 =! =s0 =s!) :domain domain :features features)) ((consp type) (reduce #'append (mapcar (lambda (type) (get-rules cg :type type :domain domain :features features)) type))) (domain (if (eq domain :alle) (collecting (maphash (lambda (dom rule-table) (declare (ignore dom)) (collect-append (get-rules cg :features features :type type :rule-table rule-table))) (domain-rules cg))) (when-let (rule-table (gethash domain (domain-rules cg))) (get-rules cg :features features :type type :rule-table rule-table)))) (type (case type ((=0 =! =!! =0h =!h =!!h) (when-let (rule-table (gethash type (or rule-table (rules cg)))) (get-rules cg :features features :rule-table rule-table))) (=m (when-let (rule-table (morphosyntactic-mappings cg)) (get-rules cg :features features :rule-table rule-table))) (=nm (when-let (rule-table (named-entity-mappings cg)) (get-rules cg :features features :rule-table rule-table))) ((=s0 =s! =s0h1 =s!h1 =s0h2 =s!h2 =s0h3 =s!h3 =n0 =n! =n0h =n!h =n0h1 =n!h1 =n0h2 =n!h2 =n0h3 =n!h3) (if features (when-let (rule-table (gethash type (or rule-table (rules cg)))) (when-let (constraint-node (gethash (car features) rule-table)) (get-rules cg :features features :constraint-node constraint-node))) (collecting (when-let (rule-table (gethash type (or rule-table (rules cg)))) (maphash (lambda (feature constraint-node) (collect-append (get-rules cg :features (list feature) :constraint-node constraint-node))) rule-table))))))) ((and (null features) rule-table) (collecting (maphash (lambda (feature rules) (declare (ignore feature)) (dolist (features+rule rules) (collect (cdr features+rule)))) rule-table))) (constraint-node (collecting (labels ((walk (node) (dolist (rule (constraint-rules node)) (collect rule)) (mapc #'walk (child-constraints node)))) (walk constraint-node)))) (t (u:collecting (let () (dolist (features+rule (gethash (car features) rule-table)) (when (subsetp (cdr features) (car features+rule)) (u:collect (cdr features+rule)))))))))) (if keep-groups-together-p (let ((used-groups ())) (collecting (dolist (rule rules) (when (and rule (not (equal rule 0))) ; *** fix this! initialize copied rule array with nil (let ((group-comment (aref (rule-group-comments cg) (rule-id rule)))) ;(print group-comment) (cond ((null group-comment) (collect rule)) ((null (car group-comment)) ;; bug somewhere! (collect rule)) ((find group-comment used-groups) nil) (t (push group-comment used-groups) (dolist (rule-id (cdr group-comment)) (collect (aref (rule-array cg) rule-id)))))))))) rules)))) (defun %copy-seq (obj) (if (listp obj) (copy-seq obj) obj)) (defun copy-hash-table (table &key (value-copy-function #'identity)) (let ((copied-table (make-hash-table :test (hash-table-test table) :size (hash-table-size table)))) (maphash (lambda (key value) (setf (gethash key copied-table) (funcall value-copy-function value))) table) copied-table)) (defmethod copy-constraint-node ((node constraint-node) &key parent) (let ((copied-node (make-instance (class-of node) :constraint (constraint node) :parent parent :rules (copy-seq (constraint-rules node))))) (setf (child-constraints copied-node) (mapcar (lambda (child) (copy-constraint-node child :parent copied-node)) (child-constraints node))) copied-node)) (defmethod deep-copy-hash-table (obj) (typecase obj (cons (copy-tree obj)) (constraint-node (copy-constraint-node obj)) (hash-table (copy-hash-table obj :value-copy-function 'deep-copy-hash-table)) (t obj))) (defmethod copy-cg ((cg constraint-grammar) &key name) (let ((copied-cg (make-instance (class-of cg) :name name :operations (parse-operations cg) :feature-precedence (feature-precedence cg) :multi-tagger (multi-tagger cg) :parent-cg (name cg)))) (with-slots (sentence-delimiters set-declarations encoded-set-declarations templates syntactic-functions principal-functions barrier-elements rules domain-rules %mapping-features %syntactic-function-codes morphosyntactic-mappings morphological-heuristics rule-array careful-p ; constraint-tree language documentation group-comments) copied-cg (setf sentence-delimiters (copy-seq (sentence-delimiters cg)) set-declarations (set-declarations cg) encoded-set-declarations (encoded-set-declarations cg) templates (templates cg) syntactic-functions (copy-hash-table (syntactic-functions cg)) principal-functions (copy-hash-table (principal-functions cg)) barrier-elements (copy-hash-table (barrier-elements cg)) rules (deep-copy-hash-table (rules cg)) domain-rules (deep-copy-hash-table (domain-rules cg)) %mapping-features (%copy-seq (%mapping-features cg)) %syntactic-function-codes (%copy-seq (%syntactic-function-codes cg)) morphosyntactic-mappings (deep-copy-hash-table (morphosyntactic-mappings cg)) morphological-heuristics (deep-copy-hash-table (morphological-heuristics cg)) rule-array (make-array (length (rule-array cg)) :initial-contents (rule-array cg) :adjustable t :fill-pointer t) careful-p (careful-p cg) ;;constraint-tree (constraint-tree cg) language (language cg) documentation (cg-documentation cg) group-comments (make-array (length (rule-array cg)) :adjustable t :fill-pointer 0)) (loop for grc across (rule-group-comments cg) with grc-list = nil and copy-grc = nil do (cond ((null grc) (vector-push nil group-comments)) ((eq grc grc-list) (vector-push copy-grc group-comments)) (t (setf grc-list grc copy-grc (copy-seq grc)) (vector-push copy-grc group-comments))))) copied-cg)) (defmethod copy-cg :around ((cg ne-constraint-grammar) &key &allow-other-keys) (let ((copied-cg (call-next-method))) (with-slots (%named-entity-mapping-features %named-entity-tag-codes named-entity-mappings named-entity-tags) copied-cg (setf %named-entity-mapping-features (%copy-seq (%named-entity-mapping-features cg)) %named-entity-tag-codes (%copy-seq (%named-entity-tag-codes cg)) named-entity-mappings (deep-copy-hash-table (named-entity-mappings cg)) named-entity-tags (copy-hash-table (named-entity-tags cg)))) copied-cg)) #+test (inspect (copy-cg *nbo-cg*)) #+test (make-named-cg-copy *nbo-cg* "NBO-test") ;(defparameter *cg-table* (make-hash-table :test #'equal)) (defmethod make-named-cg-copy ((cg constraint-grammar) name) (setf (gethash name *cg-table*) (copy-cg cg :name name))) (defun save-constraint-grammars (&key (cg-table *cg-table*)) (maphash (lambda (name cg) (write-cg-to-file cg :path (concat "projects:cgp;rules;" name ".lisp") :temp-path (concat "projects:cgp;rules;" name ".temp"))) cg-table)) #+test (save-constraint-grammars) #+test (let ((path "projects:cgp;rules;xx.lisp")) (list (setf (pathname-type path) "temp") path)) (defmethod write-cg-to-file ((cg constraint-grammar) &key path temp-path default-p version-comment) (print :path) (print temp-path) (let ((temp-path (or temp-path #-allegro(setf (pathname-type path) "temp") #+allegro(merge-pathnames ".temp" path))) (success-p nil)) (print :temp-path) (print temp-path) (print path) (unwind-protect (progn (with-open-file (stream temp-path :direction :output :if-exists :supersede :if-does-not-exist :create) (write-cg-to-stream cg :stream stream :default-p default-p :version-comment version-comment)) (setf success-p t)) (if success-p (rename-file temp-path path) (delete-file temp-path))))) #+test (defun write-comment (string &optional (stream *standard-output*)) (dolist (line (split string #\Newline)) (format stream ";; ~a~%" line))) #+test (write-cg-to-stream (gethash "nbo" *cg-table*)) #+test (defmethod write-cg-to-stream ((cg constraint-grammar) &key (stream *standard-output*) (encoding $encoding) version-comment) (let ((*package* (find-package :cgp)) (*print-case* :downcase)) (format stream "~s~%" '(foo bar)))) ;; changed 7.2.2001 (defmethod write-cg-to-stream ((cg constraint-grammar) &key (stream *standard-output*) (encoding $encoding) default-p version-comment) (let ((*package* (find-package :cgp)) (*print-case* :downcase)) (with-slots (group-comments creation-date change-date) cg (setf change-date (get-universal-time)) ;; set change-date when writing (labels ((encode (obj) obj #+ignore-yet (cond ((eq encoding $encoding) obj) ((stringp obj) (if (eq $encoding encoding) obj (convert-string (copy-seq obj) $encoding encoding))) ((consp obj) (mapcar #'encode obj)) (t obj)))) (write-line ";;;-*- Mode: Lisp; Package: CGP -*-" stream) ; mode line (terpri stream) (when version-comment (format stream "~{;;*;; ~a~%~}~%" (split version-comment #\Newline))) (terpri stream) (write-string ";;; Denne filen er automatisk generert. " stream) (u::format-universal-time (get-universal-time) stream :timestamp :nbo) (terpri stream) (terpri stream) (write-line "(in-package :cgp)" stream) (terpri stream) (let* ((language (language cg)) (name (if default-p (string-downcase language) (name cg)))) (format stream "(let* ((*tagger* *~a-tagger*) (*cg* (make-instance '~s :language ~s :name \"~a\" :multi-tagger *~a-tagger* :locked-p ~s :operations (list ;#'analize-unknown-words #'disambiguate-sentence #'h-disambiguate-sentence #'disambiguate-sentence #'map-sentence #'s-disambiguate-sentence (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 1)) (lambda (cg sentence) (h-s-disambiguate-sentence cg sentence 3)) #+ignore #'h-s-disambiguate-sentence) :feature-precedence (mapcar #'feature-code '(det adj verb subst pron adv fork konj clb))))) (setf (gethash ~s *cg-table*) *cg*)~%~%" language (type-of cg) language (if default-p (string-downcase language) name) language (cg-locked-p cg) (if default-p (string-downcase language) name) )) (format stream "(admin-info~% :language ~s~% :created ~s~% :last-change ~s~% :locked ~s~% :parent ~s)~%~%" (language cg) (u::format-universal-time creation-date nil :timestamp :nbo) (u::format-universal-time change-date nil :timestamp :nbo) (cg-locked-p cg) (parent-cg cg)) (terpri stream) (format stream ";; Information section~%(define-information-section~% ~s)~%~%" (if (eq $encoding encoding) (cg-documentation cg) (convert-string (copy-seq (cg-documentation cg)) $encoding encoding))) (format stream ";; Sentence delimiters~%(define-sentence-delimiters~% '~s)~%~%" (sentence-delimiters cg)) (write-line ";; set declarations" stream) (write-line ";; \"strings\" are converted to %symbols% for faster lookup" stream) (format stream "(define-sets~% '(~%") (let ((definitions (sort (collecting (maphash (lambda (symbol def) (collect (cons symbol (encode def)))) (set-declarations cg))) #'string< :key #'car))) (dolist (def definitions) (format stream " ~s~%" def))) (write-line "))" stream) (terpri stream) (write-line ";; syntactic function declarations" stream) (format stream "(define-syntactic-functions~% '~s)~%~%" (sort (collecting (maphash (lambda (symbol val) (declare (ignore val)) (collect symbol)) (syntactic-functions cg))) #'string<)) (terpri stream) (when (typep cg 'ne-constraint-grammar) (write-line ";; named entity tags" stream) (format stream "(define-named-entity-tags~% '~s)~%~%" (sort (collecting (maphash (lambda (symbol val) (declare (ignore val)) (collect symbol)) (named-entity-tags cg))) #'string<)) (terpri stream)) (write-line ";; Reglene er listet i rekkefølgen de brukes av taggeren." stream) (terpri stream) (write-line ";; Morphological disambiguation rules" stream) (format stream "(define-rules~% '(~%") (let* ((written-groups ()) (group-id 0) (domains (sort (collecting ; sort the features first (maphash (lambda (domain rules-table) (declare (ignore rules-table)) (collect domain)) (domain-rules cg))) #'string<)) (rule-type-tables (append (mapcar (lambda (domain) (gethash domain (domain-rules cg))) domains) (list (rules cg))))) (dolist (rule-type-table rule-type-tables) (dolist (type '(=! =!! =0 =!h =!!h =0h)) (when-let (rule-table (gethash type rule-type-table)) (let ((features (sort (collecting ;; sort the features first (maphash (lambda (feature features+rule) (declare (ignore features+rule)) (collect feature)) rule-table)) #'string<))) (dolist (feature features) (dolist (features+rule (gethash feature rule-table)) (let* ((rule (cdr features+rule)) (rule-group (aref group-comments (rule-id rule)))) (when (and rule-group (not (find rule-group written-groups :key #'cdr))) (format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group)) (push (cons group-id rule-group) written-groups)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (format stream "~% (") (when (rule-comment rule) (format stream ":comment ~s" (encode (rule-comment rule))) (when rule-group (format stream "~% "))) (when rule-group (format stream ":group ~a" (car (find rule-group written-groups :key #'cdr)))) (write-line "))" stream)) (terpri stream))))))) (write-line ";; Morphosyntactic mapping rules" stream) (let* ((rule-table (morphosyntactic-mappings cg)) (features (sort (collecting ; sort the features first (maphash (lambda (feature features+rule) (declare (ignore features+rule)) (collect feature)) rule-table)) #'string<))) (dolist (feature features) (dolist (features+rule (gethash feature rule-table)) (let* ((rule (cdr features+rule)) (rule-group (aref group-comments (rule-id rule)))) (when (and rule-group (not (find rule-group written-groups :key #'cdr))) (format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group)) (push (cons group-id rule-group) written-groups)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (format stream "~% (") (when (rule-comment rule) (format stream ":comment ~s" (encode (rule-comment rule))) (when rule-group (format stream "~% "))) (when rule-group (format stream ":group ~a" (car (find rule-group written-groups :key #'cdr)))) (write-line "))" stream)) (terpri stream)))) (write-line ";; Syntactic disambiguation rules" stream) (write-line ";; Syntactic disambiguation rules") (dolist (rule-type-table rule-type-tables) (dolist (type '(=s! =s0 =s!h1 =s0h1 =s!h2 =s0h2 =s!h3 =s0h3)) (when-let (rule-table (gethash type rule-type-table)) (let ((features (sort (collecting ; sort the features first (maphash (lambda (feature rule-tree) (declare (ignore rule-tree)) (collect feature)) rule-table)) #'string<))) (dolist (feature features) (map-tree-rules cg (gethash feature rule-table) (lambda (rule) (let ((rule-group (aref group-comments (rule-id rule)))) (when (and rule-group (not (find rule-group written-groups :key #'cdr))) (format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group)) (push (cons group-id rule-group) written-groups)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (format stream "~% (") (when (rule-comment rule) (format stream ":comment ~s" (encode (rule-comment rule))) (when rule-group (format stream "~% "))) (when rule-group (format stream ":group ~a" (car (find rule-group written-groups :key #'cdr)))) (write-line "))" stream)) (terpri stream)))))))) (when (typep cg 'ne-constraint-grammar) (write-line ";; Named entity mapping rules" stream) (let* ((rule-table (named-entity-mappings cg)) (features (sort (collecting ; sort the features first (maphash (lambda (feature features+rule) (declare (ignore features+rule)) (collect feature)) rule-table)) #'string<))) (dolist (feature features) (dolist (features+rule (gethash feature rule-table)) (let* ((rule (cdr features+rule)) (rule-group (aref group-comments (rule-id rule)))) (when (and rule-group (not (find rule-group written-groups :key #'cdr))) (format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group)) (push (cons group-id rule-group) written-groups)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (format stream "~% (") (when (rule-comment rule) (format stream ":comment ~s" (encode (rule-comment rule))) (when rule-group (format stream "~% "))) (when rule-group (format stream ":group ~a" (car (find rule-group written-groups :key #'cdr)))) (write-line "))" stream)) (terpri stream)))) (write-line ";; Named entity disambiguation rules" stream) (dolist (type '(=n! =n0 =n!h =n0h =n!h1 =n0h1 =n!h2 =n0h2 =n!h3 =n0h3)) (when-let (rule-table (gethash type (rules cg))) (let ((features (sort (collecting ; sort the features first (maphash (lambda (feature rule-tree) (declare (ignore rule-tree)) (collect feature)) rule-table)) #'string<))) (dolist (feature features) (map-tree-rules cg (gethash feature rule-table) (lambda (rule) (let ((rule-group (aref group-comments (rule-id rule)))) (when (and rule-group (not (find rule-group written-groups :key #'cdr))) (format stream "(group-comment ~d ~s)~%~%" (incf group-id) (car rule-group)) (push (cons group-id rule-group) written-groups)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (format stream "~% (") (when (rule-comment rule) (format stream ":comment ~s" (encode (rule-comment rule))) (when rule-group (format stream "~% "))) (when rule-group (format stream ":group ~a" (car (find rule-group written-groups :key #'cdr)))) (write-line "))" stream)) (terpri stream)))))))) (write-line " ))" stream) (terpri stream) (write-line ")" stream) (write-line ";;; EOF") nil))))) #+old (defmethod write-cg-to-stream ((cg constraint-grammar) &key (stream *standard-output*) (encoding $encoding)) (with-slots (group-comments creation-date change-date) cg (labels ((encode (obj) (cond ((eq encoding $encoding) obj) ((stringp obj) (if (eq $encoding encoding) obj (convert-string (copy-seq obj) $encoding encoding))) ((consp obj) (mapcar #'encode obj)) (t obj)))) (write-line ";;;-*- Mode: Lisp; Package: CGP -*-" stream) ; mode line (terpri stream) (write-line ";;; Denne filen er automatisk generert." stream) (terpri stream) (write-line "(in-package :cgp)" stream) (terpri stream) (format stream "(admin-info~% :language ~s~% :created ~s~% :last-change ~s~% :locked ~s~% :parent ~s)~%~%" (language cg) (u::format-universal-time creation-date nil :timestamp :nbo) (u::format-universal-time change-date nil :timestamp :nbo) (cg-locked-p cg) (if (parent-cg cg) (name (parent-cg cg)) nil)) (write-line ";; Morphological disambiguation" stream) (terpri stream) (format stream ";; Information section~%(define-information-section~% ~s)~%~%" (if (eq $encoding encoding) (cg-documentation cg) (convert-string (copy-seq (cg-documentation cg)) $encoding encoding))) (format stream ";; Sentence delimiters~%(define-sentence-delimiters~% '~s)~%~%" (sentence-delimiters cg)) (write-line ";; set declarations" stream) (write-line ";; \"strings\" are converted to %symbols% for faster lookup" stream) (format stream "(define-sets~% '(~%") (let ((definitions (sort (u:collecting (maphash (lambda (symbol def) (u:collect (cons symbol (encode def)))) (set-declarations cg))) #'string< :key #'car))) (dolist (def definitions) (format stream " ~s~%" def))) (write-line "))" stream) (terpri stream) (write-line ";; syntactic function declarations" stream) (format stream "(define-syntactic-functions~% '~s)~%~%" (sort (u:collecting (maphash (lambda (symbol val) (declare (ignore val)) (u:collect symbol)) (syntactic-functions cg))) #'string<)) (terpri stream) (write-line ";; disambiguation constraints" stream) (format stream "(define-disambiguation-rules~% '(~%") (let ((written-groups ())) (loop for rule across (rule-array cg) do (when (and rule (typep rule 'disambiguation-rule) (= 0 (heuristic-level rule)) (not (find (aref group-comments (rule-id rule)) written-groups))) (let ((rule-group (aref group-comments (rule-id rule)))) (cond (rule-group (push rule-group written-groups) (format stream "(comment ~s~%" (car rule-group)) (dolist (rule (cdr rule-group)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (if (rule-comment rule) (format stream "~%~s)~%" (encode (rule-comment rule))) (write-line ")" stream)) (terpri stream)) (write-line ")" stream)) (t (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (if (rule-comment rule) (format stream "~%~s)~%" (encode (rule-comment rule))) (write-line ")" stream)) (terpri stream))))))) (write-line ") :clearp t)" stream) (terpri stream) (write-line ";; heuristic disambiguation constraints" stream) (format stream "(define-heuristic-disambiguation-rules~% '(~%") (loop for rule across (rule-array cg) do (when (and rule (typep rule 'disambiguation-rule) (= 1 (heuristic-level rule))) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (if (rule-comment rule) (format stream "~%~s)~%" (encode (rule-comment rule))) (write-line ")" stream)) (terpri stream))) (write-line ") :clearp t)" stream) (terpri stream) (write-line ";; morphosyntactic mappings" stream) (format stream "(define-mapping-rules~% '(~%") (loop for rule across (rule-array cg) do (when (and rule (typep rule 'mapping-rule)) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (if (rule-comment rule) (format stream "~%~s)~%" (encode (rule-comment rule))) (write-line ")" stream)) (terpri stream))) (write-line ") :clearp t)" stream) (terpri stream) (write-line ";; syntactic constraints" stream) (format stream "(define-syntactic-rules~% '(~%") (loop for rule across (rule-array cg) do (when (and rule (typep rule 'syntactic-disambiguation-rule) (or (null (heuristic-level rule)) ; preliminary (= 0 (heuristic-level rule)))) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (if (rule-comment rule) (format stream "~%~s)~%" (encode (rule-comment rule))) (write-line ")" stream)) (terpri stream))) (write-line ") :clearp t)" stream) (terpri stream) (dotimes (i 3) (format stream ";; heuristic syntactic constraints, level ~d~%" (1+ i)) (format stream "(define-heuristic-syntactic-rules ~% '(~%") (loop for rule across (rule-array cg) do (when (and rule (typep rule 'syntactic-disambiguation-rule) (heuristic-level rule) (= (1+ i) (heuristic-level rule))) (format stream ";; ~d~%" (rule-id rule)) (write-rule-body rule stream :to-platform encoding :end-parenthesis nil) (if (rule-comment rule) (format stream "~%~s)~%" (encode (rule-comment rule))) (write-line ")" stream)) (terpri stream))) (format stream ") :clearp t :level ~d)~%~%" (1+ i))) nil))) ;;(stored-cg-versions) (defun stored-cg-versions (&key (path "projects:cgp;rules;*.lisp") (html-p t)) "returns a list of (file-name cg-name universal-time comment) lists" (let ((versions (collecting (dolist (file (directory path)) (let* ((file-name (pathname-name file)) (ut-start (- (length file-name) 10)) (universal-time-string (when (> ut-start 0) (subseq file-name ut-start))) (cg-name (when universal-time-string (subseq file-name 0 ut-start)))) (when universal-time-string (multiple-value-bind (universal-time end) (parse-integer universal-time-string :junk-allowed t) (when (= end 10) (let ((comment nil)) (block comment (with-file-lines (line file) (cond ((and (> (length line) 6) (string= line ";;*;;" :end1 5)) (setf comment (if html-p (if comment (concat comment "
" (utf-8-encode #+old chars-to-entities (subseq line 6))) (subseq line 6)) (if comment (concat comment #.(format nil "~%") (subseq line 6)) (subseq line 6))))) ((and (> (length line) 12) (string= line "(in-package " :end1 12)) (return-from comment)) (t nil)))) (collect (list file-name cg-name universal-time comment))))))))))) (sort versions #'> :key #'caddr))) #+test (write-cg-to-stream *nbo-cg*) #+test (write-cg-to-file *nbo-cg* :path "projects:cgp;rules;nbo-all.lisp") ;;(print (%parse-query-response-key :nob "hidden_value_Newnbo3177_0")) (defun %parse-query-response-key (language key) (let ((str (string key)) (lang-length (length (symbol-name language)))) (cond ((and (> (length str) 29) (string-equal str "hidden_value_rulegroupcomment" :end1 29)) (values (parse-integer str :start (+ 29 lang-length)) :group-comment t)) ((and (> (length str) 27) (string-equal str "hidden_value_rulecommentnew" :end1 27)) (values (parse-integer str :start (+ 27 lang-length)) :comment t)) ((and (> (length str) 24) (string-equal str "hidden_value_rulecomment" :end1 24)) (values (parse-integer str :start (+ 24 lang-length)) :comment)) ((and (> (length str) 21) (string-equal str "rule_rulegroupcomment" :end1 21)) (values (parse-integer str :start (+ 21 lang-length)) :group-comment)) ((and (> (length str) 19) (string-equal str "rule_rulecommentnew" :end1 19)) (values (parse-integer str :start (+ 19 lang-length)) :comment t)) ((and (> (length str) 16) (string-equal str "rule_rulecomment" :end1 16)) (values (parse-integer str :start (+ 16 lang-length)) :comment)) ((and (> (length str) 16) (string-equal str "hidden_value_new" :end1 16)) ;;(values (parse-integer str :start (+ 16 lang-length)) :rule t) (let ((_pos (position #\_ str :start (+ 16 lang-length)))) (if _pos (values (parse-integer str :start (1+ _pos)) ; id :rule ; type t ; new-rule-p (parse-integer str :start (+ 16 lang-length) :end _pos) ; cloned-from ) (values (parse-integer str :start (+ 16 lang-length)) :rule t)))) ((and (> (length str) 13) (string-equal str "hidden_value_" :end1 13)) (values (parse-integer str :start (+ 13 lang-length)) :rule)) ((and (> (length str) 8) (string-equal str "rule_new" :end1 8)) (let ((_pos (position #\_ str :start (+ 8 lang-length)))) (if _pos (values (parse-integer str :start (1+ _pos)) ; id :rule ; type t ; new-rule-p (parse-integer str :start (+ 8 lang-length) :end _pos) ; cloned-from ) (values (parse-integer str :start (+ 8 lang-length)) :rule t)))) ((and (> (length str) 5) (string-equal str "rule_" :end1 5)) (values (parse-integer str :start (+ 5 lang-length)) :rule)) (t nil)))) (defun valid-constraint-p (constraint) (print constraint) (when (consp constraint) (when (eq (car constraint) 'NOT) (setf constraint (cdr constraint))) (unless (or (not (cdr constraint)) (and (cdr constraint) (cddr constraint) (cdddr constraint))) (destructuring-bind (pos-op set &optional link) constraint (and (or (integerp pos-op) (not (find-if-not (lambda (c) (find c "LRC*-+0123456789" :test #'char-equal)) (string pos-op)))) (gethash set (set-declarations *cg*)) (or (null link) (integerp link) (not (find-if-not (lambda (c) (find c "LRC*-+0123456789" :test #'char-equal)) (string link))))))))) #+test (let ((*cg* *nbo-cg*) (*tagger* *nbo-tagger*)) (print (valid-constraint-p '(LR0 (subst subst) *R)))) (defun check-rule-consistency (rule) (labels ((test-features (target) (when (find-if-not #'feature-code target) (error "Det finnes ugyldig trekk i ~a" rule)))) (destructuring-bind (first . rest) rule (cond ((consp first) ;; mapping-regel (unless (and (listp (car rest)) (listp (cdr rest)) (listp (cadr rest)) (null (cddr rest))) (error "ugyldig kropp i mappingregel ~a" rule)) (let ((target (if (stringp (car first)) (cdr first) first))) (test-features target)) (let ((constraint (find-if-not #'valid-constraint-p (car rest)))) (when constraint (error "~a er ikke en velformet føring" constraint))) (test-features (cadr rest))) ((or (eq first '@w) (stringp first)) (destructuring-bind (rule-type target . constraints) rest (unless (operator-to-rule-class *cg* rule-type) (error "~a er ukjent regeltype" rule-type)) (test-features (if (stringp (car target)) (cdr target) target)) (let (( constraint (find-if-not #'valid-constraint-p constraints))) (when constraint (error "~a er ikke velformet føring" constraint))))) (t (error "Kan ikke analysere første del av regelen: ~a" rule)))))) #+test (let ((*cg* *nbo-cg*) (*tagger* *nbo-tagger*)) (check-rule-consistency '("fast" =! (adj fl) (*-1 setn-gr R+1) (NOT R+1 prep/adv) (NOT 0 farlige-adj) (*-1 %seg% *R) (NOT *R ikke-adv) (*-1 %seg% L-1) (L-1C %fast%-verb *L) (*L nom *R) (LR0 nom-fl) (NOT -1 det) (NOT 1 subst/adj)))) (defmethod %update-rules ((cg constraint-grammar) query-alist) (let ((rules-to-delete ()) (rules-to-add ()) (changed-p nil)) ;;(print query-alist) (with-slots (rule-array group-comments change-date) cg (dolist (rule-def query-alist) (destructuring-bind (key . value) rule-def (multiple-value-bind (rule-id type new-rule-p group-rule-id) (%parse-query-response-key (language cg) key) (when rule-id ;;(print (list rule-id type new-rule-p group-rule-id)) (let ((rule (unless new-rule-p (aref rule-array rule-id)))) (when (eq type :rule) (cond ((string-equal value "deleted") (unless new-rule-p (push rule rules-to-delete))) (t (let* ((*package* (find-package :cgp)) (definition (read-from-string (utf-8-decode value) #+ignore(convert-string value :win :mac))) (*cg* cg) (*tagger* (multi-tagger cg))) (check-rule-consistency definition) (setf rules-to-add ;; ensures order (append rules-to-add (list (if new-rule-p rule-id rule) nil))) (cond (new-rule-p (push definition (getf rules-to-add rule-id)) (push :definition (getf rules-to-add rule-id)) (when group-rule-id (push group-rule-id (getf rules-to-add rule-id)) (push :insert-before (getf rules-to-add rule-id)))) (t (push rule rules-to-delete) (push definition (getf rules-to-add rule)) (push :definition (getf rules-to-add rule)) ;; makes sure that the modified rule is going be at the same ;; place as the original one if possible (push (rule-id rule) (getf rules-to-add rule)) (push :insert-before (getf rules-to-add rule))))))))))))) ;; comments (dolist (rule-def query-alist) (destructuring-bind (key . value) rule-def (multiple-value-bind (rule-id type new-rule-p #+ignore group-rule-id) (%parse-query-response-key (language cg) key) (case type (:comment (when rule-id (let ((rule (aref rule-array rule-id))) (cond (new-rule-p (push #+old(convert-string value :win :mac) (utf-8-decode value) (getf rules-to-add rule-id)) (push :comment (getf rules-to-add rule-id))) (t (push rule rules-to-delete) (push #+old(convert-string value :win :mac) (utf-8-decode value) (getf rules-to-add rule)) (push :comment (getf rules-to-add rule)) ;; makes sure that the modified rule is going be at the same place ;; as the original one if possible (push (rule-id rule) (getf rules-to-add rule)) (push :insert-before (getf rules-to-add rule))))))) (:group-comment (when rule-id (setf (car (aref group-comments rule-id)) (utf-8-decode value) #+old(convert-string value :windows $encoding) changed-p t))))))) (print "rules-to-add:") (print rules-to-add) (print "rules-to-delete:") (print rules-to-delete) (let ((already-stored-rules (loop for (rule definition) on rules-to-add by #'cddr collect (apply #'add-rule cg (unless (integerp rule) rule) definition)))) (dolist (rule rules-to-delete) (when (and rule (not (find rule already-stored-rules))) (delete-rule cg rule :delete-from-array-p (not (find rule rules-to-add)))))) (when (or rules-to-delete rules-to-add changed-p) (setf change-date (get-universal-time))))) (with-slots (%feature-codes) cg (clrhash %feature-codes))) (defmethod rule-definition ((rule rule)) (with-slots (domain type target constraints) rule (list* (or domain '@w) type ;; in syntactic rules target is an atom; in md-rules, a list of a list of features. (if (consp target) (car target) target) constraints))) (defmethod rule-definition ((rule mapping-rule)) (with-slots (target constraints labels) rule (list target constraints labels))) ;(inspect (aref (rule-array (gethash "nbo-test" *cg-table*)) 2305)) ;(rule-definition (aref (rule-array (gethash "nbo-test" *cg-table*)) 244)) ;(rule-definition (aref (rule-array (gethash "nbo-test" *cg-table*)) 2305)) (defmethod add-rule ((cg constraint-grammar) rule-previous-version &key definition comment insert-before) ;(print (list definition comment insert-before)) (when (null definition) ;; get the definition from the previous version (setf definition (rule-definition rule-previous-version))) (when (and (null comment) rule-previous-version) ;; get the definition from the previous version (setf comment (rule-comment rule-previous-version))) (let ((group-comment (when insert-before (aref (rule-group-comments cg) insert-before)))) (define-rule cg definition (when rule-previous-version (rule-id rule-previous-version)) ; nil ;; :rule-class (when rule-previous-version (class-of rule-previous-version)) :previous-version rule-previous-version :comment comment ; :group-comment group-comment :insert-before insert-before))) (defmethod delete-rule ((cg constraint-grammar) (rule disambiguation-rule) &key (delete-from-array-p t)) (print delete-from-array-p) (with-slots (target language type domain boundary-mode heuristic-level id) rule (let* ((type-table (if (stringp domain) (gethash domain (domain-rules cg)) (rules cg))) (rule-table (gethash type type-table))) (Print (list rule-table domain)) (labels ((base-form-error? (feature features) ; (9.1.) (when (stringp feature) (error "The first feature of a target may not be a base form: ~s" features)))) (with-slots (rule-array group-comments) cg ;; delete old rule and store rule in rule array (when delete-from-array-p (setf (aref rule-array id) nil)) ;; update group-comments list for rule (when (and delete-from-array-p (aref group-comments id)) (setf (cdr (aref group-comments id)) (delete (rule-id rule) (cdr (aref group-comments id)))) ;; delete old rule from group comments (setf (aref group-comments id) nil)) ;; delete rule (dolist (features target) (cond ((consp features) ; a feature set (setf (gethash (car features) rule-table) (delete (cons (cdr features) rule) (gethash (car features) rule-table) :test #'stored-rules-equal-p)) (when (null (gethash (car features) rule-table)) (remhash (car features) rule-table))) (t (setf (gethash features rule-table) (delete (cons nil rule) (gethash features rule-table) :test #'stored-rules-equal-p)) (when (null (gethash features rule-table)) (remhash features rule-table))))) rule))))) (defmethod delete-rule ((cg constraint-grammar) (rule mapping-rule) &key (delete-from-array-p t)) (with-slots (rule-array group-comments) cg (with-slots (type target id) rule (let ((rule-table (morphosyntactic-mappings cg))) (when delete-from-array-p (setf (aref rule-array id) nil)) ;; delete rule (setf (gethash (car target) rule-table) (delete (cons (cdr target) rule) (gethash (car target) rule-table) :test #+ignore #'equal #-ignore #'stored-rules-equal-p :count 1)) ;; update group-comments list for rule (when (and delete-from-array-p (aref group-comments id)) (setf (cdr (aref group-comments id)) (delete (rule-id rule) (cdr (aref group-comments id)))) ;; delete old rule from group comments (setf (aref group-comments id) nil)) (when (null (gethash (car target) rule-table)) (remhash (car target) rule-table)) rule)))) (defmethod delete-rule ((cg constraint-grammar) (rule named-entity-mapping-rule) &key (delete-from-array-p t)) (with-slots (rule-array group-comments) cg (with-slots (type target id) rule (let ((rule-table (named-entity-mappings cg))) (when delete-from-array-p (setf (aref rule-array id) nil)) ;; delete rule (setf (gethash (car target) rule-table) (delete (cons (cdr target) rule) (gethash (car target) rule-table) :test #+ignore #'equal #-ignore #'stored-rules-equal-p :count 1)) ;; update group-comments list for rule (when (and delete-from-array-p (aref group-comments id)) (setf (cdr (aref group-comments id)) (delete (rule-id rule) (cdr (aref group-comments id)))) ;; delete old rule from group comments (setf (aref group-comments id) nil)) (when (null (gethash (car target) rule-table)) (remhash (car target) rule-table)) rule)))) (defmethod delete-rule ((cg constraint-grammar) (rule syntactic-disambiguation-rule) &key (delete-from-array-p t)) (with-slots (rule-array group-comments) cg (with-slots (type target domain id) rule (let* ((type-table (if (stringp domain) (domain-rules cg) (rules cg))) (rule-table (gethash type type-table))) (when rule-table (remove-rule-from-tree cg rule (gethash target rule-table)) (when delete-from-array-p (setf (aref rule-array id) nil)) ;; update group-comments list for rule (when (and delete-from-array-p (aref group-comments id)) (setf (cdr (aref group-comments id)) (delete (rule-id rule) (cdr (aref group-comments id)))) ;; delete old rule from group comments (setf (aref group-comments id) nil))))))) (defun %parse-declaration-query-response-key (key) (let ((str #-cl-http key #+cl-http(symbol-name key))) (cond ((and (> (length str) 27) (string-equal str "hidden_value_declarationset" :end1 27)) (values (parse-integer str :start 27) :set)) ((and (> (length str) 28) (string-equal str "hidden_value_declarationname" :end1 28)) (values (parse-integer str :start 28) :name)) ((and (> (length str) 14) (string-equal str "declarationset" :end1 14)) (values (parse-integer str :start 14) :set)) ((and (> (length str) 15) (string-equal str "declarationname" :end1 15)) (values (parse-integer str :start 15) :name))))) #+test (let ((*tagger* *nbo-tagger*)) (print (feature-code 'adv))) #+test (maphash (lambda (key val) (print (list key val))) (feature-table *nbo-tagger*)) (defmethod %encode-definition ((cg constraint-grammar) definition) (let ((*tagger* (multi-tagger cg))) (cond ((listp definition) (remove-if #'null (mapcar (lambda (def) (%encode-definition cg def)) definition))) ((stringp definition) definition) (t ;; *** should depend on language! (or (feature-code definition) (error "The feature \"~a\" is not defined." definition)))))) (defmethod %update-set-declarations ((cg constraint-grammar) dec-count query-alist) (with-slots (set-declarations encoded-set-declarations change-date) cg (let ((declarations ()) (changed-p nil) (*package* (find-package :cgp))) (dolist (declaration query-alist) (multiple-value-bind (id type) (%parse-declaration-query-response-key (car declaration)) (print (list :id id :type type)) (when id (push (unless (string= (cdr declaration) "") (read-from-string (utf-8-decode (subst-substrings (cdr declaration) '("<" "<" ">" ">" "&" "&"))))) (getf declarations id)) (print (list :value (subst-substrings (cdr declaration) '("<" "<" ">" ">")))) (push type (getf declarations id))))) (print declarations) (loop for (id value) on declarations by #'cddr do (destructuring-bind (&key name set) value (let ((sorted-set (when set (sort-definition (definition-remove-stars set))))) (if (<= id dec-count) ; update an old declaration (cond (set (setf changed-p t) (setf (gethash name encoded-set-declarations) (%encode-definition cg sorted-set) (gethash name set-declarations) sorted-set)) (t (setf changed-p t) (remhash name set-declarations) (remhash name encoded-set-declarations))) (when (and name set) (setf changed-p t) (setf (gethash name encoded-set-declarations) (%encode-definition cg sorted-set) (gethash name set-declarations) sorted-set)))))) (when changed-p (setf change-date (get-universal-time)))))) ;;; EOF