;; -*- Mode: lisp; Syntax: ansi-common-lisp; Package: XLE; Base: 10; Readtable: augmented-readtable -*- ;;;; XLE-Web users are granted the rights to distribute and use this software ;;;; as governed by the terms of the Lisp Lesser GNU Public License ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. ;;;; Author: Paul Meurer (paul.meurer@aksis.uib.no) ;;;; Aksis, Unifob, University of Bergen, Norway ;;;; http://www.aksis.uib.no (in-package :XLE) (eval-when (:load-toplevel :compile-toplevel :execute) (#+sbcl defparameter #-sbcl defconstant +parameter-list+ '(timeout start_skimming_when_scratch_storage_exceeds start_skimming_when_total_events_exceed max_new_events_per_graph_when_skimming max_xle_scratch_storage max_raw_subtrees max_medial_constituent_weight max_medial2_constituent_weight max_selection_additional_storage normalize_chart_graphs))) #+test (print (list timeout start_skimming_when_scratch_storage_exceeds start_skimming_when_total_events_exceed max_new_events_per_graph_when_skimming max_xle_scratch_storage max_raw_subtrees max_medial_constituent_weight max_medial2_constituent_weight max_selection_additional_storage normalize_chart_graphs)) ;;(print max_xle_scratch_storage) (defmacro write-xle-performance-vars-to-stream (stream) `(progn ,@(collecting (dolist (param +parameter-list+) (collect `(format ,stream "~&set ~(~a~) ~d" ',param ,param))) ;; preliminary! (collect `(format ,stream "~&set property_weights_file property-weights.txt"))))) #+test (write-xle-performance-vars-to-stream *standard-output*) (defparameter *grammar* nil) (defparameter *grammars* (make-hash-table :test #'equal)) #+test (read-xle-performance-vars-from-file (concat *pargram-path* "/norwegian/bokmal/performance-vars.txt")) #+allegro (defmacro read-xle-performance-vars-from-file (file) (with-gensyms (alist param-string val-string fun parser) `(let ((,alist (list ,@(collecting (dolist (parameter +parameter-list+) (collect `',parameter) (collect `(lambda (val) (setf ,parameter val) ;;(print ,parameter) ;; set parameters of all charts (when (and *grammar* (not (eq ',parameter 'max_xle_scratch_storage))) (dolist (,parser (parsers *grammar*)) (setf (ff:fslot-value-typed 'Chart nil (parser-address ,parser) ',parameter) val)))))))))) ;;#-64bit ;; test (error "disabled") (when (probe-file ,file) (with-file-lines (line ,file :external-format :iso-8859-1) ;;(print :read-xle-performance-vars-from-file) (write-line line *standard-output*) (unless (string= "" (string-trim " " line)) (destructuring-bind (,param-string ,val-string) (cdr (split line #\Space)) (when-let (,fun (getf ,alist (print (intern (string-upcase ,param-string) :xle)))) (funcall ,fun (parse-integer ,val-string)))))))))) (defun split (string char &optional count escape-char ignore-null-chunks-p) "COUNT is max number of returned chunks. When ESCAPE-CHAR precedes CHAR, the string is not splitted at that position." (labels ((walk (pos) (let ((next-pos (position char string :start pos))) (if (and next-pos (or (null count) (not (zerop (decf count)))) (or (null escape-char) (zerop next-pos) (char/= (char string (1- next-pos)) escape-char))) (if (and ignore-null-chunks-p (= pos next-pos)) (walk (1+ next-pos)) (cons (subseq string pos next-pos) (walk (1+ next-pos)))) (unless (and ignore-null-chunks-p (= pos (length string))) (list (subseq string pos))))))) (walk 0))) ;; Several grammars can coexist (defclass grammar () ((name :initform nil :initarg :name :reader name) (language :initform nil :initarg :language :reader language) ;; use 3-char ISO names (owner :initform nil :initarg :owner :reader owner) (grammar-path :initform nil :initarg :grammar-path :reader grammar-path) (morphology-type :initform :fst-morphology :initarg :morphology-type :reader morphology-type) (root-cat :initform "ROOT" :reader root-cat) (reparse-cat :initform "FRAGMENTSTOP" :reader reparse-cat) (graph-class :initform 'xle-mrs-graph :initarg :graph-class :reader graph-class) (graph-table :initform (make-hash-table :test #'equal) :reader graph-table) (ot-graph :initform nil :initarg :graph :reader ot-graph) ;; for workaround in disabling OT-marks (reparse-on-timeout-topcat :initform nil :accessor reparse-on-timeout-topcat) (max-parsers :initform 50 :initarg :max-parsers) (parsers :initform () :reader parsers) (default-sentence :initform "" :initarg :default-sentence :reader default-sentence) (errors :initform () :accessor errors) (messages :initform () :accessor messages) ;; string tree containing the discriminant weights of a discriminant-based parse ranking model ;; by default loaded from the file $grammar-path/discriminant-weights.txt (weight-table :initform (dat:make-string-tree) :accessor weight-table) )) (defmethod initialize-instance :after ((grammar grammar) &key &allow-other-keys) (with-slots (grammar-path weight-table) grammar (let ((weights-file (merge-pathnames "discriminant-weights.txt" grammar-path))) (cond ((probe-file weights-file) (with-file-lines (line weights-file) (destructuring-bind (weight feature) (split line #\tab 2) (setf (dat:string-tree-get weight-table feature) (read-from-string weight)))) (format t "~&Discriminant weights for ~a loaded from ~a.~%" grammar weights-file)) (t (warn "No discriminant weights file found for ~s at ~a." grammar weights-file)))))) ;;(print (merge-pathnames "discriminant-weights.txt" "/usr/local/xledir/pargram/norwegian/bokmal/bokmal-mrs.lfg")) (defmethod print-object ((grammar grammar) stream) (with-slots (name) grammar (print-unreadable-object (grammar stream :type t :identity t) (format stream "~a" name)))) ;; There may coexist several parser objects; this is needed when sentences are parsed concurrently ;; (as in a Web context). MAX-PARSERS in GRAMMAR bounds the number of parsers; if ACTIVE-P = NIL, ;; a parser object may be reused. If a new parser is needed and MAX-PARSERS is reached, the oldest ;; one (as recorded in LAST-ACCESS) is reused. This may be overriden by FORCE-P = T in GET-PARSER. ;; TASK records which task (Web, TSDB, ...) is using the parser. (defclass parser () ((grammar :initform nil :initarg :grammar :reader grammar) (parser-address :initform nil :initarg :parser-address :reader parser-address) (last-access :initform (get-universal-time) :initarg :last-access :accessor last-access) (task :initform nil :initarg :task :accessor task) (active-p :initform nil :initarg :active-p :accessor parser-active-p) (valid-p :initform t :accessor parser-valid-p) (graph :initform nil :initarg :graph :accessor graph))) (defmethod print-object ((parser parser) stream) (with-slots (last-access task active-p) parser (print-unreadable-object (parser stream :type t :identity t) (format stream "~a:~a (~a)" task last-access (if active-p "active" "inactive"))))) (defmethod initialize-instance :after ((parser parser) &key (error-on-fatal-grammar-error-p t) get-errors-p check-generator-p &allow-other-keys) #+debug(print (list :xle (getenv "XLE") :xlepath (getenv "XLEPATH"))) (with-slots (grammar parser-address) parser (with-slots (grammar-path parsers errors root-cat reparse-cat) grammar #+debug(print (list grammar-path parsers errors root-cat reparse-cat)) #+allegroxx ;; disabled (when get-errors-p ;;(excl::run-shell-command (print (format nil "echo $XLE; echo $XLEPATH;"))) (let* ((stream #+allegro (excl::run-shell-command (if check-generator-p (print (format nil "export XLE=~a; export XLEPATH=~a; xle -noTk -execute 'create-generator ~a; exit'" (getenv "XLE") (getenv "XLEPATH") grammar-path)) (print (format nil "export XLE=~a; export XLEPATH=~a; xle -noTk -execute 'create-parser ~a; exit'" (getenv "XLE") (getenv "XLEPATH") grammar-path))) :wait nil :output :stream :error-output :output) #+sbcl ;; doesn't work yet (sb-ext:run-program (if check-generator-p (print (format nil "export XLE=~a; export XLEPATH=~a; xle -noTk -execute 'create-generator ~a; exit'" (getenv "XLE") (getenv "XLEPATH") grammar-path)) (print (format nil "export XLE=~a; export XLEPATH=~a; xle -noTk -execute 'create-parser ~a; exit'" (getenv "XLE") (getenv "XLEPATH") grammar-path))) :wait nil :output :stream :error-output :output)) (fatal-p nil) (error-lines (print (collecting (loop for line = (read-line stream nil nil) with foundp = nil with prev = "" while line do (setf line (string-trim " " line)) (cond (foundp (unless (and (string= prev "") (string= line "")) (when (search "could not load" line) (setf fatal-p t)) (setf prev line) (collect line))) ((or (search "Warning-" line) (search "Error" line) (search "error" line)) (setf foundp t) (when (search "could not load" line) (setf fatal-p t)) (collect line)) (t nil))))))) (setf errors (print error-lines)) (when fatal-p (setf parser-address 0) (if error-on-fatal-grammar-error-p (error "~s contains fatal errors." grammar) (return-from initialize-instance))))) #+debug(print (list :creating-parser grammar-path)) #+allegro (with-cstr (string-ptr grammar-path) (setf parser-address (create-parser string-ptr))) #+sbcl (setf parser-address (create-parser grammar-path)) #+debug(print (list :pa parser-address)) (when (and (zerop parser-address) error-on-fatal-grammar-error-p) (error "Could not create parser for ~s." parser)) #+debug (print (list :parser-address parser-address :rc (foreign-slot parser-address Chart rootcat) :rpc (foreign-slot parser-address Chart reparse-cat) #||:root-cat (when parser-address (ffc::%get-null-terminated-string (foreign-slot parser-address Chart rootcat))) :reparse-cat (when parser-address (ffc::%get-null-terminated-string (foreign-slot parser-address Chart reparse-cat)))||# )) (unless (zerop parser-address) (let ((rca (foreign-slot parser-address Chart rootcat)) (rpca (foreign-slot parser-address Chart reparse-cat))) #+debug(print (list :rca rca :rpca rpca :inputcount (ff:fslot-value-typed 'Chart nil parser-address 'input_count))) (if (zerop rca) (warn "Could not initialize root-cat") (setf root-cat (ffc::%get-null-terminated-string rca))) (unless (zerop rpca) (setf reparse-cat (ffc::%get-null-terminated-string rpca))))) #+debug(print (list grammar-path parsers errors root-cat reparse-cat)) ))) ;; Try to find an inactive parser with same TASK; ;; if there is none, create a new one; ;; if no more can be created and FORCE-P = T, grab the oldest active one with same TASK ;; if there is none and FORCE-P = T, create a new one anyway (good idea?) (defmethod get-parser ((grammar grammar) &key task force-p (active-p t) (error-on-fatal-grammar-error-p t) check-generator-p) (with-slots (grammar-path parsers max-parsers) grammar (or (#+allegro mp::without-scheduling #-allegro progn (let ((parser (find-if (lambda (parser) (and (not (parser-active-p parser)) (eq task (task parser)))) parsers))) (when parser (when active-p (setf (parser-active-p parser) t)) (setf (last-access parser) (get-universal-time)) parser))) (when (< (length parsers) max-parsers) (let ((parser (make-instance 'parser :grammar grammar :task task :active-p active-p :error-on-fatal-grammar-error-p error-on-fatal-grammar-error-p :check-generator-p check-generator-p :get-errors-p (and (owner grammar) #+ignore (equal (name grammar) "Georgian"))))) (push parser parsers) parser)) (when force-p ;; take the parser with same task and least recent access (#+allegro mp::without-scheduling #-allegro progn (let ((parser nil) (last-access (get-universal-time))) (dolist (p parsers) (when (and (eq task (task p)) (< (last-access p) last-access)) (setf parser p last-access (last-access p)))) (when parser (setf (last-access parser) (get-universal-time)) parser)))) (when force-p (let ((parser (make-instance 'parser :grammar grammar :task task :active-p t :get-errors-p (owner grammar)))) (push parser parsers) parser))))) (defclass xle-graph () ((grammar :initform *grammar* :initarg :grammar :reader grammar) (parser :initform nil :initarg :parser :accessor parser) (sentence :initform nil :initarg :sentence :reader sentence) (root-cat :initform nil :initarg :root-cat :reader root-cat) (statistics :initform nil :reader statistics) (top-f-node-var :initform 0 :accessor top-f-node-var) (graph-address :initform nil :reader graph-address) (prolog :initform nil :accessor prolog) ;; the unparsed prolog terms as string (prolog-terms :initform nil :initarg :prolog-terms :accessor prolog-terms) ;; the parsed prolog terms (lists) (solution :initform 0 :reader solution) (%previous-solution :initform nil) ;; used for deallocating XLE memory of unused solution objects (solutions :initform () :accessor solutions) (solution-count :initform 0 :accessor solution-count) (solution-nr :initform nil :accessor solution-nr) (equivalences :initform nil :accessor equivalences) (disjunctions :initform nil :accessor disjunctions) (disjunctions-tree :initform nil :accessor disjunctions-tree) (disjunction-solutions :initform () :accessor disjunction-solutions) ;; maps disjunction vars to solution sets (disjunction-choices :initform nil :accessor disjunction-choices) (choice-weights :initform nil :accessor choice-weights) (discriminants :initform nil :accessor discriminants) (c-structure-list :initform () :accessor c-structure-list) (c-structure-svg :initform nil :accessor c-structure-svg-string) (current-c-structure :initform nil :accessor current-c-structure) (var-array-list :initform () :accessor var-array-list) (var-array :initform nil :accessor var-array) (c-var-array :initform nil :accessor c-var-array) (var-list :initform nil :accessor var-list) (realized-vars :initform () :accessor realized-vars) (subsume-list :initform nil :accessor subsume-list) (phi-list :initform () :reader phi-list) (inverse-phi-list :initform () :reader inverse-phi-list) (global-projection-fs-ids :initform () :accessor global-projection-fs-ids) (current-var-array :initform nil :accessor current-var-array) (restricted-solutions-count :initform 0 :reader restricted-solutions-count) (unoptimal-solutions-count :initform 0 :reader unoptimal-solutions-count) (disable-OT-p :initform nil :initarg :disable-OT-p :reader disable-OT-p) ;; works only after prolog parse (fragments-p :initform nil :reader fragment-analysis-p) (unknown-word :initform nil :accessor unknown-word) (discriminants-class :initform 'discriminants :allocation :class :reader discriminants-class) (morph-discriminant-class :initform 'morph-discriminant :allocation :class :reader morph-discriminant-class) (f-structure-discriminant-class :initform 'f-structure-discriminant :allocation :class :reader f-structure-discriminant-class) (rule-discriminant-class :initform 'rule-discriminant :allocation :class :reader rule-discriminant-class) (lex-discriminant-class :initform 'lex-discriminant :allocation :class :reader lex-discriminant-class) (constituent-discriminant-class :initform 'constituent-discriminant :allocation :class :reader constituent-discriminant-class) (secondary-discriminant-class :initform 'secondary-discriminant :allocation :class :reader secondary-discriminant-class) (ranking :initform nil :accessor ranking) ;; list of (solution . rank) pairs sorted by rank ;; inspector (inspect-p :initform nil :initarg :inspect-p :accessor inspect-p) (c-root-node :initform nil :initarg :c-root-node :accessor c-root-node) (active-node :initform nil :initarg :active-node :accessor active-node) ;;(chart-path-stack :initform () :accessor chart-path-stack) (tree-solutions-count :initform -1 :reader tree-solutions-count) (good-tree-solutions-count :initform -1 :reader good-tree-solutions-count) (tree-solution-id :initform 0 :accessor tree-solution-id) (semform-edge-list :initform () :accessor semform-edge-list) (morphology-tree :initform nil :accessor morphology-tree) (current-morphology-tree :initform nil :accessor current-morphology-tree) )) (defclass xle-mrs-graph (xle-graph) ((mrs-list :initform () :accessor mrs-list) (current-mrs :initform nil :accessor current-mrs) ;; the mrs to unpack in a packed PP-normalized mrs (sub-solution-nr :initform -1 :accessor sub-solution-nr) (normalized-pairs :initform () :accessor normalized-pairs))) (defmacro null-or-zero-p (obj) `(or (null ,obj) (zerop ,obj))) ;; seems to be buggy (defmethod %fragment-analysis-p ((graph xle-graph)) (with-slots (graph-address) graph (unless (zerop graph-address) #+debug(print (list :fragtop (find-metavariable graph-address "FRAGMENTSTOP"))) (not (zerop (find-metavariable graph-address "FIRST")))))) (defparameter *eo* nil) (defparameter *max-solution-count* 4000) (defparameter *use-parse-regexp-p* t) ;; parses the sentence (if if parse-p and get-parser-p is T), but does not call parse-prolog() (defmethod initialize-instance :after ((graph xle-graph) &key sentence lattice fs-prolog-file prolog ;; prolog terms string (root-cat "") parser task force-p ranking-p (reparse-on-timeout-topcat nil) (cg-preparse-on-fragment-analysis-p nil) (cg-preparse-p nil) (set-OT-p t) (error-on-fatal-grammar-error-p t) check-generator-p (get-parser-p t) (parse-p t) &allow-other-keys) (when get-parser-p ;; don't parse when graph is loaded from a batch parse (with-slots (grammar graph-address restricted-solutions-count unoptimal-solutions-count disable-OT-p) graph (setf (prolog graph) prolog) (with-slots (ot-graph grammar-path) grammar ;; set disable-OT to its new value #+allegro (when set-OT-p ;; fixme: do this differently!?! (unless (or (null ot-graph) (null-or-zero-p (graph-address ot-graph))) #+debug(print (list :ot-graph-addr (graph-address ot-graph))) (let* ((compstate (ff:fslot-value-typed 'Graph nil (graph-address ot-graph) 'compstate)) (disable-ot (unless (null-or-zero-p compstate) (ff:fslot-value-typed 'DUCompState nil compstate 'disable-OT)))) #+debug(print (list :dis disable-ot)) (cond ((null-or-zero-p compstate) nil) ((and (eq disable-ot 0) disable-ot-p) (setf (ff:fslot-value-typed 'DUCompState nil compstate 'disable-OT) 1)) ((and (eq disable-ot 1) (not disable-ot-p)) (setf (ff:fslot-value-typed 'DUCompState nil compstate 'disable-OT) 0)))))) (let ((parser (or parser (get-parser grammar :task task :force-p force-p :check-generator-p check-generator-p :error-on-fatal-grammar-error-p error-on-fatal-grammar-error-p)))) #+debug(print (list :parser parser)) (when parser (with-slots (parser-address active-p) parser (setf active-p t (parser graph) parser) ;; send the sentence to XLE for parsing or read prolog into XLE (unless (or (not parse-p) (zerop parser-address)) (let ((success-p nil)) (unwind-protect (labels ((parse (cg-preparse-p cg-preparse-on-fragment-analysis-p) (setf graph-address (let (#+(and xxx allegro (not logon)) ;; not needed for newest XLE (excl::*locale* (excl::find-locale "en_EN"))) #+debug(print :parsing) (cond (lattice ;; lattice = T is not used (any more) (parse-lattice lattice (or root-cat "") parser-address)) (sentence (if (and *use-parse-regexp-p* (or (eq (morphology-type (grammar parser)) :plugin-morphology))) (with-allocated-strings () #+debug(print :morph-regexp) (multiple-value-bind (regexp heap) (build-morph-regexp sentence :cg-preparse-p cg-preparse-p) (unwind-protect (parse-regexp parser-address regexp (or root-cat "") 1 ;; no-morphology ) (free-heap heap)))) (let ((excl::*locale* (excl::find-locale "en_EN"))) (parse-sentence parser-address (case (morphology-type (grammar parser)) (:plugin-morphology (concatenate 'string (if cg-preparse-p "y" "n") sentence)) (t (print sentence) )) (or root-cat ""))))) (prolog (let ((fs-prolog-file #+allegro(system:make-temp-file-name) #+sbcl"/tmp/tmp.prl") (created-p nil)) (unwind-protect (progn (with-open-file (stream fs-prolog-file :direction :output :if-exists :supersede) (setf created-p t) (write-string prolog stream)) (with-open-file (stream fs-prolog-file) (let ((cstream (fdopen #+allegro(excl::stream-input-handle stream) #+sbcl(sb-sys::fd-stream-fd stream) "r"))) #+debug(print (list :read-prolog-graph parser-address)) ;;(reset-storage parser-address) ;; new (read-prolog-graph cstream parser-address)))) (when created-p (delete-file fs-prolog-file))))) (fs-prolog-file (with-open-file (stream fs-prolog-file) (let ((cstream (fdopen #+allegro(excl::stream-input-handle stream) #+sbcl(sb-sys::fd-stream-fd stream) "r"))) (read-prolog-graph cstream parser-address))))))) (unless fs-prolog-file (when (and (zerop graph-address) reparse-on-timeout-topcat) ;; no parse, probably timeout ;; try chunk parsing #+debug(print (list :reparse-on-timeout-topcat reparse-on-timeout-topcat)) (setf graph-address (parse-sentence parser-address sentence reparse-on-timeout-topcat))) #+allegro (if (and cg-preparse-on-fragment-analysis-p (not cg-preparse-p) (%fragment-analysis-p graph)) (parse t nil) (let* ((compstate (unless (null-or-zero-p graph-address) (ff:fslot-value-typed 'Graph nil graph-address 'compstate))) (chart (unless (null-or-zero-p compstate) (get-compstate-chart compstate))) (top-graph (unless (null-or-zero-p chart) (get-top-graph chart))) (restricted-solution (unless (null-or-zero-p top-graph) (get-edge-solutions top-graph 0)))) (cond ((null-or-zero-p restricted-solution) (setf restricted-solutions-count 0 unoptimal-solutions-count 0)) (t (clear-solution-counts restricted-solution) (setf restricted-solutions-count (floor (count-restricted-solutions restricted-solution))) (clear-solution-counts restricted-solution) (setf unoptimal-solutions-count (floor (count-unoptimal-solutions restricted-solution))))) #+debug(print (list :r restricted-solutions-count :u unoptimal-solutions-count))))))) (parse cg-preparse-p cg-preparse-on-fragment-analysis-p) (case ranking-p ;; to do: move this somewhere else ((t :xle) (when (and (not (zerop (graph-address graph))) (<= restricted-solutions-count *max-solution-count*)) (most-probable-structure (graph-address graph) (namestring (merge-pathnames "property-weights.txt" grammar-path))) (get-choice-weights graph))) (:discriminants (when (and (not (zerop (graph-address graph))) (<= restricted-solutions-count *max-solution-count*)) ))) (let ((prop (get-chart-prop parser-address "UNKNOWNWORD"))) (setf (unknown-word graph) (unless (zerop prop) (ffc::%get-null-terminated-string prop)))) (setf ot-graph graph success-p t)) (when (not success-p) (setf active-p nil)))))))))))) (defmethod solution-weight ((graph xle-graph) solution-nr) (with-slots (choice-weights disjunctions-tree) graph (when choice-weights (reduce #'+ (mapcar (lambda (choice) (gethash (car choice) choice-weights 0)) (nth-solution-choices disjunctions-tree solution-nr)))))) #+test (let* ((graph (parse-sentence (create-parser "/usr/local/xledir/pargram/norwegian/bokmal/bokmal-mrs.lfg") "Han ser kattene." "")) (disjunctions (ff:fslot-value-typed 'Graph nil graph 'disjunctions)) (count (ff:fslot-value-typed 'Disjunction nil disjunctions 'n-choices)) (clause (get-choice graph disjunctions 1))) (print (list :disjunctions disjunctions :disjunction.arm0 (ff:fslot-value-typed 'Disjunction nil disjunctions 'arm 0) :disjunction.arm1 (ff:fslot-value-typed 'Disjunction nil disjunctions 'arm 1) :n-choices count :clause clause :clause.union1 (ff:fslot-value-typed 'Clause nil clause 'union1) :clause.union2 (ff:fslot-value-typed 'Clause nil clause 'union2) :clause.mutable (ff:fslot-value-typed 'Clause nil clause 'mutable) :clause.selected (ff:fslot-value-typed 'Clause nil clause 'selected) :clause.nogood (ff:fslot-value-typed 'Clause nil clause 'nogood) :clause.skip (ff:fslot-value-typed 'Clause nil clause 'skip) :clause.non-local (ff:fslot-value-typed 'Clause nil clause 'non_local) :clause.data (ff:fslot-value-typed 'Clause nil clause 'data) :clause.offset (ff:fslot-value-typed 'Clause nil clause 'offset) :clause.indexed-cache (ff:fslot-value-typed 'Clause nil clause 'indexed-cache) :clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment) :clause.type (ff:fslot-value-typed 'Clause nil clause 'type) :clause.graph (ff:fslot-value-typed 'Clause nil clause 'graph) :clause.next (ff:fslot-value-typed 'Clause nil clause 'next))) (get-choice-weight clause)) #+test (let* ((graph (parse-sentence (create-parser "/usr/local/xledir/pargram/english/standard/english.lfg") "Time flies like an arrow." "")) (disjunctions (ff:fslot-value-typed 'Graph nil graph 'disjunctions)) (count (ff:fslot-value-typed 'Disjunction nil disjunctions 'n-choices)) (clause (get-choice graph disjunctions 2))) (print (ff::char*-to-string (print-clause clause 0))) ;;(setf (ff:fslot-value-typed 'Clause nil clause 'type) 0) (print (list :graph graph :disjunctions disjunctions :disjunction.arm0 (ff:fslot-value-typed 'Disjunction nil disjunctions 'arm 0) :disjunction.arm1 (ff:fslot-value-typed 'Disjunction nil disjunctions 'arm 1) :n-choices count :clause clause :clause.union1 (ff:fslot-value-typed 'Clause nil clause 'union1) :clause.union2 (ff:fslot-value-typed 'Clause nil clause 'union2) ;:clause.union3 (ff:fslot-value-typed 'Clause nil clause 'union3) ;:clause.union4 (ff:fslot-value-typed 'Clause nil clause 'union4) ;:clause.union5 (ff:fslot-value-typed 'Clause nil clause 'union5) :clause.mutable (ff:fslot-value-typed 'Clause nil clause 'mutable) :clause.selected (ff:fslot-value-typed 'Clause nil clause 'selected) :clause.nogood (ff:fslot-value-typed 'Clause nil clause 'nogood) :clause.skip (ff:fslot-value-typed 'Clause nil clause 'skip) ;:clause.non-local (ff:fslot-value-typed 'Clause nil clause 'non_local) :clause.data (ff:fslot-value-typed 'Clause nil clause 'data) :clause.offset (ff:fslot-value-typed 'Clause nil clause 'offset) :clause.indexed-cache (ff:fslot-value-typed 'Clause nil clause 'indexed-cache) :clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment) #||:clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment2) :clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment3) :clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment4) :clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment5) :clause.segment (ff:fslot-value-typed 'Clause nil clause 'segment6)||# :clause.type (ff:fslot-value-typed 'Clause nil clause 'type) #||:clause.type (ff:fslot-value-typed 'Clause nil clause 'type2) :clause.type (ff:fslot-value-typed 'Clause nil clause 'type3) :clause.type (ff:fslot-value-typed 'Clause nil clause 'type4) :clause.type (ff:fslot-value-typed 'Clause nil clause 'type5)||# :clause.graph (ff:fslot-value-typed 'Clause nil clause 'graph) :clause.next (ff:fslot-value-typed 'Clause nil clause 'next))) (print (ff::char*-to-string (print-choice clause 0))) (print (select-choice clause)) (print (get-choice-weight clause))) #+test (let* ((graph (parse-sentence (create-parser "/usr/local/xledir/pargram/norwegian/bokmal/bokmal-mrs.lfg") "Han ser kattene." "")) (disjunctions (ff:fslot-value-typed 'Graph nil graph 'disjunctions)) (clause (get-choice graph disjunctions 1))) (print (ff::char*-to-string (print-clause clause 0))) (get-choice-weight clause)) #+allegro (defmethod get-choice-weights ((graph xle-graph)) (with-slots (graph-address choice-weights) graph (if choice-weights (clrhash choice-weights) (setf choice-weights (make-hash-table))) (let ((disjunctions (ff:fslot-value-typed 'Graph nil graph-address 'disjunctions)) (dis 0)) (labels ((get-weights (disjunctions) (unless (zerop disjunctions) (unless (= (ff:fslot-value-typed 'Disjunction nil disjunctions 'internal) 1) (incf dis) (loop for i from 1 to (ff:fslot-value-typed 'Disjunction nil disjunctions 'n-choices) for clause = (get-choice graph-address disjunctions i) do (setf (gethash (intern (choice-label dis i) :xle) choice-weights) (get-choice-weight graph-address clause)) #+debug (print (list :choice (intern (choice-label dis i) :xle) :weight (get-choice-weight graph-address clause))))) (get-weights (ff:fslot-value-typed 'Disjunction nil disjunctions 'next))))) (get-weights disjunctions))))) #+old (defmethod rank-solutions ((graph xle-graph)) (with-slots (solution-count choice-weights ranking) graph (unless (null choice-weights) (setf ranking (coerce (sort (loop for i from 0 to (1- solution-count) collect (cons i (solution-weight graph i))) #'> :key #'cdr) 'vector))))) ;; see also : extract-c-structure() (defparameter *weight-table* (dat:make-string-tree)) #+test (let ((graph (parse "Det reget." nil))) (get-next-solution graph) (print (ranking graph))) (defmethod rank-solutions ((graph xle-graph) &key (model-type :discriminants) weight-table) (with-slots (grammar solution-count discriminants ranking) graph (let ((weight-table (or weight-table (weight-table grammar)))) (when weight-table (unless (and (eq model-type :discriminants) (or (null discriminants) (null (s-context discriminants)) #+ignore(eq (s-context discriminants) 1))) ;; has not been disambiguated (setf ranking (coerce (sort (loop for i from 0 to (1- solution-count) collect (cons i (ecase model-type ((:xle) (solution-weight graph i)) ((t :discriminants) (let ((weight 0.0)) (loop for disc across (discriminants-array discriminants) when (and disc (not (subtypep (class-of disc) 'constituent-discriminant)) (not (equal 1 (s-context disc))) ;; should not happen! (= 1 (aref (s-context disc) i))) do (let ((disc-string (discriminant-string disc))) #+debug(print (list disc (dat:string-tree-get weight-table disc-string 0))) (incf weight (dat:string-tree-get weight-table disc-string 0)))) weight))))) #'> :key #'cdr) 'vector))))))) (defmethod extract-score ((graph xle-graph) solution) (with-slots (ranking) graph (when ranking (cdr (find (1- solution) ranking :key #'car))))) #+test (test) #+test (defun test () (let* ((*grammar* (find-grammar "bokmal-mrs")) (graph (parse "Det regnet." nil :ranking nil )) (solutions (loop for solution = (solution (get-next-solution graph :ranking-p t)) while (not (zerop solution)) collect solution))) (dolist (solution solutions) (print (list solution (extract-score graph solution) (extract-c-structure graph solution)))) (print (ranking graph)))) #+test (test) (defmethod get-next-solution ((graph xle-graph) &key (reclaim-memory-p t) (packed-p t) (ranking-p t)) (with-slots (graph-address solution solution-count %previous-solution solutions solution-nr choice-weights ranking) graph (labels ((set-ranked-solution () (if ranking (let ((rank-pair (aref ranking solution-nr))) (setf solution (1+ (car rank-pair))) #+ignore (values solution (cdr rank-pair))) (setf solution (1+ solution-nr))) #+debug(print (list :solution-nr solution-nr :solution solution :ranking ranking graph-address solution-count %previous-solution solutions solution-nr)))) (cond (packed-p (cond ((null solution-nr) (when ranking-p ;; fixme: should only be done for discriminant ranking ;; first, we have to get the discriminants, that means parsing the packed structure (multiple-value-bind (va c-va root) (parse-prolog graph :packed-p t :packed-c-structure-p t ;; obs! :packed-f-structure-p t ;; obs! ;;:solution graph-address :build-c-structure-p t ;; OBS must be t! :disjunction-choice nil :context nil :ranking-p ranking-p) (unless (zerop (fill-pointer c-va)) (build-c-structure graph c-va root :calculate-discriminants-p t :packed-p t) (multiple-value-bind (disc-tree secondary-disc-tree) (build-discriminants graph (build-f-structure graph :var-array va)) (when (discriminants graph) (build-f-structure-discriminants graph (discriminants graph) disc-tree) (build-secondary-discriminants graph (discriminants graph) secondary-disc-tree)))) #+old (unless (zerop (fill-pointer c-va)) (build-c-structure graph c-va root :calculate-discriminants-p t :packed-p t) (let ((disc-tree (build-discriminants graph (build-f-structure graph :var-array va)))) (when (discriminants graph) (build-f-structure-discriminants graph (discriminants graph) disc-tree))))) (rank-solutions graph :model-type :discriminants)) (setf solution-nr 0 solution 1) ;;(describe (discriminants graph)) #+test (when choice-weights (rank-solutions graph)) (if (zerop solution-count) (setf solution 0) (set-ranked-solution))) ((< solution-nr (1- (solution-count graph))) (incf solution-nr) (set-ranked-solution)) (t (setf solution 0)))) (t (setf %previous-solution solution solution (next-graph-solution graph-address (or solution 0))) (unless (zerop solution) (pushnew solution solutions)) (when (and reclaim-memory-p %previous-solution (not (zerop %previous-solution))) (free-graph-solution %previous-solution)) solution)))) graph) (defmacro do-parse-output-lines ((line solution &key (structures "") lines prolog prolog-destination) &body body) (with-gensyms (temp-path created-p mode cstream stream) `(cond (,lines (dolist (,line ,lines) ,@body)) (,prolog (with-input-from-string (,stream ,prolog) (with-stream-lines (,stream ,line) ,@body))) (t (unless (zerop ,solution) (let ((,temp-path (or ,prolog-destination #+allegro(system:make-temp-file-name) #+sbcl"/tmp/tmp.lfg")) ;; fix! (,created-p nil)) (with-cstr (,mode "w") (unwind-protect (progn (with-open-file (stream ,temp-path :direction :output :if-exists :supersede) (setf ,created-p t) (let ((,cstream (fdopen #+allegro(excl::stream-output-handle stream) #+sbcl(sb-sys::fd-stream-fd stream) ,mode))) ;;(print (list :cstream ,cstream ,solution ,structures)) (print-prolog-graph ,cstream ,solution ,structures) ;;(print (list :cstream ,cstream ,solution ,structures)) (fflush ,cstream))) (with-file-lines (,line ,temp-path :external-format :iso-8859-1) ,@body)) (when (and ,created-p (not ,prolog-destination)) (delete-file ,temp-path)))))))))) ;; is destructive! (defun prefix-to-infix (prefix &optional (double-to-single-quotes-p t)) (declare (optimize (speed 3) (safety 1))) (let ((next nil)) (loop with prev and exp-start = t and in-quoted-p = nil and prev-c = nil for c across prefix for i fixnum from 0 do (cond ((and (char= c #\') (or (not prev-c) (char/= prev-c #\\))) (setf exp-start nil in-quoted-p (not in-quoted-p) prev next next #\" (char prefix i) prev)) ((and exp-start (char= c #\[)) (setf prev next next nil (char prefix i) c)) (exp-start (setf next c (char prefix i) #\( exp-start nil)) ((and (char= c #\,) #-orig (not in-quoted-p)) (setf exp-start (eq #\( (find-if (lambda (c) (find c '(#\( #\, #\)))) prefix :start (1+ i))) (char prefix i) next next #\space)) ((char= c #\() (setf exp-start (eq #\( (find-if (lambda (c) (find c '(#\( #\, #\)))) prefix :start (1+ i))) (char prefix i) next next #\space)) (t (setf exp-start nil prev next next (if (and double-to-single-quotes-p (eq c #\")) #\' c) (char prefix i) prev))) (setf prev-c c)) (if (char= next #\)) (concatenate 'string prefix ")") prefix))) ;;(print (prefix-to-infix "cf(A5,subtree(6789,'VPx[v,fin]',-,6253)),")) ;;(princ (prefix-to-infix "cf(1,surfaceform(414,'Kaiser\\'s-Netz',39,52)),")) (eval-when (:load-toplevel :compile-toplevel :execute) (defparameter *prolog-form-readtable* (copy-readtable)) (set-macro-character #\[ (lambda (stream ignore) (declare (ignore ignore)) (cons '_bracket (read-delimited-list #\] stream t))) nil *prolog-form-readtable*) (set-macro-character #\] (get-macro-character #\)) nil *prolog-form-readtable*)) (defun execute-xle-command (command) (let ((res-ptr (execute-tcl-command command))) (ffc::%get-null-terminated-string res-ptr))) (defparameter *pargram-path* nil) #+test (update-and-reload-grammar :force-p nil) ;; make obsolete! (defparameter *default-grammar* #+(and logon #+ignore(not 64bit)) "bokmal-mrs" #-logon nil) (defun find-grammar (name &key (default *default-grammar*) owner (common-if-not-found-p t)) #+debug(print (list :grammar name :default default)) (let ((grammar (gethash name *grammars* (and default (or (gethash default *grammars*) (find-grammar-by-filename (concatenate 'string default ".lfg"))))))) (cond ((and grammar (or (null (owner grammar)) (null owner) (equal owner (owner grammar)) (super-user-p owner))) grammar) (common-if-not-found-p (maphash (lambda (name grammar) (when (or (null (owner grammar)) (equal owner (owner grammar))) (return-from find-grammar grammar))) *grammars*) nil) (t nil)))) (defun find-grammar-by-filename (filename &key (error-if-not-found-p t)) #+debug(print filename) (maphash (lambda (name grammar) (declare (ignore name)) (with-slots (grammar-path) grammar #+debug(print (list name grammar grammar-path)) (let ((pos (search filename grammar-path))) (when (and pos (= (+ pos (length filename)) (length grammar-path)) (or (zerop pos) (char= (char grammar-path 0) #\/))) (return-from find-grammar-by-filename grammar))))) *grammars*) (if error-if-not-found-p (error "Grammar ~s could not be found." filename) (warn "Grammar ~s could not be found." filename))) (defun load-grammars (file) #+debug(print file) (let ((default-p t)) ;; first grammar is default grammar (with-file-lines (line file);; #+allegro :external-format #+allegro :iso-8859-1) (unless (or (zerop (length line)) (char= (char line 0) #\#)) (destructuring-bind (name path &optional morphology mrs-p owner) (split line #\tab) #+debug(print (list :file file :name name :path path :morphology morphology :mrs-p mrs-p :owner owner)) (when default-p (setf *default-grammar* name)) #-xle-web ;;(or xle-web 64bit) (when (string= path "norwegian/bokmal/bokmal-mrs.lfg") (setf *default-grammar* name)) (when (equal mrs-p "") (setf mrs-p nil)) (setf (gethash name *grammars*) (make-instance 'grammar :grammar-path (if (char= (char path 0) #\/) path ;; path is absolute (concatenate 'string *pargram-path* path)) :max-parsers 5 :graph-class (if mrs-p 'xle-mrs-graph 'xle-graph) :morphology-type (if (and morphology (not (equal morphology ""))) (intern (string-upcase morphology) :keyword) :fst-morphology) :name name :language (cond ((string= name "Georgian") "geo") ((string= name "Tigrinya") "tir") ((string= name "Turkish") "tur") ((string= name "Slovensko") "slv") ((string= name "Avar") "ava") ((string= name "Arabic") "ara")) :owner (unless (equal owner "") owner) :default-sentence (if (equal name "English") "Grammar writers like to work with LFG grammars." "")) default-p nil) #+debug(describe (gethash name *grammars*))))) #+debug(print (list :*default-grammar* *default-grammar*)) #+disabled ;;#-(or xle-web sbcl) (read-xle-performance-vars-from-file (concatenate 'string *pargram-path* #+(and :allegro :64bit) "/norwegian/bokmal/performance-vars-64.txt" #-(and :allegro :64bit) "/norwegian/bokmal/performance-vars.txt")) ;; fix! (setf *grammar* (gethash *default-grammar* *grammars*)))) (defun grammar-list () (sort (collecting (maphash (lambda (grammar obj) (collect (cons (if (and (owner obj) (eq 0 (search (owner obj) grammar))) (subseq grammar (1+ (length (owner obj)))) grammar) obj))) *grammars*)) #'string-lessp :key #'car)) (defparameter *grammar-def-file* #-:logon (concatenate 'string "projects:xle;" #+xle-web "xle-web-" #+pargram "pargram-" #-sbcl "grammars.cfg" #+sbcl "grammars.cfg") #+:logon (concatenate 'string (getenv "PARGRAM") "/norwegian/bokmal/logon-grammar.cfg")) (defun write-grammar-definition-file (&optional (file *grammar-def-file*)) (with-open-file (stream file :direction :output :if-exists :supersede) (maphash (lambda (name grammar) (with-slots (name grammar-path morphology-type graph-class owner) grammar (format stream "~a~c~a~c~a~c~a~c~a~%" name #\Tab (subseq grammar-path (length *pargram-path*)) #\Tab (if (eq morphology-type :fst-morphology) "" (string-downcase morphology-type)) #\Tab (if (eq graph-class 'xle-mrs-graph) "t" "") #\Tab (or owner "")))) *grammars*))) (defmethod unload-grammar ((grammar grammar)) (with-slots (parsers ot-graph graph-table) grammar (dolist (parser parsers) (unless (zerop (parser-address parser)) (free-chart (parser-address parser)))) (clrhash graph-table) (setf parsers () ot-graph nil) (when (string= (name grammar) "bokmal-mrs") (kill-morph-server)))) #+allegro (ff:def-foreign-call getpid (:void)) #+allegro (defun kill-morph-server () (let ((pid-file (format nil "/tmp/morph-server-pid.~d" (getpid)))) (when (probe-file pid-file) (print :pid-file-exists) (with-open-file (stream pid-file) (let ((morph-server-pid (read stream))) (#+allegro excl::run-shell-command #+sbcl sb-ext:run-program (print (format nil "kill -9 ~d" morph-server-pid))) (print (list :killed morph-server-pid)))) (delete-file pid-file)))) #+allegro (unless (sys::getenv "MORPHPORT") (push '(ignore-errors (ff::unload-foreign-library +xle-module-path+) (print "unloaded.") (kill-morph-server)) sys:*exit-cleanup-forms*)) (defun choice-label (dis i) (with-output-to-string (stream) (labels ((label-char (j) (multiple-value-bind (int rest) (floor (1- j) #.(- (char-code #\Z) (char-code #\A) -1)) (unless (zerop int) (label-char int)) (write-char (code-char (+ #.(char-code #\A) rest)) stream)))) (label-char dis) (format stream "~d" i)))) ;; the inverse (defun label-choice (label) (let ((int-start (position-if (lambda (c) (find c "1234567890")) label))) (cons (loop with choice = 0 for pos from 0 to (1- int-start) for i = (- (char-code (char label pos)) #.(char-code #\A) -1) do (setf choice (* choice #.(- (char-code #\Z) (char-code #\A) -1))) (incf choice i) finally (return choice)) (parse-integer label :start int-start)))) #+test (print (label-choice "AJ1")) #+test (print (choice-label 36 1)) ;; workaround for n-bit integers in sbcl (defun %internal-disjunction-p (disjunctions) (= 1 #+allegro (foreign-slot disjunctions Disjunction internal) #+sbcl (logand (ash (foreign-slot disjunctions Disjunction bits) -20) 1))) #+test (generate-spd-prolog-files (get-treebank "simple") :version "0.95" :directory "/tmp/" :ids '(5143)) (defmethod set-choices ((graph xle-graph) &key labels prolog-file) (when (null prolog-file) (error "No prolog-file location specified.")) (let ((choices (mapcar #'label-choice labels))) #+debug(print (list :labels labels :choices choices)) (with-slots (graph-address) graph (let ((disjunctions (foreign-slot graph-address Graph disjunctions)) (w 0) (dis 0)) (labels ((set-choices (disjunctions) (unless (zerop disjunctions) (unless (%internal-disjunction-p disjunctions) (incf dis) #+debug(print (list :graph-address graph-address :disjunctions disjunctions)) #+debug(print (list :n-choices (foreign-slot disjunctions Disjunction n-choices))) (loop for i from 1 to (foreign-slot disjunctions Disjunction n-choices) for clause = (get-choice graph-address disjunctions i) when (find-if (lambda (pair) (and (= (car pair) dis) (= (cdr pair) i))) choices) do #+debug(print (list :w w :dis dis i (choice-label dis i) clause)) (select-choice clause))) (set-choices (foreign-slot disjunctions Disjunction next))))) (set-choices disjunctions)) (with-open-file (stream prolog-file :direction :output :if-exists :supersede) ;; :external-format :utf-8) (with-cstr (mode "w") (let* ((cstream (fdopen #+allegro(excl::stream-output-handle stream) #+sbcl(sb-sys::fd-stream-fd stream) mode)) (f-solution (chosen-fschart-solution graph-address))) (if (zerop f-solution) (warn "No f-solution found for ~s." labels) (print-prolog-graph cstream (extract-fstructure f-solution (foreign-slot graph-address Graph compstate) 0) "")) (fflush cstream)))))))) ;; packing bug grammars: tar zcvf .tar.gz ;; Grammar distribution: MAke bug grammar, rename to norwegian-distribution-/norwegian/bokmal ;; remove unnecessary files ;; cp linux.x86.64/ (the morph server) ;; ln -s bokmal-mrs-emacs.lfg bokmal.lfg ;; pack as above. :eof