;; -*- Mode: lisp; Syntax: ansi-common-lisp; Package: cgp; Base: 10; Readtable: augmented-readtable -*- ;; ;; Copyright (C) Paul Meurer 2001 - 2005. All rights reserved. ;; paul.meurer@aksis.uib.no ;; Aksis, University of Bergen ;; ;; Web-interface to the Common Lisp-reimplementation of the CG parser/tagger system ;; (Oslo-tagger) developed at UiO (Dokumentasjonsprosjektet; Tekstlaboratoriet) ;; ;;------------------------------------------------------------------------------------- ;; TO DO: make fancier interface ;;------------------------------------------------------------------------------------- ;; PROBLEMS: ;; ;; Does not work in IE 5.00.2920.0000 (problems with persistent connections) ;; ;;------------------------------------------------------------------------------------- ;; The Perl script cgp.pl accesses the URL "/cl/cgp/oslo-bergen-tagger.html" ;; which calls tag-text-form() (in-package :cgp) #+test (cl-user::run-shell-command (concat "rsync --archive --rsh=ssh " "paul@decentius.hit.uib.no:/home/paul/lisp/projects/cgp/rules/ " "/home/paul/lisp/projects/cgp/rules/")) (defconstant $max-idle-time (* 60 10)) (defmethod grab-lock ((cg constraint-grammar) (obj t)) nil) (eval-when (:compile-toplevel :load-toplevel :execute) (setf lxml::*encoding* :utf-8 ;;:entities lxml::*newline-after-endtag-p* '(p td tr table title script head |p| |td| |tr| |table| |title| |script| |head|))) (defun cg-name-list () (collecting (maphash (lambda (name cg) (declare (ignore cg)) (collect name)) *cg-table*))) #+allegro-xx (defmethod request-remote-host ((request http-request)) (socket::remote-host (request-socket request))) (defmethod grab-lock ((cg constraint-grammar) (request http-request)) (let ((lock (cg-edit-lock cg)) (now (get-universal-time))) (with-slots (user host last-access) lock (cond ((null user) (setf user (get-basic-authorization request) host (request-remote-host request) last-access now) lock) ((and (equal user (get-basic-authorization request)) (equal host (request-remote-host request))) (setf last-access now) lock) ((< (+ last-access $max-idle-time) now) (setf user (get-basic-authorization request) host (request-remote-host request) last-access now)) (t nil))))) (defmethod release-lock ((cg constraint-grammar) (request http-request)) (let ((lock (cg-edit-lock cg))) (with-slots (user host last-access) lock (when (or (null user) (and (eq user (get-basic-authorization request)) (equal host (request-remote-host request)))) (setf user nil host nil last-access nil) lock)))) (defun write-analyser-page-doc (stream) #m((font :face "Arial, Helvetica, sans-serif") ((h2 :align "center") "Oslo-Bergen-taggeren (for bokmål og nynorsk)") ((p :align "center") ((font :face "Arial, Helvetica, sans-serif") "Et samarbeidsprosjekt mellom UiO og UiB.")) (p ((font :face "Arial, Helvetica, sans-serif") "Taggeren består av en preprosessor, en multitagger og en Constraint Grammar-modul for morfologisk og syntaktisk disambiguering.")) (p ((span :style "font-weight: bold") "Preprosessor. ") "Finner blant annet setningsgrenser. (Utviklet av Taggerprosjektet, ved Dokumentasjonsprosjektet og Tekstlaboratoriet, begge UiO. Nyprogrammert ved Aksis, UiB.)") (p ((span :style "font-weight: bold") "Multitagger. ") "Setter grammatiske tagger på ord. Basert på") (ul (li (b "Norsk ordbank") ", som er satt sammen av:" (ul (li "ordlister og lister over bøyningsmønstre for bokmål og nynorsk laget ved IBM Norge A/S") (li "materiale fra Bokmålsordboka og Nynorskordboka laget ved Seksjon for leksikografi, INL, UiO.") (li "opplysninger om hvilke argumentstruktur verb kan ta. Opplysningene er systematisert og samlet av NorKompLeks ved Universitetet i Trondheim.")) "Materialet er tilpasset og videreutviklet av Taggerprosjektet (ved Dokumentasjonsprosjektet og Tekstlaboratoriet (UiO)).") (li "En "(b "sammensetningsanalysator") " utviklet ved Tekstlaboratoriet, UiO.")) (p "Multitaggeren er nyprogrammert ved Aksis, UiB, men ble opprinnelig laget av Dokumentasjonsprosjektet. ") (p ((span :style "font-weight: bold") "Disambigueringsdel. ") "Fjerner overflødige tagger v.h.a. morfologiske og syntaktiske Constraint Grammar- (føringsbaserte) regler. Utviklet ved Taggerprosjektet, ved Tekstlaboratoriet, UiO. Regeltolker utviklet ved Aksis, UiB.") (p "Kontakt:" (br/) ((a :href "http://www.hf.uio.no/tekstlab/index.html") "Tekstlaboratoriet") ", UiO: " "Janne Bondi Johannessen." (br/) ((a :href "http://www.aksis.uib.no") "Aksis") ", UiB: " ((a :href "http://www.aksis.uib.no/people/12") "Paul Meurer") (br/) ((a :href "http://www.dokpro.uio.no/index.html") "Dokumentasjonsprosjektet") ", UiO." (br/) "Seksjon for leksikografi, UiO.") (hr/) (p "Her kan du teste taggeren:") )) #+copy (defmethod request-uri-string ((request http-request)) (with-output-to-string (stream) (net.uri:render-uri (request-raw-uri request) stream))) (defun ensure-string (object) (typecase object (string object) (symbol (string-downcase (string object))) (t (with-output-to-string (stream) (write object :stream stream))))) ;; should be the same as the function defined in aserve/utilties.lisp, but this ;; one uses entity encoding, whearas HTML-SELECT uses utf-8. Should be fixed. #+obsolete (defun %html-select (stream &key name options default onchange) (if onchange #m((select :name #L name :onchange #L onchange) #L(dolist (option options) (if (consp option) (let ((value (ensure-string (car option)))) (if (string-equal value (string default)) #m((option :value #L value :selected "true") #L(write-string (cdr option) stream)) #m((option :value #L value) #L(write-string (cdr option) stream)))) (let ((value (ensure-string option))) (if (string-equal value (ensure-string default)) #m((option :value #L value :selected "true") #L(write-string value stream)) #m((option :value #L value) #L(write-string value stream))))))) #m((select :name #L name) #L(dolist (option options) (if (consp option) (let ((value (ensure-string (car option)))) (if (string-equal value (string default)) #m((option :value #L value :selected "true") #L(write-string (cdr option) stream)) #m((option :value #L value) #L(write-string (cdr option) stream)))) (let ((value (ensure-string option))) (if (string-equal value (ensure-string default)) #m((option :value #L value :selected "true") #L(write-string value stream)) #m((option :value #L value) #L(write-string value stream))))))))) (defmethod analyser-page-body ((request http-request) stream &key (text "") (language :nbo) (version "nbo") (compare-version "nbo") compare-p eagles-p expand-tokens-p (print-rules-p t) print-scarrie-styles-p (tagging-niveau :morphological-disambiguation) (mode :test) (file "") (encoding :macintosh)) (when (eq mode :edit) #m(link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/analyze-text.css")) (when (and (eq mode :test) (equal text "")) (write-analyser-page-doc stream)) (if *www-interface-disabled-p* #m(p (b "Tagger-siden er midlertidig tatt ut av drift, men er snart oppe igjen.")) #m((form :method "post" :name "form" ;;:enctype "multipart/form-data" :action #L(request-uri-string request)) (p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;") #L(write-string (if (eq mode :test) "Analyser tekst (maks 500 tegn):" "Analyser tekst:") stream))) ((textarea :name "text" :rows "6" :cols "80" :onkeydown "if (window.event.keyCode == 13) { window.event.keyCode = false; window.event.cancelBubble = false; form.submit() } ") #L(write-string text stream)) (br/) ((table :id "tblChoicesTable") #L(when (eq mode :edit) #+ignore-yet #m(tr ((td :align "right" :class "clsChoicesLabel") "Analyser fil ") (td (accept-input 'file "FILE" :default file :size 54))) #+ignore-yet #m(tr ((td :align "right" :class "clsChoicesLabel") "Filformat ") (td #L(html-select stream :name "encoding" :options '("Windows" "Macintosh" "Unix") :default encoding))) #m(tr ((td :align "right" :class "clsChoicesLabel") "CG-versjon ") (td #L(html-select stream :name "version" :options (cg-name-list) :default version))) #m(tr ((td :align "right" :class "clsChoicesLabel") ((input :type "checkbox" :name "compare-p" :checked #L(when compare-p "on")) "Sammenlign med ")) (td #L(html-select stream :name "compare-version" :options (cg-name-list) :default compare-version)))) #L(when (eq mode :test) #m(tr ((td :align "right" :class "clsChoicesLabel") "Språk ") (td #L(html-select stream :name "language" :options '(("nbo" . "Bokmål") ("nny" . "Nynorsk")) :default language)))) (tr ((td :align "right" :class "clsChoicesLabel") "Nivå ") (td #L(html-select stream :name "tagging-niveau" :options (if (eq mode :edit) `(("multi-tagging" . "Multitagging") ("morphological-disambiguation" . "Morfologisk disambiguering") ("syntactic-mapping" . "Syntaktisk mapping") ("syntactic-disambiguation" . "Syntaktisk disambiguering") ("named-entity-mapping" . "Morfologisk disambiguering og navnemapping") ("named-entity-disambiguation" . "Morfologisk og navnedisambiguering") ("syntactic-named-entity-mapping" . "Syntaktisk disambiguering og navnemapping") ("syntactic-named-entity-disambiguation" . "Syntaktisk og navnedisambiguering")) `(("multi-tagging" . "Multitagging") ("morphological-disambiguation" . "Morfologisk disambiguering") ("syntactic-disambiguation" . "Syntaktisk disambiguering") ("syntactic-named-entity-disambiguation" . "Syntaktisk og navnedisambiguering"))) :default tagging-niveau)))) (p (input/ :type "submit" :name "analyse" :value "Analyser") " " ((input :type "checkbox" :name "eagles-p" :checked #L(when eagles-p "t")) " bruk Eagles-tagsett ") ((input :type "checkbox" :name "expand-tokens-p" :checked #L(when expand-tokens-p "t")) " ekspander flerordsnavn ") ((input :type "checkbox" :name "print-rules-p" :checked #L(when print-rules-p "t")) " vis brukte regler ") ((input :type "checkbox" :name "print-scarrie-styles-p" :checked #L(when print-scarrie-styles-p "t")) " vis " ((a :href "http://www.ling.uib.no/~~desmedt/scarrie/lexdb.html") "Scarrie") "-stilnivåer")) (hr/)))) (defun analyser-page-documentation-links (stream &key show-documentation-p) (when show-documentation-p #m(p (b (a :href "/cl/cgp/source/") "Kildekode og dokumentasjon") " (for autoriserte brukere)" )) #m (hr/) #+cl-http (p "Siden er generert av " (ns4.0:note-anchor "CL-HTTP" :reference "http://www.ai.mit.edu/projects/iiip/doc/cl-http/home-page.html"))) #+ignore (defmethod compute-cg-description-form ((request http-request) stream) (destructuring-bind (version) url:search-keys (compute-cg-description-form "/cl/cgp/constraint-grammars.html" stream `((version ,version))))) #+test (let ((stream *standard-output*)) (js/edit-rule-popup-menu stream)) (defmethod cg-description-form ((request http-request) ent) (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) (bind-query-values (version submit-form cg-clone-name cg-locked-p backup-cg default-cg comment delete-cg documentation) request nil t t t ;;(print version) (unless version (setf version "nbo")) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title #L(format stream "Description of the Constraint Grammar \"~a\"" version)) (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8") ((script :language "JavaScript") #L(js/edit-cg stream)) #+ignore (ns4.0::write-script (ns4.0:intern-script :edit-cg :java-script) stream)) (link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/analyze-text.css") ;; change stylesheet! (link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/edit-rules.css") #L(if *www-interface-disabled-p* #m(p (b "Siden er midlertidig tatt ut av drift, men er snart oppe igjen.")) (handler-case (let ((cg (gethash version *cg-table*))) (when (null cg) (error "The CG ~a seems to be deleted." version)) ;; actions (when submit-form (when cg-clone-name (when (gethash cg-clone-name *cg-table*) (error "A Constraint Grammar named ~s exists already. Please choose another name" cg-clone-name)) (make-named-cg-copy cg cg-clone-name)) (when (and (or delete-cg documentation (not (equal (cg-locked-p cg) cg-locked-p))) (not (grab-lock cg request))) (error "The CG \"~a\" is in use by user ~a@~a and can't be updated." version (user-name request) (request-remote-host request))) (setf (cg-locked-p cg) cg-locked-p) (when backup-cg (let ((now (get-universal-time))) (write-cg-to-file cg :path (format nil "projects:cgp;rules;~a~s.lisp" version now) :temp-path (format nil "projects:cgp;rules;~a~s.temp" version now) :version-comment comment))) (when default-cg (let ((name (string-downcase (language cg))) (now (get-universal-time))) (write-cg-to-file cg :default-p t :path (format nil "projects:cgp;rules;~a.lisp" name) :temp-path (format nil "projects:cgp;rules;~a~s.temp" name now) :version-comment "Default-versjon") (load (concat "projects:cgp;rules;" name ".lisp")))) (when delete-cg (if cg-locked-p (error "The CG \"~a\" is locked and can't be deleted." version) (remhash version *cg-table*))) (when documentation (if cg-locked-p (error "The CG \"~a\" is locked and can't be edited." version) (setf (cg-documentation cg) documentation #+obsolete (convert-string documentation :windows $encoding) (change-date cg) (get-universal-time))))) #m((form :method "post" :action "/cl/cgp/constraint-grammars.html") ((table :id "tblChoicesTable") (tr ((td :align "right" :class "clsChoicesLabel") "CG-versjon: ") (td #L(html-select stream :name "version" :options (cg-name-list) :onchange "ChangeCG()" :default version) #+ignore ((select :name "VERSION" :size "1" :onchange "ChangeCG()") #L(maphash (lambda (name cg) (declare (ignore cg)) (if (equal name version) #m((option :selected nil) #L(write-string name stream)) #m(option #L(write-string name stream)))) *cg-table*)))) #+ignore (td ((div :id "oDownload" :style "behavior:url(#default#download)") ((a :href #L(concat "javascript:oDownload.startDownload('/cl/cgp/download-cg?version=" version "', onDownloadDone)")) "Last ned CG som fil")))) (br/) (hr/) ((table :bgcolor "#ffffef" :id "tblChoicesTable") (tr ((td :align "right" :class "clsChoicesLabel") "Opprettet ") (td #L(u::format-universal-time (creation-date cg) stream :timestamp :nbo))) (tr ((td :align "right" :class "clsChoicesLabel") "Siste endring ") (td #L(u::format-universal-time (change-date cg) stream :timestamp :nbo))) #L(when (parent-cg cg) #m(tr ((td :align "right" :class "clsChoicesLabel") "Klonet fra ") (td #L(let ((name (parent-cg cg))) #m((a :href #L(concat "/cl/cgp/constraint-grammar?" name)) #L(write-string name stream)))))) (tr ((td :align "right" :class "clsChoicesLabel") "Skrivebeskyttet ") (td #L(if (cg-locked-p cg) #m(input/ :type "checkbox" :name "CG-LOCKED-P" :checked "true") #m(input/ :type "checkbox" :name "CG-LOCKED-P")))) (tr ((td :align "right") ((a :href #L(concat "/cl/cgp/download-cg?version=" version)) "Last ned CG som fil")) (td "(OBS: høyreklikk på lenken)")) (tr ((td :align "right" :class "clsChoicesLabel") "Dupliser CG som: ") (td (input/ :type "text" :name "cg-clone-name" :size "20"))) (tr ((td :align "right" :class "clsChoicesLabel") "Slett fra minnet  ") (td (input/ :type "checkbox" :name "DELETE-CG"))) (tr ((td :align "right" :class "clsChoicesLabel") "Lag sikkerhetskopi ") (td (input/ :type "checkbox" :name "BACKUP-CG") " Kommentar: " (input/ :type "text" :name "comment" :size "30") #+ignore (accept-input 'string "COMMENT" :size "30"))) (tr ((td :align "right" :class "clsChoicesLabel") "Lagre som default ") (td (input/ :type "checkbox" :name "DEFAULT-CG"))) (tr ((td :align "right" :valign "top" :class "clsChoicesLabel") "Dokumentasjon ") (td ((font :face "arial, helvetica, geneva, sans-serif" :size "3") ((div :id "cgDocumentation" :onclick "EditDocumentation()") #L(write-line (convert-string (cg-documentation cg) $encoding :sgml '(#\newline "
")) stream))))) (tr (td (input/ :type "submit" :name "SUBMIT-FORM" :value "Oppdater"))))) #m (hr/) #m((a :href "/cl/cgp/site-map.html" :target "help-page") "Til hovedsiden")) #-ignore (error (cond) #m(p #S(format nil "Error: ~a" cond))))))))))) (defmethod download-cg ((request http-request) ent) (bind-query-values (version) request nil t t t (with-http-response (request ent) (with-http-body (request ent :headers `((content-disposition . ,(concat "attachment; filename=" version ".lisp")))) (let ((stream (request-reply-stream request))) (let ((cg (gethash version *cg-table*))) (when cg (let ((*print-pretty* nil)) (write-cg-to-stream cg :stream stream ; :encoding :windows ))))))))) (defmethod cg-versions-form ((request http-request) ent) (bind-query-values (submit) request nil t t t (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request)) (query-alist (request-query request))) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Liste over lagrete versjoner") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body (link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/analyze-text.css") ;; change stylesheet! (link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/edit-rules.css") #L(if *www-interface-disabled-p* #m(p (b "Siden er midlertidig tatt ut av drift, men er muligens snart oppe igjen.")) (progn ;;handler-case #m((font :face "Arial, Helvetica, sans-serif" :size "3") (h3 "Gå tilbake til en lagret CG-versjon") #L(when submit (loop for (filename . value) in query-alist when (string-equal value "on") do (load (concat "projects:cgp;rules;" filename ".lisp")))) ((form :method "post" :action #L(request-uri-string request)) (br/) (input/ :type "submit" :name "submit" :value "Importer valgte") (hr/) ((table :border "0" :cellpadding "2" :bgcolor "#ffffef" :id "tblChoicesTable") ((tr :bgcolor "#fafae8") (td "Velg for import") ((td :align "center") "Navn") ((td :align "center") "Lagringsdato") ((td :align "center") "Kommentar")) #L(dolist (cg-version (stored-cg-versions)) #m(tr ;; importp / name / store-date / comment #L(destructuring-bind (file-name cg-name store-date comment) cg-version #m((td :align "center") (input/ :type "checkbox" :name #L file-name)) #m((td :align "right" :class "clsChoicesLabel") #L(write-string cg-name stream)) #m(td #L(u::format-universal-time store-date stream :timestamp :nbo)) #m(td #L(when comment (write-string comment stream))))))) ) (hr/) ((a :href "/cl/cgp/site-map.html" :target "help-page") "Til hovedsiden")) #+ignore(error (cond) #m(p #S(format nil "Error: ~a" cond)))))))))))) (defmethod compute-analyser-form ((request http-request) ent) (%compute-analyser-form request ent :print-rules :test)) (defmethod compute-edit-analyser-form ((request http-request) ent) (%compute-analyser-form request ent :print-rules :edit)) #+test (defmethod tag-text-form ((request http-request) ent) (bind-query-values (text tagging-niveau language version) request nil t t t (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) (unwind-protect (let* ((tagging-niveau (intern (string-upcase (or (print tagging-niveau) "morphological-disambiguation")) :keyword)) (language (intern (string-upcase (or language "NBO")) :keyword)) (cg (if version (gethash version *cg-table*) (ecase language (:nbo *nbo-cg*) (:nny *nny-cg*))))) (when (consp text) (labels ((concat-with-nl (strings) (if (cdr strings) (concat (car strings) #.(format nil "~%") (concat-with-nl (cdr strings))) (car strings)))) (setf text (concat-with-nl text)))) (when text (handler-case (disambiguate-from-string text #+test(convert-string text :win :mac) ; *** ?? :stream stream :cg cg :print-function (lambda (sentence &key stream platform) (print-sentence sentence :stream stream :platform platform :print-rules nil)) :tagging-niveau (or tagging-niveau :morphological-disambiguation)) (error (cond) (format stream "~a" (format nil "Error: ~a" cond)))))))))))) (defclass multipart-buffer () ((request :initform nil :initarg :request) (buffer :initform (make-array 4096 :element-type 'character)) (index :initform 0 :accessor buffer-index) (size :initform 0 :accessor buffer-size))) (defclass prefilled-multipart-buffer (multipart-buffer) ((lines :initform :empty :accessor multipart-lines))) ;; debug (defparameter *request* nil) (defmethod read-next-line ((stream multipart-buffer)) (let ((line :eof)) (with-slots (request buffer index size) stream (setf *request* request) (labels ((read-chunk () (cond ((null size) :eof) ((= size index) (setf size (get-multipart-sequence request buffer) index 0) (if size (read-chunk) line)) (t (when (eq line :eof) (setf line "")) (let* ((nl-pos (position-if (lambda (c) (member c '(#\Newline #\Return))) buffer :start index :end size) #+ignore(position #\Return buffer :start index :end size)) (pos (or nl-pos size))) (setf line (concat line (subseq buffer index (if (and (> pos index) (char= (char buffer (1- pos)) #\Return)) (1- pos) pos))) index (if nl-pos (1+ nl-pos) size)) (if nl-pos line (read-chunk))))))) (read-chunk))))) (defmethod read-next-line ((stream prefilled-multipart-buffer)) (when (eq (multipart-lines stream) :empty) (setf (multipart-lines stream) (loop for line = (call-next-method) ;; do (print line) collect line ;; (if (eq line :eof) :eof "disabled") ;; line until (eq line :eof))) (format t "~d lines read.~%" (length (multipart-lines stream)))) (pop (multipart-lines stream))) #+test (net.aserve::debug-on :info) #+test (net.aserve::debug-on :notrap) #+test (net.aserve::debug-off :notrap) #+test (with-open-file (stream "~/test.xml" :direction :output :if-exists :supersede) #m(text (s "Dette er en setning.") (s "Dette er en annen."))) (defmethod tag-text-form ((request http-request) ent) (bind-query-values (lang mode in-format out-format version show-rules print-lc-features in-file) request nil t t t #+debug(print (list lang mode in-format out-format show-rules in-file)) (let* ((mode (intern (string-upcase mode) :keyword)) (print-lc-features (or print-lc-features (find mode '(:sndl :neol)))) (tagging-niveau (ecase mode (:mt :multi-tagging) (:md :morphological-disambiguation) (:sm :syntactic-mapping) (:sd :syntactic-disambiguation) (:sdr :syntactic-disambiguation-regexp) ((:neo :neol) :named-entity-recognition-only) (:mnd :named-entity-disambiguation) (:snm :syntactic-named-entity-mapping) (:sdnm :syntactic-disambiguation-named-entity-mapping) ((:snd :sndl) :syntactic-named-entity-disambiguation))) (in-format (intern (string-upcase in-format) :keyword)) (out-format (intern (string-upcase out-format) :keyword)) (tokenizer-class (ecase in-format (:text (if (find mode '(:neo :neol)) 'pretagged-tokenizer 'tokenizer)) (:xml 'xml-tokenizer) ;; does not work yet (:cwb 'pretokenized-tokenizer))) ;; change to cwb-tokenizer! ) (cond ((eq out-format :oslo) (let* ((language (intern (string-upcase (or lang "NBO")) :keyword)) (cg (if version (gethash version *cg-table*) (newest-cg language))) (*cg* cg)) (when (null cg) (error "Grammar ~a not loaded." (or version language))) (progn ;;handler-case (labels ((disambiguate (in-stream stream) (disambiguate-stream tokenizer-class in-stream :cg cg :context-size (when (find tagging-niveau '(:named-entity-disambiguation :named-entity-recognition-only :syntactic-named-entity-mapping :syntactic-disambiguation-named-entity-mapping :syntactic-named-entity-disambiguation :syntactic-disambiguation-regexp)) *context-size*) :print-function (lambda (sentence &key platform &allow-other-keys) (let ((*package* (find-package :cgp))) #+debug (print-sentence sentence :stream *standard-output* :expand-tokens-p (eq out-format :cwp) :platform (or platform $encoding) :print-rules show-rules) (print-sentence sentence :stream stream :expand-tokens-p (or (eq out-format :cwb) (find mode '(:neol :sndl))) :print-lc-features print-lc-features :platform (or platform $encoding) :print-rules show-rules))) :tagging-niveau (or tagging-niveau :morphological-disambiguation)))) (with-open-file (in-stream in-file) (with-open-file (stream "/home/paul/cgp-out.txt" :direction :output :if-exists :supersede) (disambiguate in-stream stream)))) #+debug (error (cond) (with-open-file (stream "/home/paul/cgp-out.txt" :direction :output :if-exists :supersede) (format stream "Error: ~a" cond)) (error "~a" cond) )))) (t (with-http-response (request ent :timeout (* 60 60 2)) (with-http-body (request ent :headers `((content-disposition . ,(concat "attachment; filename=" "dis" ".txt")))) (get-multipart-header request) (let ((stream (request-reply-stream request)) (in-stream (make-instance 'prefilled-multipart-buffer :request request))) (unwind-protect (let* ((language (intern (string-upcase (or lang "NBO")) :keyword)) (cg (if version (gethash version *cg-table*) (newest-cg language)))) (handler-case (labels ((disambiguate (stream) (disambiguate-stream tokenizer-class in-stream :cg cg :context-size (when (find tagging-niveau '(:named-entity-disambiguation :syntactic-named-entity-mapping :syntactic-disambiguation-named-entity-mapping :syntactic-named-entity-disambiguation)) *context-size*) :print-function (lambda (sentence &key platform &allow-other-keys) ;;(print sentence) (let ((*package* (find-package :cgp))) (case out-format ((:text :cwb :oslo) (print-sentence sentence :stream stream :expand-tokens-p (eq out-format :cwp) :print-lc-features print-lc-features :platform (or platform $encoding) :print-rules show-rules)) (:html (print-sentence-html sentence :stream stream :print-lc-features print-lc-features :print-rules show-rules)) (:xml (print-sentence-xml sentence :print-lc-features print-lc-features :stream stream))))) :tagging-niveau (or tagging-niveau :morphological-disambiguation)))) (cond ((and (eq in-format :text) (eq out-format :xml)) #m(?xml :version "1.0" :encoding "utf-8" :standalone "yes") #m((text :date #L(now :format :date) :tagging #L (string tagging-niveau)) #L(terpri stream) #L(disambiguate stream))) ((eq out-format :oslo) (with-open-file (stream "/home/paul/cgp-out.txt" :direction :output :if-exists :supersede) (disambiguate stream))) (t (disambiguate stream)))) (error (cond) (if (eq out-format :oslo) (with-open-file (stream "/home/paul/cgp-out.txt" :direction :output :if-exists :supersede) (format stream "Error: ~a" cond)) (format stream "Error: ~a" cond)))))))) (loop while (get-multipart-header request)))))))) (defmethod %compute-analyser-form ((request http-request) ent &key print-rules) (setf *request* request) ;; *** change key to mode (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) (bind-query-values ;; *** put this into with-html-response! (text file remote-file encoding tagging-niveau language version compare-version compare-p eagles-p expand-tokens-p print-rules-p print-scarrie-styles-p) request nil t t t #-debug (print (list text file remote-file encoding tagging-niveau language version compare-version compare-p eagles-p)) #+debug(print (map 'list #'identity (utf-8-decode text))) (unwind-protect (let ((tagging-niveau (intern (string-upcase tagging-niveau) :keyword)) (language (intern (string-upcase (or language "NBO")) :keyword))) (when (consp text) ;; When encoding type is MULTIPART, text with newlines is returned as a list ;; of lines (see "file-uploading.lisp"). ;; For the parser, the lines have to be joined again. (labels ((concat-with-nl (strings) (if (cdr strings) (concat (car strings) #.(format nil "~%") (concat-with-nl (cdr strings))) (car strings)))) (setf text (concat-with-nl text)))) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Oslo-Bergen-taggeren") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) ((body :onload "document.form.text.focus()") #L(analyser-page-body request stream :text (or text "") :tagging-niveau tagging-niveau :language language :mode print-rules :version version :compare-version compare-version :compare-p compare-p :file remote-file :encoding encoding :eagles-p eagles-p :expand-tokens-p expand-tokens-p :print-rules-p print-rules-p :print-scarrie-styles-p print-scarrie-styles-p) #L(when (or text remote-file) (progn ;;handler-case (cond ((and remote-file (not (equal remote-file ""))) #m(pre (teletype #L(let ((cg (if version (gethash version *cg-table*) (or (ecase language ;; newest ones! (:nbo (gethash "nbo" *cg-table*)) (:nny (gethash "nny" *cg-table*))) (newest-cg language))))) (with-open-file (file-stream file :direction :input) (disambiguate-stream *tokenizer* file-stream ;; stream :encoding (when encoding (intern (string-upcase encoding) :keyword)) :cg cg :context-size (when (find tagging-niveau '(:named-entity-disambiguation :syntactic-named-entity-mapping :syntactic-disambiguation-named-entity-mapping :syntactic-named-entity-disambiguation)) *context-size*) :compare-cg (when compare-p (gethash compare-version *cg-table*)) :print-function (lambda (sentence &key platform &allow-other-keys) (when print-scarrie-styles-p (add-scarrie-styles sentence)) (print-sentence-html sentence :stream stream :platform platform :print-rules print-rules :expand-tokens-p expand-tokens-p :eagles-p eagles-p :print-rules-p print-rules-p)) :tagging-niveau tagging-niveau)))))) ((null text) nil) ((string= text "") nil) ((and (> (length text) 500) (eq print-rules :test)) nil) (t #m(pre (teletype #L(let ((cg (if version (gethash version *cg-table*) (or (ecase language ;; newest ones! (:nbo (gethash "nbo" *cg-table*)) (:nny (gethash "nny" *cg-table*))) (newest-cg language))))) (disambiguate-from-string (if remote-file text (utf-8-decode text)) :stream stream :cg cg :context-size (when (find tagging-niveau '(:named-entity-disambiguation :syntactic-named-entity-mapping :syntactic-disambiguation-named-entity-mapping :syntactic-named-entity-disambiguation)) *context-size*) :compare-cg (when compare-p (gethash compare-version *cg-table*)) :print-function (lambda (sentence &key stream platform &allow-other-keys) (when print-scarrie-styles-p (add-scarrie-styles sentence)) (print-sentence-html sentence :stream stream :platform platform :print-rules print-rules :expand-tokens-p expand-tokens-p :eagles-p eagles-p :print-rules-p print-rules-p)) :tagging-niveau tagging-niveau)))))) #+ignore (error (cond) #m(p #S(format nil "Error: ~a" cond)))) (when (eq print-rules :edit) (analyser-page-documentation-links stream))) (hr/) "Tagger-programvaren og vev-tjeneren er skrevet i " ((a :href "http://www.lisp.org/alu/home") "Common Lisp") "."))) (when file (delete-file file)))))))) (defun respond-to-regexp-filter-test-form (url stream query-alist) (%respond-to-regexp-filter-test-form url stream query-alist :print-rules :edit)) (defmethod show-match ((token token) stream) (declare (ignore stream))) (defmethod show-match ((token regexp-token) stream) #m((font :color "green") #L(case (match token) (:match-start (write-string "> " stream)) (:match-end (write-string "< " stream)) (:match (write-string "| " stream)) (:whole-match (write-string "* " stream)) (otherwise nil)))) (defmethod print-rules ((token token) (mode t) str stream &key &allow-other-keys) (format stream "\"<~a>\"" (convert-string str :mac :sgml))) (defmethod print-rules ((token token) (mode (eql :print)) str stream &key &allow-other-keys) (format stream "\"<~a>\"~{ ~a~}" (utf-8-encode str) ;;(convert-string str :mac :sgml) (reverse (token-used-rules token)))) (defmethod print-rules ((token token) (mode (eql :edit)) str stream &key print-rules-p &allow-other-keys) (let ((sentence (token-chain token))) (with-slots (cg) sentence (write-string "\"<" stream) #m(b ((font :color "blue") #s str)) (write-string ">\"" stream) (when (and print-rules-p (token-used-rules token)) (destructuring-bind (mt-rule &rest rules) (reverse (token-used-rules token)) (format stream " ~a" mt-rule) (dolist (rule rules) (unless (symbolp rule) (write-char #\Space stream) #m((a :href #L(format nil "/cl/cgp/show-rules.html?~a" (query-to-form-urlencoded (list (cons "show-rules" "yes") (cons "lang" (name cg)) (cons "rule-id" (rule-id rule))))) :target "rule-pane") #L(write-string (rule-string rule) stream))))))))) (defmethod print-rules ((token compare-token) (mode (eql :edit)) str stream &key &allow-other-keys) (let ((sentence (token-chain token))) (with-slots (cg compare-cg) sentence (format stream "\"<") #m(b ((font :color "blue") #L(write-string (u:convert-string str :mac :sgml) stream))) (destructuring-bind (mt-rule &rest rules) (reverse (token-used-rules token)) (let ((compare-rules (cdr (reverse (compare-used-rules token))))) (format stream ">\" ~a" mt-rule) (labels ((walk (rules c-rules) (let ((rule (car rules)) (c-rule (car c-rules))) (cond ((and (null rule) (null c-rule)) nil) ((eq rule c-rule) (write-char #\Space stream) #m((a :href #L(format nil "/cl/cgp/show-rule?version=~a&rule-id=~d" (name cg) (rule-id rule)) :target "rule-pane") #L(rule-string rule)) (walk (cdr rules) (cdr c-rules))) ((< (length c-rules) (length rules)) (write-char #\Space stream) #m((font :color "red") "[1]") #m((a :href #L(format nil "/cl/cgp/show-rule?version=~a&rule-id=~d" (name cg) (rule-id rule)) :target "rule-pane") #L(rule-string rule)) (walk (cdr rules) c-rules)) (t (write-char #\Space stream) #m((font :color "green") "[2]") #m((a :href #L(format nil "/cl/cgp/show-rule?version=~a&rule-id=~d" (name compare-cg) (rule-id c-rule)) :target "rule-pane") #L(rule-string rule)) (walk rules (cdr c-rules))))))) (walk rules compare-rules))))))) (defun stringify (str-or-symbol) (typecase str-or-symbol (string str-or-symbol) (integer ;; temporary fix until lexin-search works (format nil "~a" str-or-symbol)) (t (string-downcase (string str-or-symbol))))) (defmethod print-features ((token token) stream &key lc-features-p &allow-other-keys) (with-slots (features lc-features chain) token (let ((features (remove-feature-inclusions features))) (dolist (fl features) (when (car fl) (format stream "~% \"") #m((font :color "red") #S(car fl)) (write-char #\" stream) (mapcar (lambda (f) (write-string " " stream) (if (consp f) (mapcar (lambda (f) #m #s(stringify f)) f) #m #s (stringify f))) (code-features (cdr fl) (feature-vector chain))))) (when lc-features-p (dolist (fl (remove-feature-inclusions lc-features)) (when (car fl) (format stream "~% \"") #m((font :color "orange") #S(car fl)) (write-char #\" stream) (mapcar (lambda (f) (write-string " " stream) (if (consp f) (mapcar (lambda (f) #m #s(stringify f)) f) #m #s (stringify f))) (code-features (cdr fl) (feature-vector chain))))))))) ;; rewrite using feature-filter!! (defmethod print-eagles-features ((token token) stream) (with-slots (features) token (dolist (fl features) (when (car fl) (let* ((cg-features (code-features (cdr fl))) (eagles-features (cg-to-eagles-features (car cg-features) (cdr cg-features)))) (format stream "~% \"") #m((font :color "red") #S(car fl) #+ignore #L(write-string (chars-to-entities (car fl)) stream)) (let ((firstp t)) (labels ((convert (f) (if firstp (progn (setf firstp nil) (string-upcase (utf-8-encode (stringify f)))) (utf-8-encode (stringify f))))) (format stream "\" ~{~a~^ ~}" (mapcar (lambda (f) (if (consp f) (mapcar #'convert f) (convert f))) eagles-features))))))))) (defmethod print-features ((token compare-token) stream &key &allow-other-keys) (with-slots (features compare-features chain) token (labels ((walk (features c-features) (let ((fl (car features)) (c-fl (car c-features))) (cond ((and (null fl) (null c-fl)) nil) ((and fl (null (car fl))) (walk (cdr features) c-features)) ((and c-fl (null (car c-fl))) (walk features (cdr c-features))) ((equal fl c-fl) (format stream "~% \"") #m((font :color "green") #S (car fl) #+ignore #L(write-string (chars-to-entities (car fl)) stream)) (format stream "\" ~{~a~^ ~}" (mapcar (lambda (f) (if (consp f) (mapcar #'utf-8-encode #+ignore #'chars-to-entities f) (utf-8-encode #+ignore chars-to-entities f))) (code-features (cdr fl) (feature-vector chain)))) (walk (cdr features) (cdr c-features))) ((< (length c-features) (length features)) (format stream "~% ") #m((font :color "red") #L(write-string "[1]" stream)) (write-char #\" stream) #m((font :color "green") #S(car fl) #+ignore #L(write-string (chars-to-entities (car fl)) stream)) (format stream "\" ~{~a~^ ~}" (mapcar (lambda (f) (if (consp f) (mapcar #'utf-8-encode #+ignore #'chars-to-entities f) (utf-8-encode #+ignore chars-to-entities f))) (code-features (cdr fl) (feature-vector chain)))) (walk (cdr features) c-features)) (t (format stream "~% ") #m((font :color "red") #L(write-string "[2]" stream)) (write-char #\" stream) #m((font :color "green") #S(car c-fl) #+ignore #L(write-string (chars-to-entities (car c-fl)) stream)) (format stream "\" ~{~a~^ ~}" (mapcar (lambda (f) (if (consp f) (mapcar #'utf-8-encode #+ignore #'chars-to-entities f) (utf-8-encode #+ignore chars-to-entities f))) (code-features (cdr c-fl) (feature-vector chain)))) (walk features (cdr c-features))))))) (walk features compare-features)))) (defmethod token-expansion-end ((token token)) (with-slots (expansion) token (if expansion (token-expansion-end (cdr expansion)) token))) (defmethod print-sentence-html ((sentence sentence) &key stream (print-features t) print-rules eagles-p expand-tokens-p print-lc-features print-rules-p &allow-other-keys) "outputs a CG style tagged sentence as html" (setf *sentence* sentence) #+debug(print (list :printing sentence)) (labels ((walk (token concat-token) #+debug (unless (null token) (print (list token :expansion (token-expansion token)))) (cond ((null token) nil) ((and expand-tokens-p (token-expansion token) ;; avoid nesting (not concat-token)) (labels ((expand (token first last) (if (token-expansion first) (expand token (car (token-expansion first)) (cdr (token-expansion first))) (walk first token)) (unless (eq first last) (expand token (token-next first) last)))) (expand token (car (token-expansion token)) (cdr (token-expansion token)))) (walk (token-next token) nil)) (t (let ((str (token-value token)) (features (token-features token))) (unless (symbolp str) (if print-features (let ((*package* (find-package :cgp))) (terpri stream) (show-match token stream) (print-rules token print-rules str stream :print-rules-p print-rules-p)) (write-string str stream)) (when (and features print-features) (if eagles-p (print-eagles-features token stream) (print-features token stream :lc-features-p (or expand-tokens-p print-lc-features)))) (write-char #\Space stream)) (unless (or (eq token (last-token sentence)) concat-token) (walk (token-next token) nil))))))) (walk (first-token sentence) nil))) ;; *sentence* ;;; -------------------------------------- rule editing ----------------------------------------- (defmethod show-rule-form ((request http-request) ent) (bind-query-values (lang version rule-id domain all-domains type heuristic-niveau features show-rules) request nil t t t (let ((version (or version "NBO")) (cg (name-to-cg version)) (id (when rule-id (parse-integer rule-id :junk-allowed t))) (type (intern (string-upcase type) :keyword)) (heuristic-niveau (cond ((equal heuristic-niveau "0") 0) ((equal heuristic-niveau "1") 1) ((equal heuristic-niveau "2") 2) ((equal heuristic-niveau "3") 3) (t :alle))) (features (string-parse features :whitespace '(#\Space))) (error nil)) #+debug(print (list version show-rules )) ;; TO DO: put handler-case around here etc.! (handler-case (when (and cg (null show-rules)) ; save-changes (%update-rules cg (request-query request))) (error (cond) (setf error cond))) (show-rules request ent :version (or lang version) :id id :rule-id rule-id :heuristic-niveau heuristic-niveau :type type :domain domain :all-domains all-domains :features features :keep-groups-together-p (not version) :error error)))) (defmethod show-rules-page-body ((request http-request) stream &key version domain all-domains type heuristic-niveau features rule-id) (labels ((concat-with-space (strings) (cond ((cdr strings) (concat (car strings) " " (concat-with-space (cdr strings)))) ((null strings) "") (t (car strings))))) #m(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;") "Velg reglene som skal vises:")) #m((table :id "tblQueryTable") (tr ((td :align "right" :class "clsQueryLabel") "Versjon ") (td #L(html-select stream :name "version" :options (collecting (maphash (lambda (name cg) (declare (ignore cg)) (collect name)) *cg-table*)) :default version) " " ((a :href #L(concat "/cl/cgp/constraint-grammars.html?version=" version)) "vis")) (td (input/ :type "submit" :name "show-rules" :value "Vis reglene"))) (tr ((td :align "right" :class "clsQueryLabel") "Regel-ID ") ((td :valign "top") (input/ :type "text" :name "rule-id" :size "10" :value #L(or rule-id "")) ((b :class "clsQueryLabel") " eller:"))) (tr ((td :align "right" :class "clsQueryLabel") " Regeltype ") (td #L(html-select stream :name "type" :options '(:select :strong-select :discard :syntactic-map :syntactic-select :syntactic-discard :named-entity-map :named-entity-select :named-entity-discard) :default type))) (tr ((td :align "right" :class "clsQueryLabel") " Heur. nivå ") (td #L(html-select stream :name "heuristic-niveau" :options '(:alle 0 1 2 3) :default heuristic-niveau) #+ignore (accept-input 'select-choices "HEURISTIC-NIVEAU" :choices '(:alle 0 1 2 3) :default heuristic-niveau :size :pull-down-menu))) (tr ((td :align "right" :class "clsQueryLabel") "Domene ") (td (input/ :type "text" :name "domain" :size "30" :value #L(or domain "")) " alle" #L(if all-domains #m(input/ :type "checkbox" :name "ALL-DOMAINS" :checked "true") #m(input/ :type "checkbox" :name "ALL-DOMAINS")))) (tr ((td :align "right" :class "clsQueryLabel") "Trekk ") (td (input/ :type "text" :name "features" :size "30" :value #L(concat-with-space features))))))) ;;;; ------------------------------------- Set declarations editing ------------------------------------------ (defmethod show-set-declarations-form ((request http-request) ent) (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) (bind-query-values (version dec-count declaration-name-substring) request nil t t t (show-set-declarations request stream :version version :substring (utf-8-decode (or declaration-name-substring "")) :count (parse-integer dec-count :junk-allowed t))))))) (defmethod show-set-declarations-page-body ((request http-request) stream &key version (substring "")) #m(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;") "Velg deklarasjonene som skal vises:")) #m((table :id "tblQueryTable") (tr ((td :align "right" :class "clsQueryLabel") "Versjon ") (td #L(html-select stream :name "version" :options (collecting (maphash (lambda (name cg) (declare (ignore cg)) (collect name)) *cg-table*)) :default version) " " ((a :href #L(concat "/cl/cgp/constraint-grammar?version=" version)) "vis")) (td (input/ :type "submit" :name "show-set-declarations" :value "Vis deklarasjonene"))) (tr ((td :align "right" :class "clsQueryLabel") "Delstreng ") (td (input/ :type "text" :name "declaration-name-substring" :size "30" :value #L substring))))) (defmethod show-set-declarations ((request http-request) stream &key version substring count) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Redigeringsside for mengdedeklarasjonene") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body #L(progn;handler-case (progn #m((form :method "post" :action #L(request-uri-string request)) (link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/edit-declarations.css") #L(show-set-declarations-page-body request stream :version version :substring substring) #L(when version (let* ((*cg* (name-to-cg version)) (*package* (find-package :cgp)) (*print-case* :downcase) (%substring (string-upcase substring)) (declarations (sort (collecting (maphash (lambda (name set) (declare (ignore set)) (when (search %substring (symbol-name name)) (collect name))) (set-declarations *cg*))) #'string<))) #-ignore (when t ; update-alist ; put this here to get error message into browser (%update-set-declarations (name-to-cg version) count (request-query request))) #m((script :language "JavaScript") #L(js/edit-declaration (length declarations) stream)) #m(input/ :type "hidden" :name "dec-count" :value #L(ensure-string (length declarations))) #m(br/) #m((table :width "100%" :id "tblEditMenu") (tr (td "|") (td ((div :id "newDeclarations" :onclick "InsertNewDeclaration()") "Lag ny deklarasjon")) (td "|") #L(unless (cg-locked-p *cg*) #m(td ((div :id "saveChanges" :onclick "SaveChanges()") "Lagre endringer")) #m(td "|")) ((td :id "testPage") ((a :href #L(concat "/cl/cgp/tagger.html?version=" version "&tagging-niveau=morphological-disambiguation") :target "test-rules") "Test regelsettet") #+ignore ((a :href #L(concat "/cl/cgp/test-rules?~a" version) :target "test-rules") "Test regelsettet")) (td "|") (td ((div :id "mainPage") ((a :href "/cl/cgp/site-map.html" :target "site-map-page") "Hovedside"))) (td "|") (td ((div :id "helpPage") ((a :href "/cl/cgp/declaration-edit-help.html" :target "help-page") "Hjelp"))) (td "|"))) #m(br/) #m((table :width "100%" :id "tblDeclarationsTable") (tr ((td :valign "top" :class "clsDeclarationNameLabel") " Deklarasjon") ((td :valign "top" :class "clsDeclarationSetLabel") " Definisjon")) #L(let ((id 0)) ;; IDs are necessary for Javascript editing (dolist (declaration declarations) (incf id) #m(tr ((td :valign "top" :class "clsDeclarationName") ((div :id #L(format nil "DeclarationName~d" id) :onclick #L(format nil "EditDeclaration('~d', 'Name')" id)) #L(write-string (convert-string (string-downcase declaration) $encoding :sgml '(#\< "<" #\> ">")) stream))) ((td :valign "top" :class "clsDeclarationSet") ((div :id #L(format nil "DeclarationSet~d" id) :onclick #L(format nil "EditDeclaration('~d', 'Set')" id)) #L(write-string (convert-string (with-output-to-string (str) (write (gethash declaration (set-declarations *cg*)) :stream str)) $encoding :sgml '(#\< "<" #\> ">")) stream))))))))))) #+ignore (error (cond) #m(p #L(format stream "Error: ~a" cond))))))) ;;;; -------------------------------- Test corpus ------------------------------------ (defmethod test-corpus-form ((request http-request) ent) (with-http-response (request ent :timeout 6000) (with-http-body (request ent) (let ((stream (request-reply-stream request))) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Tagging av testkorpuset") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body #L(test-corpus-page-body request stream ent))))))) (defmethod test-corpus-page-body ((request http-request) stream entity) (declare (ignore entity)) (bind-query-values (version (tagging-niveau "morphological-disambiguation") test-corpus tag-corpus) request nil t t t #m(p ((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;") "Velg CG-versjon og prøvekorpus:")) #+ignore (when tag-corpus (show-tagging-result request stream :entity entity :version version :corpus test-corpus)) #m((form :method "post" :action #L(request-uri-string request)) ((table :id "tblQueryTable") (tr ((td :align "right" :class "clsQueryLabel") "Versjon ") (td #L(html-select stream :name "version" :options (cg-name-list) :default version) " " ((a :href #L(concat "/cl/cgp/constraint-grammar?version=" version)) "vis")) (tr ((td :align "right" :class "clsQueryLabel") "Nivå ") (td #L(html-select stream :name "tagging-niveau" :options (list "morphological-disambiguation" "syntactic-disambiguation") :default tagging-niveau)))) (tr ((td :align "right" :class "clsQueryLabel") "Prøvekorpus ") (td #L(html-select stream :name "test-corpus" :options (mapcar #'pathname-name (directory (concat "/home/" (user-name request) "/test-corpus/*.cor"))) ;;*test-corpora* :default test-corpus))) (tr (td (input/ :type "submit" :name "tag-corpus" :value "Tagg korpuset")))) #L(when tag-corpus (let ((niveau (intern (string-upcase tagging-niveau) :keyword))) #m(br) #m(hr) #m((font :face "Verdana, Arial, Helvetica, Geneva, sans-serif;") (p #L(write-string (concat "Resultatet skrives til \"~/test-corpus/" test-corpus (ecase niveau (:morphological-disambiguation ".dis\".") (:syntactic-disambiguation ".syn-dis\"."))) stream)))))) (process-run-function "show-tagging-result" #'show-tagging-result request stream :version version :tagging-niveau (intern (string-upcase tagging-niveau) :keyword) :corpus test-corpus))) ;; write results to file in ~/terst-corpus until we find out how to keep the page alive ;; and send progress messages (defmethod show-tagging-result ((request http-request) stream &key entity version corpus (tagging-niveau :morphological-disambiguation)) (when corpus (labels ((message (text) ;; does not work. #m(p #L(write-string text stream)))) (let ((file (concat "/home/" (user-name request) "/test-corpus/" corpus ".cor"))) (when (probe-file file) (multiple-value-bind (precision recall sentence-count token-count error-count) (with-open-file (in-stream file) (with-open-file (out-stream (concat "/home/" (user-name request) "/test-corpus/" corpus (ecase tagging-niveau (:morphological-disambiguation ".dis") (:syntactic-disambiguation ".syn-dis"))) :direction :output :if-exists :supersede :if-does-not-exist :create) (run-test-corpus (gethash version *cg-table*) :in-stream in-stream :out-stream out-stream ;;:message-fn #'message :tagging-niveau tagging-niveau :print-only-sentences-with-errors-p nil))) (with-open-file (out-stream (concat "/home/" (user-name request) "/test-corpus/" corpus (ecase tagging-niveau (:morphological-disambiguation ".res") (:syntactic-disambiguation ".syn-res"))) :direction :output :if-exists :supersede :if-does-not-exist :create) (format out-stream "Antall setninger: ~d~%Antall ord: ~d~%Antall feil: ~d~%Presisjon: ~,2f~%Recall: ~,2f~%" sentence-count token-count error-count (* precision 100.0) (* recall 100.0)))) #+mcl #m(tr (td "CPU-tid:") (td #L(format stream "~:d ms" (- (get-internal-run-time) start-run-time))))))))) ;;(error (cond) (p (format stream "Error: ~a" cond))))))))) ;;;; ------------------------------------------ Rules editing ------------------------------------------------ ;; rename! (defun name-to-cg (name) (gethash name *cg-table*) #+ignore (ecase language (:nbo *nbo-cg*) (:nny *nny-cg*))) (defmethod show-rules ((request http-request) ent &key version id rule-id domain all-domains type heuristic-niveau features error (keep-groups-together-p t)) (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Redigeringsside for CG-reglene") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body #L(handler-case (let* ((*cg* (name-to-cg version)) (*package* (find-package :cgp)) (rules (when *cg* (get-rules *cg* :id id :domain (if all-domains :alle (utf-8-decode domain)) :type type :heuristic-niveau heuristic-niveau :features (mapcar (lambda (f) (intern (string-upcase f))) features) :keep-groups-together-p keep-groups-together-p)))) (when *cg* #m((script :language "JavaScript") #L(js/edit-rule (string-downcase (symbol-name (language *cg*))) stream))) #m((form :method "post" :action "/cl/cgp/show-rules.html" #+ignore(request-uri-string request)) (link/ :rel "stylesheet" :type "text/css" :href "/cl/cgp/styles/edit-rules.css") #L(when error #m((font :color "red") (h3 "Det oppstod en feil ved lagring:")) #m(p #S(format stream "~a" error))) #L(show-rules-page-body request stream :version version :rule-id rule-id :domain (utf-8-decode domain) :all-domains all-domains :type type :heuristic-niveau heuristic-niveau :features features) #L(when rules #m(br/) #m((table :width "100%" :id "tblEditMenu") (tr (td "|") (td ((div :id "newRule" :onclick #L(format nil "InsertNewRule('~a')" (language *cg*))) "Lag ny regel")) (td "|") #L(unless (cg-locked-p *cg*) #m(td ((div :id "saveChanges" :onclick "SaveChanges()") "Lagre endringer")) #m(td "|")) ((td :id "testPage") ((a :href #L(concat "/cl/cgp/tagger.html?version=" version "&tagging-niveau=morphological-disambiguation") :target "test-rules") "Test regelsettet") (td "|") (td ((div :id "mainPage") ((a :href "/cl/cgp/site-map.html" :reference "/cl/cgp/site-map.html" :target "help-page") "Hovedside"))) (td "|") (td ((div :id "helpPage") ((a :href "/cl/cgp/rule-edit-help.html" :target "help-page") "Hjelp"))) (td "|"))) (br/) ((table :width "100%" :id "tblRulesTable") #L(let ((group-comment nil)) (dolist (rule rules) (when rule (let ((new-group-comment (aref (rule-group-comments *cg*) (rule-id rule)) #+ignore(rule-group-comment rule))) (cond ((null new-group-comment) (when group-comment (setf group-comment nil) #m(tr ((td :valign "top" :align "right" :class "clsRuleGroupCommentLabel") " ") ((td :valign "top" :class "clsRuleGroupComment") " ")))) ((eq new-group-comment group-comment) nil) (t (setf group-comment new-group-comment) #m(tr ((td :valign "top" :align "right" :class "clsRuleGroupCommentLabel") "Regelgruppe " (br/) " ") ((td :valign "top" :class "clsRuleGroupComment") ((font :color "blue" :face "verdana, arial, helvetica, geneva, sans-serif" :size "3") ((pre :id #L(format nil "ruleGroupComment~a~d" (language rule) id) :onclick #l(format nil "EditRule('ruleGroupComment~a~d', 'group-comment')" (language rule) id)) #L(write-string (convert-string (car group-comment) :mac :sgml '(#\Newline "
")) stream))))))))) (when rule (write-rule-edit-html rule stream))))) ((script :language "JavaScript") #L(js/edit-rule-popup-menu stream)) ((div :id "divMenu1" :class "clsMenu" :onmouseover "Menu_hover()" :onmouseout "Menu_hover()" :onclick "Menu_click()") ((div :id "EditRule") "Rediger regelen") ((div :id "EditRuleComment") "Rediger kommentar") ((div :id "DeleteRule") "Slett regelen") ((div :id "CloneRule") "Dupliser regelen") ((div :id "UndoRuleChanges") "Angre siste endring")))))) (error (cond) #+debug(error cond) #m(p #s(format stream "Error: ~a" cond)))))))))) (defmethod write-rule-edit-html ((rule rule) stream) (let ((*package* (find-package :cgp)) (*print-case* :downcase) (language (language rule)) (id (rule-id rule))) #m((tr :id #L(format nil "RuleRow~a~d" language id)) ((td :width "80pt" :valign "top" :align "right" :class "clsRuleNameLabel") "Regel ") ((td :valign "top" :id #L(format nil "RuleName~a~d" language id) :class "clsRuleName" :expitemnum "1") " " #L(write (class-name (class-of rule)) :stream stream) (b #L(format stream " ~a " language)) #L(write (rule-id rule) :stream stream))) #m((tr :id #L(format nil "RuleDefRow~a~d" language id)) ((td :valign "top" :align "right" :class "clsRuleLabel") "Definisjon ") ((td :valign "top" :class "clsRuleBody") ((font :face "courier, verdana, arial, helvetica, geneva, sans-serif" :size "3") ((pre :id #L(format nil "~a~d" language id) :onclick #L(format nil "EditRule('~a~d', 'body')" language id)) #L(write-rule-body rule stream :language language :html-p t))))) (when (rule-comment rule) #m(tr ((td :valign "top" :align "right" :class "clsRuleLabel") " Kommentar ") ((td :valign "top" :class "clsRuleComment") ((font :color "blue" :face "verdana, arial, helvetica, geneva, sans-serif" :size "3") ((pre :id #L(format nil "ruleComment~a~d" language id) :onclick #L(format nil "EditRule('ruleComment~a~d', 'comment')" language id)) #L(write-string (convert-string (rule-comment rule) :mac :sgml '(#\Newline "
")) stream)))))))) (defmethod display-rule-edit-help ((request http-request) ent) (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Hjelpeside") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body (h3 "Hjelp for regelredigering") (p "Det finnes tre måter å redigere regelsettet på: Du kan slette en regel, redigere en eksisterende regel, " "og definere en ny regel. I tillegg kan du redigere kommentarer. " "Alle redigeringskommandoer er tilgjengelige fra menyen som kommer opp når du klikker " "på feltet med regeltype og -id. " "Når du har redigert en regel etc. og er gått ut av regelfeltet, oppdateres statusfeltet i regelen. ") (h4 "Slette regler") (p "Som alltid er det enklest å slette. Bruk kommandoen fra menyen.") (h4 "Redigere regler") (p "For å redigere en eksisterende regeldefinisjon velger du enten \"Rediger regelen\" fra menyen, " "eller du klikker på regelteksten.") (h4 "Lage nye regler") (p "Du kan lage nye regler ved å duplisere en eksisterende regel og redigere den. (Bruk menyen.) Den nye regelen legges inn " "umiddelbart foran regelen du tok utgangspunkt i. At regelen kommer på rett plass er særdeles viktig for " "mapping-regler, og her anbefales det forøvrig at du går ut fra et søk som i det minste viser alle reglene " "med samme trekk. Hvis regelen du dupliserer har en gruppekommentar, vil den nye regelen få samme kommentar.") (p "En alternativ måte å lage en ny regel på er å klikke på \"Lag ny regel\". Dette lager en tomt tekstfelt som " "du kan skrive i. Regelen kommer til å havne aller først blant reglene av samme type og med samme første trekk.") (h4 "Angre endringer") (p "Du kan angre endringer du har gjort ved å bruke \"Angre siste endring\" fra menyen. " "Endringer i kommentarfelt kan ikke angres via menyen. (Dette kommer senere.)") (h4 "Lagring") (p "Ved å klikke \"Lagre endringer\" sender du endringene til tjeneren."))))))) #+cl-http (defmethod user-access-controls ((user standard-user)) (let ((groups (user-groups user)) (access-controls ())) (maphash (lambda (name access-control) (declare (ignore name)) (when (intersection groups (access-control-default-groups access-control)) (pushnew access-control access-controls))) (realm-url-group-table (user-realm user))) access-controls)) (defmethod display-cgp-site-map ((request http-request) ent) (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request)) (user (user-name request))) (labels ((describe-uris (stream &rest uris) (mapcar (lambda (uri) (when-let (entity (cdar (aserve::uri-string-to-entities uri))) (describe-entity entity :html-table-row stream))) uris))) #m(!DOCTYPE "HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"") #m(html (head (title "Oversiktskart CG-tagger") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body ((font :face "Arial, Helvetica, sans-serif" :size "3") (h2 "Oversiktskart CG-tagger") (p "Oversikten lister opp de viktigste sidene for CG-taggeren. " "De fleste sidene er passordbeskyttet." #+cl-http " (Du har rettigheten" #+cl-http (when (> (length access-controls) 1) "e") #+cl-http (loop for (access-control . rest) on access-controls do (i (fast-format stream " ~a" (access-control-name access-control)) (if rest "," ").")))) ((table :border "0" :cellpadding "2" :bgcolor "#ffffef") ((tr :bgcolor "#fafae8") (td "  URL") (td "Tilgang") (td "Beskrivelse")) (tr ((td :colspan "3") (b "Testsider"))) #L(describe-uris stream "/cl/cgp/test.html" #+ignore"/oslo-bergen-tagger.html" "/cl/cgp/ranked-analyses.html") (tr ((td :colspan "3") (b "Redigeringssider"))) #L(describe-uris stream "/cl/cgp/tagger.html" "/cl/cgp/constraint-grammars.html" "/cl/cgp/cg-versions.html" "/cl/cgp/tagger-edit.html" "/cl/cgp/show-rules.html" "/cl/cgp/show-set-declarations.html" "/cl/cgp/run-test-corpus.html" "/cl/cgp/lexicon-search.xml"))) (br/) (hr/) #+orig ((font :face "Arial, Helvetica, sans-serif" :size "2") "Oppdatert " #L(format stream #.(u:now :format :date :language :nbo)) " " ((a :href "mailto:paul.meurer@aksis.uib.no") "Paul Meurer"))))))))) ;;; ------------------------------ xml output -------------------------------------- (eval-when (:compile-toplevel :load-toplevel :execute) (setf lxml::*encoding* :utf-8 lxml::*newline-after-endtag-p* nil)) ;; debugging (defparameter *break-p* nil) (defparameter *warn-on-end-found-p* nil) (defparameter *warn-on-token-mismatch-p* nil) #+debug(defparameter *sl* ()) ;; find places where and tags are to be inserted (defmethod find-sentence-boundary-tokens ((sentence sentence) &key (stack ()) (expand-tokens-p t) &allow-other-keys) #+debug(push sentence *sl*) (with-slots (in-sentence-elements) (tokenizer sentence) (let ((first nil) (last nil) (last-in-sentence nil) (first-original-token nil) (last-original-token nil)) #+debug(print (list :in-sentence-elements in-sentence-elements :stack stack)) (when stack (setf first-original-token (first-token sentence)) ;; remove spurious elements (loop while (and stack (first-token sentence) (eq (token-value (first-token sentence)) :insignificant) (let ((atts (token-attributes (first-token sentence)))) ;;(print (list :a (caddr atts) :b (caddr (car stack)))) (and (eq (car atts) :%end) (eq (caddr atts) (caddr (car stack)))))) do (remove-token (first-token sentence)) (setf stack (cdr stack))) ;; reopen closed elements on (remaining) stack (let ((first-pos (cgp::token-stream-position (first-token sentence)))) (dolist (xml-token stack) (push-token sentence :insignificant :attributes (list :%start first-pos (caddr xml-token)) :position first-pos))) (setf stack ())) (labels ((walk (token concat-token) #+debug(print (list token :concat-token concat-token :next (when token (token-next token)) :expansion (token-expansion token) :first first :last last :stack stack)) (cond ((null token) nil) ((and expand-tokens-p (token-expansion token)) (do ((ex-token (car (token-expansion token)) (token-next ex-token))) ((eq ex-token (cdr (token-expansion token))) (walk ex-token token)) (walk ex-token token)) #+debug(print (list :walk-expanded (token-next token) nil)) (walk (token-next token) nil)) (t (let ((value (token-value token)) (xml-token (token-attributes token))) (cond ((eq value :newline) nil) ((null value) nil) ((not (eq value :insignificant)) #+debug(print token) (unless first (setf first token))) (t ;; xml tag (destructuring-bind (tag-type xml-file-pos tag . attributes) xml-token (declare (ignore xml-file-pos attributes)) (case tag-type (:%string nil) (:%start (cond (first (when *warn-on-end-found-p* (when last (warn "End already found: ~s." xml-token)) (print (list :>> tag))) #+debug(print (list token :pushing1 stack)) (push xml-token stack)) ((find tag in-sentence-elements) (setf first token) (when *warn-on-end-found-p* (print (list :> tag))) #+debug(print (list token :pushing2 stack)) (push xml-token stack)) (t nil))) (:%end (cond (last #+debug(print (list token :popping1 stack)) (pop stack) nil) ((not first) nil) ((null stack) ;;(print (cons :last token)) (setf last token)) (t (when *warn-on-token-mismatch-p* (unless (eq (caddr (car stack)) tag) (warn "Token-mismatch: ~s not in ~s." tag stack))) (when *warn-on-end-found-p* (print (list :< (car stack)))) #+debug(print (list token :popping2 stack)) (pop stack))))))))) (unless (or (eq token (last-token sentence)) concat-token) (walk (token-next token) nil)))))) (walk (first-token sentence) nil) (when stack (setf last-original-token (last-token sentence)) #+debug(print (cons :stack stack)) ;; give each auxiliary token the pos of the last original token before #+debug(print (cons :last-token (last-token sentence))) (let* ((stream-pos (cgp::token-stream-position (last-token sentence))) (last-pos (if stream-pos (if (or (eq (token-value (last-token sentence)) :newline) (insignificant-token-p (last-token sentence))) stream-pos (+ stream-pos (length (token-value (last-token sentence))))) #+ignore (cadr (token-attributes (last-token sentence)))))) #+debug(print (list :stream-pos stream-pos (last-token sentence) last-pos)) (dolist (xml-token stack) (add-token sentence :insignificant :attributes (list :%end last-pos (caddr xml-token)))))) (values first last stack first-original-token last-original-token last-in-sentence))))) #|| ;; problems: ad 1)

Det er et hus. Huset er stort.

ad 2)

Det er et hus. Huset er stort.

ad 3)

Det er et hus. Huset er stort.

Solution: Check whether an element overlaps with a sentence; handle spurious overlap. If there is non-spurious overlap, close the elt before sentence start and open it again. ||# ;; TO DO: split this into plain xml and xml + tag #-under-debugging (defmethod print-sentence-xml ((sentence sentence) &key stream (print-features t) (print-sentence-elts-p t) (expand-tokens-p t) fresh-line-before-word-p (substitutions '("&" "&")) (word-elt :|word|) (encoding :utf-8) stack &allow-other-keys) "outputs a tagged sentence as xml" (when *break-p* (break)) #+debug(print sentence) ;; improve this: ;; remove spurious elements; ;; insert element according to whether it is in-sentence or not; ;; use that info in find-sentence-boundary-tokens() #+ignore (dolist (xml-token stack) (format stream "<~a>" (caddr xml-token))) (let ((*package* (find-package :cgp)) (start-token nil) (end-token nil) (end-stack nil)) (when print-sentence-elts-p (multiple-value-setq (start-token end-token end-stack) (find-sentence-boundary-tokens sentence :stack stack))) (labels ((walk (token concat-token) (when (eq token start-token) (write-string "" stream) #+debug(print (list :token "")) (setf start-token nil) (unless end-token (setf end-token t))) #+debug(print (list :token token)) (when (eq token end-token) (write-string "" stream) #+debug(print (list :token "")) (setf end-token nil)) (cond ((null token) nil) ((and expand-tokens-p (token-expansion token)) (do ((ex-token (car (token-expansion token)) (token-next ex-token))) ((eq ex-token (cdr (token-expansion token))) (walk ex-token token)) (walk ex-token token)) (walk (token-next token) nil)) (t (let ((value (token-value token)) (features (token-features (or concat-token token))) (attributes (token-attributes token))) (cond ((stringp value) (when fresh-line-before-word-p (fresh-line stream)) #m((#L word-elt) #L(if (eq encoding :utf-8) (lxml::write-utf-8-encoded (subst-substrings value substitutions #+ignore'("&" "&")) stream) (write-string (subst-substrings value substitutions #+ignore '("&" "&")) stream)) #L(when (and features print-features) (print-features-xml (or concat-token token) stream :encoding encoding)))) ((eq value :newline) (fresh-line stream)) ((eq value :insignificant) (write-xml-tag attributes stream)) ((null value) (terpri stream)) (t (print (list :value value)))) (unless (or (eq token (last-token sentence)) concat-token) (walk (token-next token) nil))))))) (walk (first-token sentence) nil) (when (and end-token (null start-token)) (write-string "
" stream)) end-stack))) (defparameter *word-count* 0) (defun write-xml-tag (tag-list stream &optional (pos 0) stack xml-branch-stack tag-branch-stack) (destructuring-bind (tag-type xml-file-pos tag . attributes) tag-list (case tag-type (:%string (incf pos (lxml::write-utf-8-encoded (subst-substrings tag '("&" "&")) stream))) (:%end (write-string " stream) (when stack (incf pos (+ 3 (length name))) (setf (cdaar xml-branch-stack) xml-file-pos (cdaar tag-branch-stack) pos) (pop stack) (incf (car stack)) (pop xml-branch-stack) (pop tag-branch-stack)))) (:%comment (write-string "" stream) (incf pos (+ 7 (length tag)))) (otherwise (let ((name (symbol-name tag))) (push 0 stack) (when xml-branch-stack (let ((branch (list (list xml-file-pos)))) (setf (cdar xml-branch-stack) (append (cdar xml-branch-stack) (list branch))) (push branch xml-branch-stack))) (when tag-branch-stack (let ((branch (list (list pos)))) (setf (cdar tag-branch-stack) (append (cdar tag-branch-stack) (list branch))) (push branch tag-branch-stack))) (write-char #\< stream) (write-string name stream) (incf pos (1+ (length name))) (loop for (att val) on attributes by #'cddr do (let ((att-val-str (format nil " ~a=~s" att val))) (write-string att-val-str stream) (incf pos (length att-val-str)))) (cond ((eq tag-type :%empty) (incf pos 2) (write-string "/>" stream)) (t (incf pos) (write-char #\> stream)))))) (values pos stack xml-branch-stack tag-branch-stack))) ;; we assume here that the POS feature is the first feature in the bv (defun pos-equal-p (bv1 bv2) (loop for b1 across bv1 for b2 across bv2 when (= 1 b1) do (return-from pos-equal-p (= 1 b2))) nil) ;; TODO: implement encoding!; lc-features (defmethod print-features-xml ((token token) stream &key substitutions syntactic-function-codes (remove-feature-inclusions-p t) print-as-elements-p mark-differences-p #+ignore ambiguous-reading-count &allow-other-keys) #+debug(print :print-features-xml print-features-xml) (with-slots (features chain attributes) token ;;(let ((feature-vector (or (feature-vector chain) (feature-vector (multi-tagger (constraint-grammar chain)))))) (with-slots (feature-vector) chain (labels ((stringify-features (f-bv) (mapcar (lambda (f) (subst-substrings (string-downcase f) substitutions)) (if nil;feature-vector (bv-to-feature-list f-bv feature-vector) (code-features f-bv feature-vector))))) #+debug(print (list token :feature-vector feature-vector)) (destructuring-bind (&key filter original-positions missing-positions &allow-other-keys) attributes (let* ((*tagger* (multi-tagger (constraint-grammar chain))) (ambiguous-reading-count #+what-is-this-for?(list 0)) (feature-list (if remove-feature-inclusions-p (collecting (dolist (fl (remove-feature-inclusions features)) (when (car fl) (collect fl)))) features)) (id -1) (f-string (with-output-to-string (stream) (let ((ambiguous-p (> (count-if-not #'null feature-list) 1)) (first-reading (when ambiguous-reading-count (1+ (car ambiguous-reading-count))))) (dolist (lemma.features feature-list) (incf id) (when lemma.features #+debug(print (list :id id :lemma.features lemma.features)) (destructuring-bind (lemma . reading-features) lemma.features (let* ((original-features (when print-as-elements-p (getf attributes :original-features))) (all-readings-p (when print-as-elements-p (getf attributes :all-readings-p))) (original-reading (find-if (lambda (o-reading) (and o-reading (string= (car o-reading) lemma) (subsumes-p (cdr o-reading) reading-features))) original-features))) #+debug(print (list :reading-id (1+ id) :lemma lemma :filtered (when (or original-reading (find id filter)) t) :reading (stringify-features reading-features) :original-features original-features :original-positions original-positions)) #m((reading :id #s id :count #s(when (and ambiguous-p ambiguous-reading-count) (incf (car ambiguous-reading-count))) :first-reading #s (when ambiguous-reading-count first-reading) :lemma #s lemma #+ignore #L(remove #\" (subst-substrings lemma '("&" "&"))) :filtered #L(when (or original-reading (find id filter)) "true") :original #L(when (or original-reading (find id original-positions)) "true") :missing #L(when (find id missing-positions) "true") :features #L(unless print-as-elements-p (subst-substrings (reduce (lambda (x y) (concat x " " y)) (stringify-features reading-features) :initial-value "") '("<" "‹" ">" "›")))) #L(when print-as-elements-p (loop for f across reading-features for i from 0 when (= f 1) do #m(feature/ :code #s i :diff #L(cond ((not (or mark-differences-p (cdr feature-list))) nil) ;; difference in same pos ((loop for fl in feature-list thereis (and fl (not (eq fl lemma.features)) (pos-equal-p (cdr fl) reading-features) (zerop (bit (cdr fl) i)))) "pos") ;; difference in different pos ((loop for fl in feature-list thereis (and fl (not (eq fl lemma.features)) (zerop (bit (cdr fl) i)))) "true") (t nil)) :syntactic #s(when (and original-features (or (and syntactic-function-codes (find i syntactic-function-codes)) (syntactic-function-p *cg* (code-feature i feature-vector)))) (if (and (cdr original-reading) (= (bit (cdr original-reading) i) 1)) "old" (if all-readings-p "all" "new"))) :value #L(if (feature-print-fn *tagger*) (funcall (feature-print-fn *tagger*) (code-feature i feature-vector) :pretty-p t) (subst-substrings (string-downcase (code-feature i feature-vector)) '("<" "‹" ">" "›"))))))))))))))) (write-string f-string stream) (length f-string))))))) (defmethod update-regexp ((request http-request) entity) (with-html-response (request entity stream (update)) (when update (load "projects:cgp;named-entities-recognizer.lisp")) #m(html (head (title "Update regexp") (meta/ :http-equiv "Content-Type" :content "text/html; charset=utf-8")) (body (h3 "Oppdater navnegjenkjennings-uttrykkene") ((form :method "post" :action "/cl/cgp/update-regexp.html") (input/ :type "submit" :name "update" :value "Oppdater")) #L(when *regexp-parser* #m(p "Slik er de nå:") #m(pre #S(let ((*package* :cgp) (*print-circle* nil) (*print-level* nil) (*print-case* :downcase)) (format nil "~s" (source-regexp *regexp-parser*))))))))) ;;; ------------------------------------------------------------------------------------------------- ;;; pages and urls ;; ;; hierarchy: ;; ;; /doc/ : dokumentasjon ;; /cl/ : computational linguistics (or common lisp ;-) ;; /cl/cgp/ : constraint grammar parser ;; /cl/cgp/source/ : partial cgp source code ;; /corpus/ : corpus ;; /corpus/cwb/ : CWB CQi interface ;; /termbase/ : terminology #+test ;; bug (publish :path "/test.html" :content-type "text/plain" :function (lambda (request ent) (with-http-response (request ent) (with-http-body (request ent) (let ((stream (request-reply-stream request))) (format stream "~s~%" '(foo bar))))))) ;; does not work as it should #+ignore (publish :path "" :content-type "text/html" :function (lambda (req ent) (with-http-response (req ent :response *response-moved-permanently*) (setf (reply-header-slot-value req :location) "http://www.aksis.uib.no") (with-http-body (req ent))))) (publish :path "/cl/cgp/update-regexp.html" :content-type "text/html" :function #'update-regexp :authorizer *authorizer* :documentation "") (publish :path "/cl/cgp/site-map.html" :content-type "text/html" :function #'display-cgp-site-map :authorizer *authorizer* :documentation "Oversiktskart CG-tagger") (publish :path "/cl/cgp/test.html" :content-type "text/html" :function #'compute-analyser-form :documentation "Oslo-Bergen-taggerens hjemmeside.") (publish :path "/cl/cgp/obt.html" :content-type "text/html" :function #'compute-analyser-form :documentation "Oslo-Bergen-taggerens hjemmeside.") (publish :path "/cl/cgp/tagger.html" :content-type "text/html" :function #'compute-edit-analyser-form :authorizer *authorizer* :documentation "Tagging av prøvetekster. Reglene som er brukt, vises med ID. Mulighet for å gå direkte til regelredigeringssiden.") #-disabled-because-of-misuse (publish :path "/cl/cgp/oslo-bergen-tagger.html" :content-type "text/text" :function #'tag-text-form :authorizer *authorizer* :documentation "Tagging av tekstfiler.") (publish :path "/cl/cgp/test-rules" :content-type "text/html" :function #'compute-edit-analyser-form :authorizer *authorizer*) ;; change to show-rules.html (publish :path "/cl/cgp/show-rules.html" :content-type "text/html" :function #'show-rule-form :authorizer *authorizer* :documentation "Redigeringsside for CG-regler.") (publish :path "/cl/cgp/show-set-declarations.html" :content-type "text/html" :function #'show-set-declarations-form :authorizer *authorizer* :documentation "Redigeringsside for CG-deklarasjoner.") (publish :path "/cl/cgp/constraint-grammars.html" :content-type "text/html" :function #'cg-description-form :authorizer *authorizer* :documentation "Siden beskriver den valgte grammatikken. Muligheter for nedlasting som Lisp-fil, sikkerhetskopiering og duplisering.") (publish :path "/cl/cgp/download-cg" :content-type "text/plain" :function #'download-cg :authorizer *authorizer* :documentation "Laster ned CG-spesifikasjonen som Lisp-fil") (publish :path "/cl/cgp/cg-versions.html" :content-type "text/html" :function #'cg-versions-form :authorizer *authorizer* :documentation "Siden viser lagrete cg-versjoner og gir mulighet til å laste inn tidligere versjoner") (publish :path "/cl/cgp/run-test-corpus.html" :content-type "text/html" :function #'test-corpus-form :authorizer *authorizer* :documentation "Disambiguerer prøvekorpuset og viser presisjon og recall.") (publish :path "/cl/cgp/run-edit-help.html" :content-type "text/html" :function #'display-rule-edit-help :authorizer *authorizer* :documentation "Hjelpeside for regelredigering.") (publish-file :path "/cl/cgp/styles/edit-rules.css" :file "projects:cgp;edit-rules.css" :content-type "text/css") (publish-file :path "/cl/cgp/styles/edit-declarations.css" :file "projects:cgp;edit-declarations.css" :content-type "text/css") (publish-directory :prefix "/cgp/distribution/" :destination "/home/paul/logon/uib/bin/linux.x86.32/" :authorizer *authorizer*) ;; created using (build-xle-web) ;; and tar zcvf .tar.gz (publish-file :path "/xle-web/xle-web-linux32-2006-04-04.tar.gz" :file "/home/paul/lisp/projects/xle/build/linux.x86.32/xle-web-linux32-2006-04-04.tar.gz" :content-type "application/gzip" :authorizer (make-instance 'net.aserve::password-authorizer :allowed '(("john" . "acdt") ("ozlem" . "Istanbul")) :realm "XLE-WEB")) #+test (print (directory "/home/paul/")) :eof