;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2012 -- 2012 Stephan Oepen (oe@ifi.uio.no);
;;; see `LICENSE' for conditions.
;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; processing the (emerging) Grammar Markup Language (GML)
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; _fix_me_
;; to get started on bracketed inputs, we assume there are no token ambiguities
;; at this stage (which will be true when working off REPP results, but is not
;; guaranteed to be true in general, i.e. for other sources of parser input).
;;                                                             (31-oct-12; oe)

;;
;; default_lb_tmr: matches a single token whose +TRAIT.+LB is of type
;; `anti_bracket_list' and emits a token whose +TRAIT.+LB is `bracket_null'.
;; every other feature named in the rule (+FORM, +CLASS, +UW, +RB, +HD, +PRED,
;; +CARG, +TNT) is shared between input and output through coreferences.
;; `one_one_tmt', `anti_bracket_list', and `bracket_null' are defined elsewhere.
;;
default_lb_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW #uw, +LB anti_bracket_list, +RB #rb, +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW #uw, +LB bracket_null, +RB #rb, +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; default_rb_tmr: the +RB counterpart of the rule above; an input +TRAIT.+RB
;; of type `anti_bracket_list' becomes `bracket_null' on the output token, and
;; all other listed features (including +LB) are shared via coreferences.
;;
default_rb_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW #uw, +LB #lb, +RB anti_bracket_list, +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW #uw, +LB #lb, +RB bracket_null, +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; prefix_gml_bracket_tmr: takes two input tokens, (1) one whose +FORM matches
;; ^⌊\(⌋$ and (2) one whose +FORM consists only of characters other than `⌊',
;; `¦', and `⌋', and emits a single token that shares its features with the
;; second input token, except that `ctype' is added at the front of the +LB
;; list (< ctype . #lb >).
;;
;; +CLASS is shared with the second input token through #class, as in the
;; default_*_tmr rules; a tag that occurs only in +OUTPUT would not be bound
;; to anything and hence would not carry over the input value.
;;
prefix_gml_bracket_tmr := two_one_tmt &
[ +INPUT < [ +FORM ^⌊\(⌋$ ],
           [ +FORM #form & ^[^⌊¦⌋]+$, +CLASS #class,
             +TRAIT [ +UW #uw, +LB #lb, +RB #rb, +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW #uw, +LB < ctype . #lb >, +RB #rb, +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; suffix_gml_bracket_tmr: mirror image of the prefix rule; the first input
;; token has a +FORM free of `⌊', `¦', and `⌋', the second input token has a
;; +FORM matching ^⌊\)⌋$.  the output token shares its features with the first
;; input token, except that `ctype' is added at the front of the +RB list
;; (< ctype . #rb >).  +CLASS is shared with the input through #class.
;;
suffix_gml_bracket_tmr := two_one_tmt &
[ +INPUT < [ +FORM #form & ^[^⌊¦⌋]+$, +CLASS #class,
             +TRAIT [ +UW #uw, +LB #lb, +RB #rb, +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ],
           [ +FORM ^⌊\)⌋$ ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW #uw, +LB #lb, +RB < ctype . #rb >, +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; _fix_me_
;; there is a design decision (yet to be made) about whether or not to include characters of
;; GML tokens in the characterization accounting, which matters in a setup where DT targets
;; are identified by full spans.  so, arguably, we should be using ‘two_one_tmt’ below.
;;                                                               (19-jun-13; dan & oe)
;;
;; NOTE(review): in the rule below, +INPUT binds #class while +OUTPUT uses
;; #trait for +CLASS, and #trait has no other visible occurrence -- confirm
;; whether +CLASS #class was intended here as well.
;;
dt_gml_tmr := token_mapping_rule &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW #uw, +LB #lb, +RB #rb ],
             +PRED #pred, +CARG #carg, +TNT #tnt,
             +ID #id, +FROM #from, +TO #to ],
           [ +FORM ^⌊←¦([^⌊¦⌋]+)(?:¦([^⌊¦⌋]+))?⌋$ ] >,
  +OUTPUT < [ +FORM #form, +CLASS #trait,
              +TRAIT [ +UW #uw, +LB #lb, +RB #rb,
                       +HD [ +LL.-CTYPE- "${uc(I2:+FORM:1)}",
                             +TG "${I2:+FORM:2}" ] ],
              +PRED #pred, +CARG #carg, +TNT #tnt,
              +ID #id, +FROM #from, +TO #to ] >,
  +CONTEXT < >,
  +POSITION "I1