;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2012 -- 2012 Stephan Oepen (oe@ifi.uio.no); 
;;; see `LICENSE' for conditions.
;;;


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; processing the (emerging) Grammar Markup Language (GML)
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; _fix_me_
;; to get started on bracketed inputs, we assume there are no token ambiguities
;; at this stage (which will be true when working off REPP results, but is not
;; guaranteed to be true in general, i.e. for other sources of parser input).
;;                                                              (31-oct-12; oe)

;;
;; default left-bracket rule: rewrites a single token whose +TRAIT.+LB is of
;; type ‘anti_bracket_list’ into a copy whose +LB is ‘bracket_null’.  all other
;; features named below are passed through unchanged by re-entrancy.
;; NOTE(review): ‘anti_bracket_list’ presumably marks an +LB value that no GML
;; rule has touched yet -- confirm against the type definitions.
;;
default_lb_tmr := one_one_tmt &
[ +CONTEXT < >,
  +INPUT < [ +TRAIT [ +LB anti_bracket_list,
                      +RB #rb, +UW #uw, +HD #hd ],
             +FORM #form, +CLASS #class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +TRAIT [ +LB bracket_null,
                       +RB #rb, +UW #uw, +HD #hd ],
              +FORM #form, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] > ].

;;
;; default right-bracket rule: rewrites a single token whose +TRAIT.+RB is of
;; type ‘anti_bracket_list’ into a copy whose +RB is ‘bracket_null’.  all other
;; features named below are passed through unchanged by re-entrancy.
;; NOTE(review): ‘anti_bracket_list’ presumably marks an +RB value that no GML
;; rule has touched yet -- confirm against the type definitions.
;;
default_rb_tmr := one_one_tmt &
[ +CONTEXT < >,
  +INPUT < [ +TRAIT [ +RB anti_bracket_list,
                      +LB #lb, +UW #uw, +HD #hd ],
             +FORM #form, +CLASS #class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +TRAIT [ +RB bracket_null,
                       +LB #lb, +UW #uw, +HD #hd ],
              +FORM #form, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] > ].

;;
;; prefix bracket rule: consumes two input tokens, the first one a GML token
;; whose form is exactly ‘⌊(⌋’, the second one a token whose form contains none
;; of the GML delimiters ‘⌊’, ‘¦’, or ‘⌋’.  the output is the second token with
;; one ‘ctype’ element pushed onto the front of its +LB list; +FORM, +CLASS,
;; +PRED, +CARG, +TNT, and the remaining +TRAIT values are passed through by
;; re-entrancy (+CLASS in the same way as in ‘default_lb_tmr’).
;;
prefix_gml_bracket_tmr := two_one_tmt &
[ +INPUT < [ +FORM ^⌊\(⌋$ ], 
           [ +FORM #form & ^[^⌊¦⌋]+$, +CLASS #class, 
             +TRAIT [ +UW #uw, +LB #lb, +RB #rb, +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class, 
              +TRAIT [ +UW #uw, 
                       +LB < ctype . #lb >, +RB #rb, +HD #hd ], 
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; suffix bracket rule: consumes two input tokens, the first one a token whose
;; form contains none of the GML delimiters ‘⌊’, ‘¦’, or ‘⌋’, the second one a
;; GML token whose form is exactly ‘⌊)⌋’.  the output is the first token with
;; one ‘ctype’ element pushed onto the front of its +RB list; +FORM, +CLASS,
;; +PRED, +CARG, +TNT, and the remaining +TRAIT values are passed through by
;; re-entrancy (+CLASS in the same way as in ‘default_rb_tmr’).
;;
suffix_gml_bracket_tmr := two_one_tmt &
[ +INPUT < [ +FORM #form & ^[^⌊¦⌋]+$, +CLASS #class, 
             +TRAIT [ +UW #uw, +LB #lb, +RB #rb, +HD #hd ] ,
             +PRED #pred, +CARG #carg, +TNT #tnt ],
           [ +FORM ^⌊\)⌋$ ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class, 
              +TRAIT [ +UW #uw, 
                       +LB #lb, +RB < ctype . #rb >, +HD #hd ], 
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; _fix_me_
;; there is a design decision (yet to be made) about whether or not to include characters of
;; GML tokens in the characterization accounting, which matters in a setup where DT targets
;; are identified by full <x:y> spans.  so, arguably, we should be using ‘two_one_tmt’ below.
;;                                                                      (19-jun-13; dan & oe)
;;
;;
;; DT annotation rule: matches a token (I1) followed by a GML token (I2) whose
;; form has the shape ‘⌊←¦X⌋’ or ‘⌊←¦X¦Y⌋’, where X and Y contain none of ‘⌊’,
;; ‘¦’, or ‘⌋’.  the single output token copies +FORM, +CLASS, +PRED, +CARG,
;; +TNT, and the +UW, +LB, and +RB traits from I1, and receives a fresh +HD:
;; +LL.-CTYPE- is filled from capture group 1 (X) of the I2 form, wrapped in
;; ‘uc()’, and +TG from the optional capture group 2 (Y).
;; +ID, +FROM, and +TO are taken from I1 only, i.e. the characters of the GML
;; token do not enter the characterization of the output.
;; NOTE(review): ‘uc()’ presumably upper-cases its argument -- confirm.
;; NOTE(review): when the ‘¦Y’ part is absent, +TG receives whatever the
;; engine substitutes for an unmatched group -- confirm this is intended.
;;
dt_gml_tmr := token_mapping_rule &
[ +INPUT < [ +FORM #form, +CLASS #class, 
             +TRAIT [ +UW #uw, +LB #lb, +RB #rb ],
             +PRED #pred, +CARG #carg, +TNT #tnt,
             +ID #id, +FROM #from, +TO #to ],
           [ +FORM ^⌊←¦([^⌊¦⌋]+)(?:¦([^⌊¦⌋]+))?⌋$ ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class, 
              +TRAIT [ +UW #uw, +LB #lb, +RB #rb,
                       +HD [ +LL.-CTYPE- "${uc(I2:+FORM:1)}",
                             +TG "${I2:+FORM:2}" ] ],
              +PRED #pred, +CARG #carg, +TNT #tnt,
              +ID #id, +FROM #from, +TO #to ] >,
  +CONTEXT < >,
  +POSITION "I1<I2, O1@I1, O1@I2" ].