;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2009 -- 2010 Stephan Oepen (oe@ifi.uio.no); 
;;; see `LICENSE' for conditions.
;;;


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; finally, make all non-generic tokens lower case (for lexical look-up).  as
;;; a side effect, the rule may also end up `grounding' very underspecified
;;; tokens (which should not exist, at this point, in principle): if +TRAIT,
;;; +CLASS, and +TNT were all unspecific, we end up defaulting their values to
;;; a token that can only activate native lexical entries.
;;;
;;; also, apply sensible defaults for +TRAIT and +CLASS individually, and make
;;; sure there are no genuine duplicates among tokens in the same cell.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


;;
;; lower-case the surface form of native tokens: the input needs to contain at
;; least one upper-case character in +FORM, be [ +UW - ], and match non_ne and
;; null_tnt as its +CLASS and +TNT values; the output +FORM is the lc() of the
;; first capture group (here: the complete match) on the input +FORM.
;;
downcase_tmr := one_one_form_tmt &
[ +OUTPUT < [ +FORM "${lc(I1:+FORM:1)}" ] >,
  +INPUT < [ +FORM ^(.*[[:upper:]].*)$,
             +TNT null_tnt,
             +CLASS non_ne,
             +TRAIT [ +UW - ] ] > ].

;;
;; default the left bracket list on native ([ +UW - ]) tokens: an input whose
;; +LB matches anti_bracket_list is rewritten with [ +LB bracket_null ].  all
;; other properties spelled out here are carried over by re-entrancy; this
;; includes +IT and +HD inside of +TRAIT (which bridge_tmr, further down, also
;; passes along), as anything not co-indexed between +INPUT and +OUTPUT is not
;; preserved on the rewritten token.
;;
default_lb_native_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW -, +IT #it, +LB anti_bracket_list, +RB #rb,
                      +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW -, +IT #it, +LB bracket_null, +RB #rb,
                       +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; default the right bracket list on native ([ +UW - ]) tokens: an input whose
;; +RB matches anti_bracket_list is rewritten with [ +RB bracket_null ].  all
;; other properties spelled out here are carried over by re-entrancy; this
;; includes +IT and +HD inside of +TRAIT (which bridge_tmr, further down, also
;; passes along), as anything not co-indexed between +INPUT and +OUTPUT is not
;; preserved on the rewritten token.
;;
default_rb_native_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW -, +IT #it, +LB #lb, +RB anti_bracket_list,
                      +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW -, +IT #it, +LB #lb, +RB bracket_null,
                       +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; default the left bracket list on generic ([ +UW + ]) tokens: an input whose
;; +LB matches anti_bracket_list is rewritten with [ +LB bracket_null ].  all
;; other properties spelled out here are carried over by re-entrancy; this
;; includes +IT and +HD inside of +TRAIT (which bridge_tmr, further down, also
;; passes along), as anything not co-indexed between +INPUT and +OUTPUT is not
;; preserved on the rewritten token.
;;
default_lb_generic_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW +, +IT #it, +LB anti_bracket_list, +RB #rb,
                      +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW +, +IT #it, +LB bracket_null, +RB #rb,
                       +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; default the right bracket list on generic ([ +UW + ]) tokens: an input whose
;; +RB matches anti_bracket_list is rewritten with [ +RB bracket_null ].  all
;; other properties spelled out here are carried over by re-entrancy; this
;; includes +IT and +HD inside of +TRAIT (which bridge_tmr, further down, also
;; passes along), as anything not co-indexed between +INPUT and +OUTPUT is not
;; preserved on the rewritten token.
;;
default_rb_generic_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW +, +IT #it, +LB #lb, +RB anti_bracket_list,
                      +HD #hd ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW +, +IT #it, +LB #lb, +RB bracket_null,
                       +HD #hd ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; default the token class: an input whose +CLASS matches no_class is rewritten
;; as [ +CLASS non_ne ]; +FORM, +TRAIT (in its entirety), +PRED, +CARG, and +TNT
;; are passed through by re-entrancy.
;;
default_class_tmr := one_one_tmt &
[ +CONTEXT < >,
  +INPUT < [ +CLASS no_class,
             +FORM #surface, +TRAIT #properties,
             +PRED #relation, +CARG #constant, +TNT #tags ] >,
  +OUTPUT < [ +CLASS non_ne,
              +FORM #surface, +TRAIT #properties,
              +PRED #relation, +CARG #constant, +TNT #tags ] > ].

;;
;; in support of robust (‘self-help’) annotation of islands of correctness in
;; parse failures (using FFTB), make sure that all chart cells contain exactly
;; one ‘bridge’ token, which will activate a specialized bridge lexical entry,
;; indicating when there was no adequate lexical analysis available otherwise
;; in a chart cell.  here, we take advantage of redundancy detection built into
;; token mapping, i.e. even though the rule is written as if it could apply any
;; number of times per cell, there shall not be duplicates in the token chart.
;; to avoid duplicates due to downcasing, always downcase +FORM in the output.
;;
;; _fix_me_
;; come to think of it, we may want to erase brackets and the DT head bundle;
;; they have no downstream role to play on this token.          (8-aug-14; oe)
;;
bridge_tmr := token_mapping_rule &
[ +POSITION "O1@C1",
  +INPUT < >,
  ;; the context token is only matched, not consumed; its +CARG has to be
  ;; non-empty, and the full +FORM and +CARG strings are captured as group 1.
  +CONTEXT < [ +ID #ident, +FROM #start, +TO #stop,
               +FORM ^(.*)$,
               +CARG #constant & ^(.+)$,
               +TRAIT.+IT #italics, +TRAIT.+LB #left,
               +TRAIT.+RB #right, +TRAIT.+HD #head ] >,
  ;; the added token is [ +UW + ] in class bridge_class, with lower-cased +FORM
  ;; and a +PRED that embeds the lower-cased +CARG of the context token.
  +OUTPUT < [ +ID #ident, +FROM #start, +TO #stop,
              +FORM "${lc(C1:+FORM:1)}",
              +PRED "_${lc(C1:+CARG:1)}_u_bridge_rel",
              +CARG #constant,
              +CLASS bridge_class,
              +TNT null_tnt,
              +TRAIT [ +UW +, +IT #italics,
                       +LB #left, +RB #right, +HD #head ] ] > ].

;;
;; _fix_me_
;; as a catch-all rule, ditch tokens (in the same cell) that can be unified.
;; the chart mapping machinery aims to protect us against the introduction of
;; equivalent tokens (i.e. it will not generate new +OUTPUT tokens for which
;; equivalent items are in the chart already), hence it is most likely we will
;; end up purging tokens that stand in a genuine subsumption relation (unless
;; there were genuine duplicates in the input already).  but we have no control
;; over which of the two tokens will be preserved in the rule below; a classic
;; expectation might rather be to keep the most general token in the chart.  to
;; accomplish this, one would need a new +POSITION operator, e.g. "C1[I1", or
;; maybe even the actual Unicode glyph for subsumption.         (20-aug-10; oe)
;; 
uniq_tmr := token_mapping_rule &
[ +POSITION "I1@C1",
  ;; one and the same description is imposed on the input and context items;
  ;; the input item is removed, seeing as the +OUTPUT list is empty.
  +INPUT < #item >,
  +CONTEXT < #item >,
  +OUTPUT < > ].