;;; -*- Mode: tdl; Coding: utf-8; -*-
;;;
;;; Copyright (c) 2009 -- 2010 Stephan Oepen (oe@ifi.uio.no);
;;; see `LICENSE' for conditions.
;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; finally, make all non-generic tokens lower case (for lexical look-up).  as
;;; a side effect, the rule may also end up `grounding' very underspecified
;;; tokens (which should not exist, at this point, in principle): if +TRAIT,
;;; +CLASS, and +TNT were all unspecific, we end up defaulting their values to
;;; a token that can only activate native lexical entries.
;;;
;;; also, apply sensible defaults for +TRAIT and +CLASS individually, and make
;;; sure there are no genuine duplicates among tokens in the same cell.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; lower-case the surface form of a token.  the +FORM pattern requires at
;; least one upper-case character, so tokens that are already all lower case
;; do not match.  the input is further restricted to [ +TRAIT.+UW - ],
;; [ +CLASS non_ne ], and [ +TNT null_tnt ]; the output +FORM is the
;; lower-cased first capture group of the input +FORM, i.e. the whole string.
;;
downcase_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM ^(.*[[:upper:]].*)$,
             +TRAIT.+UW -, +CLASS non_ne, +TNT null_tnt ] >,
  +OUTPUT < [ +FORM "${lc(I1:+FORM:1)}" ] > ].

;;
;; _fix_me_
;; to prevent spurious defaulting (replacing bracket_null with bracket_null) it would be
;; nicer to devise a notion of anti_bracket_list, but that's not quite so obvious in the
;; diff-list space as it was with regular lists.
;;
;; defaulting of the left (+LB) and right (+RB) bracket properties for tokens
;; that are [ +UW - ].  each rule constrains one of the two properties to
;; bracket_null on both input and output; every other property (+FORM, +CLASS,
;; +PRED, +CARG, +TNT, and the opposite bracket) is passed through unchanged
;; by re-entrancy.  +CONTEXT is empty, i.e. no neighbouring tokens are
;; consulted.
;;
default_lb_native_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW -, +LB bracket_null, +RB #rb ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW -, +LB bracket_null, +RB #rb ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

default_rb_native_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW -, +LB #lb, +RB bracket_null ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW -, +LB #lb, +RB bracket_null ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; defaulting of the left (+LB) and right (+RB) bracket properties for tokens
;; that are [ +UW + ].  each rule constrains one of the two properties to
;; bracket_null on both input and output; every other property is passed
;; through unchanged by re-entrancy, and +CONTEXT is empty.
;;
default_lb_generic_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW +, +LB bracket_null, +RB #rb ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW +, +LB bracket_null, +RB #rb ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

default_rb_generic_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class,
             +TRAIT [ +UW +, +LB #lb, +RB bracket_null ],
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class,
              +TRAIT [ +UW +, +LB #lb, +RB bracket_null ],
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; default the token class: an input that is [ +CLASS no_class ] is rewritten
;; to [ +CLASS non_ne ]; +FORM, +TRAIT, +PRED, +CARG, and +TNT are carried over
;; unchanged by re-entrancy.
;;
default_class_tmr := one_one_tmt &
[ +INPUT < [ +FORM #form, +TRAIT #trait, +CLASS no_class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +TRAIT #trait, +CLASS non_ne,
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; _fix_me_
;; as a catch-all rule, ditch tokens (in the same cell) that can be unified.
;; the chart mapping machinery aims to protect us against the introduction of
;; equivalent tokens (i.e. it will not generate new +OUTPUT tokens for which
;; equivalent items are in the chart already), hence it is most likely we will
;; end up purging tokens that stand in a genuine subsumption relation (unless
;; there were genuine duplicates in the input already).  but we have no control
;; over which of the two tokens will be preserved in the rule below; a classic
;; expectation might rather be to keep the most general token in the chart.  to
;; accomplish this, one would need a new +POSITION operator, e.g. "C1[I1", or
;; maybe even the actual UniCode glyph for subsumption.         (20-aug-10; oe)
;;
;; the +CONTEXT and +INPUT items are re-entrant (#token) and +OUTPUT is empty,
;; so the matched input token is removed while the context token stays.
;; NOTE(review): "I1@C1" presumably requires input and context to occupy the
;; same chart cell -- confirm against the chart mapping documentation.
;;
uniq_tmr := token_mapping_rule &
[ +CONTEXT < #token >,
  +INPUT < #token >,
  +OUTPUT < >,
  +POSITION "I1@C1" ].

;;
;; at the end of the day, each token needs to be decorated with a ‘token identity’ (+TI), a
;; string that will (ideally uniquely) identify the token in DT bi-directional dependencies,
;; i.e.
;; the value that arguments can ‘target’ in their +TG property.
;;
;; the rule matches tokens that are [ +TICK - ] and outputs them as
;; [ +TICK + ], with +TRAIT.+HD.+TI set to the string "<FROM:TO>" built from
;; the first capture groups of the input +FROM and +TO values; the rest of
;; +TRAIT is shared between input and output via #trait.
;; NOTE(review): +TICK presumably serves as a guard so that each token is
;; stamped only once -- confirm against the definition of +TICK.
;; NOTE(review): +FROM must be non-empty (.+) whereas +TO may be empty (.*);
;; confirm that this asymmetry is intended.
;;
token_identity_tmr := one_one_identity_tmt &
[ +INPUT < [ +FROM ^(.+)$, +TO ^(.*)$,
             +TRAIT #trait, +TICK - ] >,
  +OUTPUT < [ +TRAIT #trait & [ +HD.+TI "<${I1:+FROM:1}:${I1:+TO:1}>" ],
              +TICK + ] >,
  +CONTEXT < > ].