;;; -*- Mode: tdl; Coding: utf-8; -*-
;;;
;;; Copyright (c) 2009 -- 2010 Stephan Oepen (oe@ifi.uio.no);
;;; see `LICENSE' for conditions.
;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; finally, make all non-generic tokens lower case (for lexical look-up).  as
;;; a side effect, the rule may also end up `grounding' very underspecified
;;; tokens (which should not exist, at this point, in principle): if +TRAIT,
;;; +CLASS, and +TNT were all unspecific, we end up defaulting their values to
;;; a token that can only activate native lexical entries.
;;;
;;; also, apply sensible defaults for +TRAIT and +CLASS individually, and make
;;; sure there are no genuine duplicates among tokens in the same cell.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; lower-case the +FORM of a token whose form contains at least one upper-case
;; character; the input is constrained to [ +UW - ], +CLASS non_ne, and +TNT
;; null_tnt.  the output form is the lower-cased first capture group of the
;; input +FORM match.
;;
downcase_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM ^(.*[[:upper:]].*)$,
             +CLASS non_ne,
             +TRAIT [ +UW - ],
             +TNT null_tnt ] >,
  +OUTPUT < [ +FORM "${lc(I1:+FORM:1)}" ] > ].

;;
;; bracket defaults: wherever +LB or +RB is still anti_bracket_list, rewrite it
;; to bracket_null.  there is one rule per bracket side and per +UW value; all
;; other properties spelled out below are carried over through coreferences.
;;

;; [ +UW - ] token, left bracket list.
default_lb_native_tmr := one_one_tmt &
[ +INPUT < [ +TRAIT [ +UW -, +LB anti_bracket_list, +RB #rb ],
             +FORM #form, +CLASS #class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +TRAIT [ +UW -, +LB bracket_null, +RB #rb ],
              +FORM #form, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;; [ +UW - ] token, right bracket list.
default_rb_native_tmr := one_one_tmt &
[ +INPUT < [ +TRAIT [ +UW -, +LB #lb, +RB anti_bracket_list ],
             +FORM #form, +CLASS #class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +TRAIT [ +UW -, +LB #lb, +RB bracket_null ],
              +FORM #form, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;; [ +UW + ] token, left bracket list.
default_lb_generic_tmr := one_one_tmt &
[ +INPUT < [ +TRAIT [ +UW +, +LB anti_bracket_list, +RB #rb ],
             +FORM #form, +CLASS #class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +TRAIT [ +UW +, +LB bracket_null, +RB #rb ],
              +FORM #form, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;; [ +UW + ] token whose +RB is still anti_bracket_list: set +RB to
;; bracket_null; the remaining properties listed here are passed through.
default_rb_generic_tmr := one_one_tmt &
[ +INPUT < [ +TRAIT [ +UW +, +LB #lb, +RB anti_bracket_list ],
             +FORM #form, +CLASS #class,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +TRAIT [ +UW +, +LB #lb, +RB bracket_null ],
              +FORM #form, +CLASS #class,
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;; token whose +CLASS is no_class: set +CLASS to non_ne; the remaining
;; properties listed here are passed through.
default_class_tmr := one_one_tmt &
[ +INPUT < [ +CLASS no_class,
             +FORM #form, +TRAIT #trait,
             +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +CLASS non_ne,
              +FORM #form, +TRAIT #trait,
              +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT < > ].

;;
;; in support of robust (‘self-help’) annotation of islands of correctness in
;; parse failures (using FFTB), make sure that all chart cells contain exactly
;; one ‘bridge’ token, which will activate a specialized bridge lexical entry
;; indicating when there was no adequate lexical analysis available otherwise
;; in a chart cell.  here, we take advantage of redundancy detection built into
;; token mapping, i.e. even though the rule is written as if it could apply any
;; number of times per cell, there shall not be duplicates in the token chart.
;; to avoid duplicates due to downcasing, always downcase +FORM in the output.
;;
;; _fix_me_
;; come to think of it, we may want to erase brackets and the DT head bundle;
;; they have no downstream role to play on this token.           (8-aug-14; oe)
;;
bridge_tmr := token_mapping_rule &
[ +INPUT < >,
  +CONTEXT < [ +FORM ^(.*)$,
               +TRAIT [ +IT #it, +LB #lb, +RB #rb, +HD #hd ],
               +CARG #carg & ^(.+)$,
               +ID #id, +FROM #from, +TO #to ] >,
  +OUTPUT < [ +FORM "${lc(C1:+FORM:1)}",
              +CLASS bridge_class,
              +TRAIT [ +UW +, +IT #it, +LB #lb, +RB #rb, +HD #hd ],
              +PRED "_${lc(C1:+CARG:1)}_u_bridge_rel",
              +CARG #carg,
              +ID #id, +FROM #from, +TO #to,
              +TNT null_tnt ] >,
  +POSITION "O1@C1" ].

;;
;; _fix_me_
;; as a catch-all rule, ditch tokens (in the same cell) that can be unified.
;; the chart mapping machinery aims to protect us against the introduction of
;; equivalent tokens (i.e.
;; it will not generate new +OUTPUT tokens for which
;; equivalent items are in the chart already), hence it is most likely we will
;; end up purging tokens that stand in a genuine subsumption relation (unless
;; there were genuine duplicates in the input already).  but we have no control
;; over which of the two tokens will be preserved in the rule below; a classic
;; expectation might rather be to keep the most general token in the chart.  to
;; accomplish this, one would need a new +POSITION operator, e.g. "C1[I1", or
;; maybe even the actual Unicode glyph for subsumption.         (20-aug-10; oe)
;;
;; the +INPUT item and the +CONTEXT item are coreferenced, and +OUTPUT is
;; empty, i.e. the matched input token is dropped.
;;
uniq_tmr := token_mapping_rule &
[ +INPUT < #token >,
  +CONTEXT < #token >,
  +OUTPUT < >,
  +POSITION "I1@C1" ].