;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2009 -- 2018 Stephan Oepen (oe@ifi.uio.no);
;;; Copyright (c) 2009 -- 2018 Dan Flickinger (danf@stanford.edu);
;;; see ‘LICENSE’ for conditions.
;;;

tokens := *top* &
[ +LIST *list*,
  +LAST token ].

token_min := *avm*.

;; DPF 2018-03-28 - Added +ONSET

token := token_min &
[ +FORM string,
  +CLASS token_class,
  +TRAIT token_trait,
  +PRED predsort,
  +CARG string,
  +ID *diff-list*,
  +FROM string,
  +TO string,
  +TNT tnt,
  +TICK bool,
  +ONSET c-or-v-onset ].

token_trait := *top* &
[ +UW bool,
  +IT italics,
  +LB bracket_list,
  +RB bracket_list,
  +LD bracket_list,
  +RD bracket_list,
  +HD token_head ].

anti_trait := token_trait.

native_token_list := *list*.
native_token_cons := native_token_list & *cons* &
[ FIRST.+TRAIT.+UW -,
  REST native_token_list ].
native_token_null := native_token_list & *null*.

generic_token_list := *list*.
generic_token_cons := generic_token_list & *cons* &
[ FIRST.+TRAIT.+UW +,
  REST generic_token_list ].
generic_token_null := generic_token_list & *null*.

italics := *sort*.
left_italics := italics.
right_italics := italics.
both_italics := italics.

;; FIX - oe (see also loading of tmr/ner and tmr/gml)
;;
#|
weak_bracket_list := *list*.
weak_bracket_cons := weak_bracket_list & *cons* &
[ FIRST n,
  REST weak_bracket_list ].
weak_bracket_null := weak_bracket_list & *null*.
anti_bracket_list := weak_bracket_list.
|#

bracket_list := *diff-list*.
bracket_null := bracket_list &
[ LIST #list,
  LAST #list ].
bracket_nonnull := bracket_list &
[ LIST *cons* ].

;;
;; the type of entries in delimiter lists (aka weak brackets), a short name to
;; keep token feature structures tiny.
;;
n := *top*.

;;
;; to integrate predictions from a data-driven dependency parser, say: tokens
;; can be annotated with (a) an identifier (or maybe index), e.g. the starting
;; character position; (b) a dependency label (from the CTYPE hierarchy); and
;; (c) the target identifier (drawing on the same name space as +TI).  here,
;; the selection is from the dependent to its head, to take advantage of the
;; common assumption that no token can depend on multiple heads.  later on, in
;; the syntax, each construction must designate one daughter as its ‘dependency
;; head’ (in a mostly syntactic perspective), identify its own CTYPE with +LL
;; on all non-head daughters, and identify the identifier (+TI) from the head
;; with +TG on all non-head daughters.
;;
token_head := *sort* &
[ +TI string,
  +LL ctype,
  +TG string ].

;;
;; _fix_me_
;; in token mapping, the original +TNT list (of tags and probabilities) can be
;; rewritten; native tokens, for example, will end up with an empty list, and
;; generic tokens ‘multiply out’ all elements from the input list.  to preserve
;; information about the top-ranked PoS hypotheses in (all) token FSs that end
;; up as part of a derivation (recorded in [incr tsdb()], say), the rules make
;; sure to set (and then leave intact) the value of +TNT.+MAIN.  i am wondering
;; whether it would be possible to reverse the logic of what we do, i.e. leave
;; the original list intact and selectively move active values to another part
;; of the token FS, where lexical entries could look for it.  not quite sure,
;; however, how that would work for the rules that ‘multiply out’ PoS tags and
;; create as many generic tokens as there were elements in the original list.
;; (18-nov-10; oe)
;;
tnt_main := *top* &
[ +TAG string,
  +PRB string ].

tnt := *top* &
[ +MAIN tnt_main,
  +TAGS *list*,
  +PRBS *list* ].

null_tnt := tnt &
[ +TAGS <>,
  +PRBS <> ].
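
;;
;; purely for illustration (not part of the grammar): a sketch of how the +TNT
;; geometry above is meant to be populated on a token, assuming an external
;; PoS tagger that assigns a word like |dogs| the single tag NNS.  all concrete
;; values here (the form, the character offsets, the tag, and the probability
;; string) are invented for this example only.
;;
#|
[ +FORM "dogs",
  +FROM "4", +TO "8",
  +TNT [ +MAIN [ +TAG "NNS", +PRB "1.0" ],
         +TAGS < "NNS" >,
         +PRBS < "1.0" > ] ]
|#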
;;
;; in token mapping, it is useful to have available distinct ‘anti’-strings.
;;
anti_string := string.
non_string := string.

;; Used as value of +ONSET on tokens, without the (recursive) --TL attribute,
;; to distinguish vowel-initial numerals: |*a 8 meter tree|
;;
c-or-v-onset := *sort*.
c-onset := c-or-v-onset.
v-onset := c-or-v-onset.

;;;
;;; orthographic classes, used in token mapping and lexical filtering
;;;

token_class := *sort*.
no_class := token_class.
bridge_class := token_class.
regular_class := token_class.

named_entity := regular_class.
card_or_time_or_proper_ne := named_entity.
;; For decimal numbers between -1.0 and 1.0, which can show either singular
;; or plural agreement on their measure nouns.
card_or_proper_ne := card_or_time_or_proper_ne.
;; We need to allow |2c| to be either a measure noun or an ordinary proper
;; noun, since it could be the amount of two cups or an identifier that
;; typically follows |2b|.
meas_or_proper_ne := named_entity.
proper_ne := card_or_proper_ne & meas_or_proper_ne.
file_ne := proper_ne.
url_ne := proper_ne.
email_ne := proper_ne.
phone_ne := proper_ne.
card_or_dom_or_year_or_time_ne := named_entity.
card_or_year_or_time_ne := card_or_dom_or_year_or_time_ne.
card_or_year_ne := card_or_year_or_time_ne.
card_or_dom_ne := card_or_dom_or_year_or_time_ne.
card_or_time_ne := card_or_time_or_proper_ne & card_or_year_or_time_ne.
card_or_meas_ne := named_entity.
card_or_decimal_ne := named_entity.
card_ne := card_or_year_ne & card_or_dom_ne & card_or_time_ne &
           card_or_meas_ne & card_or_proper_ne & card_or_decimal_ne.
year_ne := card_or_year_ne.
ord_or_dom_ne := named_entity.
ord_ne := ord_or_dom_ne.
date_or_fract_ne := named_entity.
frct_ne := date_or_fract_ne.
decimal_ne := card_or_decimal_ne.
plur_ne := named_entity.
plur_apos_ne := named_entity.
dom_card_ne := card_or_dom_ne.
dom_ord_ne := ord_or_dom_ne.
date_ne := date_or_fract_ne.
meas_or_time_ne := named_entity.
;time_ne := card_or_dom_ne & card_or_time_ne & meas_or_time_ne.
time_ne := card_or_time_ne & meas_or_time_ne.
meas_ne := meas_or_time_ne & card_or_meas_ne & meas_or_proper_ne.
meas_noun_ne := named_entity.
ital_ne := named_entity.

;;
;; the following are modeled after POSIX character classes; most have obvious
;; correspondences in terms of (more elaborate) UniCode character properties.
;; essentially, we cross-classify along three dimensions: (a) the combination
;; of characters used, (b) whether or not the first character is capitalized,
;; and (c) whether or not a token appears utterance-initial.
;;
non_ne := regular_class &
[ +INITIAL luk ].

non_alphanumeric := non_ne.
apostrophe := non_alphanumeric.
anti_apostrophe := non_alphanumeric.

alphanumeric := non_ne &
[ +CASE token_case ].
alphabetic := alphanumeric.
numeric := alphanumeric.

;;
;; at least the fourth time that i revise this hierarchy.  ‘capitalized’ or not
;; is a property of the first character (|1A| is not capitalized).  ‘mixed’, on
;; the other hand, is only applicable to tokens with at least two characters.
;; both |aB| and |AbC| are mixed, but |A| or |a| are not.  finally, ‘lower’ and
;; ‘upper’ reflect the full token string, i.e. |Dan| is neither, |1a| is lower,
;; and |A| is upper.
;;
token_case := *sort*.
capitalized := token_case.
non_capitalized := token_case.
mixed := token_case.
non_mixed := token_case.
capitalized+mixed := capitalized & mixed.
capitalized+non_mixed := capitalized & non_mixed.
capitalized+lower := capitalized+non_mixed.
capitalized+upper := capitalized+non_mixed.
non_capitalized+mixed := non_capitalized & mixed.
;;
;; we are making a simplifying assumption here, not distinguishing one-token
;; non-capitalized (which could be called ‘non_capitalized+non_mixed’) from
;; ‘non_capitalized+lower’.  so far, we just never care about the distinction.
;;
non_capitalized+lower := non_capitalized & non_mixed.

chart_mapping_rule := *top* &
[ +CONTEXT *list*,
  +INPUT *list*,
  +OUTPUT *list*,
  +POSITION string,
  +JUMP string ].

;;;
;;; constructing a sensible hierarchy of token mapping rules is not trivial.
;;; there is variation along many dimensions: (a) arity of input and output,
;;; (b) positioning of LHS and RHS rule elements, (c) which token properties
;;; are copied over, and others.
;;;
;;; following is an attempt to sketch some of the more frequent configurations,
;;; but so far there is hardly any use of inheritance here ...
;;;
token_mapping_rule := chart_mapping_rule.

basic_one_one_tmt := token_mapping_rule &
[ +INPUT.FIRST [ +ID #id, +FROM #from, +TO #to, +ONSET #onset ],
  +OUTPUT.FIRST [ +ID #id, +FROM #from, +TO #to, +ONSET #onset ] ].

basic_two_two_tmt := basic_one_one_tmt &
[ +INPUT.REST.FIRST [ +ID #id, +FROM #from, +TO #to ],
  +OUTPUT.REST.FIRST [ +ID #id, +FROM #from, +TO #to ] ].

basic_three_three_tmt := basic_two_two_tmt &
[ +INPUT.REST.REST.FIRST [ +ID #id, +FROM #from, +TO #to ],
  +OUTPUT.REST.REST.FIRST [ +ID #id, +FROM #from, +TO #to ] ].

basic_four_four_tmt := basic_three_three_tmt &
[ +INPUT.REST.REST.REST.FIRST [ +ID #id, +FROM #from, +TO #to ],
  +OUTPUT.REST.REST.REST.FIRST [ +ID #id, +FROM #from, +TO #to ] ].

one_one_tmt := basic_one_one_tmt &
[ +INPUT < [] >,
  +OUTPUT < [] >,
  +POSITION "O1@I1" ].

two_two_tmt := basic_two_two_tmt &
[ +INPUT < [], [] >,
  +OUTPUT < [], [] >,
  +POSITION "I1<I2, O1@I1, O2@I2" ].

#|
basic_one_one_ner_tmt := basic_one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class, +PRED #pred,
             +CARG #carg, +TNT #tnt ] . *list* >,
  +OUTPUT < [ +FORM #form, +CLASS #class, +TRAIT.+UW +, +PRED #pred,
              +CARG #carg, +TNT #tnt ] . *list* >,
  +CONTEXT < > ].

basic_two_two_ner_tmt := basic_one_one_ner_tmt & basic_two_two_tmt &
[ +INPUT < [], [ +FORM #form, +CLASS #class, +PRED #pred,
                 +CARG #carg, +TNT #tnt ] . *list* >,
  +OUTPUT < [], [ +FORM #form, +CLASS #class, +TRAIT.+UW +, +PRED #pred,
                  +CARG #carg, +TNT #tnt ] . *list* > ].

basic_three_three_ner_tmt := basic_two_two_ner_tmt & basic_three_three_tmt &
[ +INPUT < [], [], [ +FORM #form, +CLASS #class, +PRED #pred,
                     +CARG #carg, +TNT #tnt ] . *list* >,
  +OUTPUT < [], [], [ +FORM #form, +CLASS #class, +TRAIT.+UW +, +PRED #pred,
                      +CARG #carg, +TNT #tnt ] . *list* > ].

basic_four_four_ner_tmt := basic_three_three_ner_tmt & basic_four_four_tmt &
[ +INPUT < [], [], [], [ +FORM #form, +CLASS #class, +PRED #pred,
                         +CARG #carg, +TNT #tnt ] . *list* >,
  +OUTPUT < [], [], [], [ +FORM #form, +CLASS #class, +TRAIT.+UW +, +PRED #pred,
                          +CARG #carg, +TNT #tnt ] . *list* > ].
|#

basic_one_one_ner_tmt := basic_one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
             +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* >,
  +OUTPUT < [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
              +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* >,
  +CONTEXT < > ].

basic_two_two_ner_tmt := basic_one_one_ner_tmt & basic_two_two_tmt &
[ +INPUT < [], [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
                 +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* >,
  +OUTPUT < [], [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
                  +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* > ].
basic_three_three_ner_tmt := basic_two_two_ner_tmt & basic_three_three_tmt &
[ +INPUT < [], [],
           [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
             +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* >,
  +OUTPUT < [], [],
            [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
              +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* > ].

basic_four_four_ner_tmt := basic_three_three_ner_tmt & basic_four_four_tmt &
[ +INPUT < [], [], [],
           [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
             +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* >,
  +OUTPUT < [], [], [],
            [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt,
              +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #rb, +HD #hd ] ] . *list* > ].

;;;
;;; _fix_me_
;;; need to revisit these, once i decide on which order brackets go onto the
;;; +LB and +RB lists.  in principle, an NE pattern may be surrounded by GML
;;; brackets, which either way we must not lose.  (2-nov-12; oe)
;;;
two_two_ner_tmt := basic_two_two_ner_tmt &
[ +INPUT < [ +TRAIT.+LD bracket_null ], [ +TRAIT.+LD #ld ] >,
  +OUTPUT < [ +TRAIT.+RD bracket_null ], [ +TRAIT.+LD #ld ] >,
  +POSITION "I1<I2, O1@I1, O2@I2" ].

three_three_ner_tmt := basic_three_three_ner_tmt &
[ +OUTPUT < [ +TRAIT.+RD bracket_null ], [], [ +TRAIT.+LD bracket_null ] >,
  +POSITION "I1<I2, I2<I3, O1@I1, O2@I2, O3@I3" ].

four_four_ner_tmt := basic_four_four_ner_tmt &
[ +OUTPUT < [ +TRAIT.+RD bracket_null ], [], [], [ +TRAIT.+LD bracket_null ] >,
  +POSITION "I1<I2, I2<I3, I3<I4, O1@I1, O2@I2, O3@I3, O4@I4" ].

;;;
;;; _fix_me_
;;; in principle, an underspecified +CONTEXT list and [ +CONTEXT <> ] should
;;; amount to the same thing, viz. no relevant reference to context tokens.
;;; ACE developers asked the ERG developers to always spell out the empty
;;; list, but i would prefer not to build that expectation into the machinery.
;;; a fully underspecified list could be construed to allow matching arbitrary
;;; tokens as ‘context’, but since such matches could never be referenced in
;;; +OUTPUT they could not possibly have any effect on the result of rule
;;; application.  hence, an engine would be justified in deciding to not even
;;; attempt any matching against an underspecified +CONTEXT (or +INPUT, for
;;; that matter), which is the strategy adopted in PET.  approach ACE
;;; developers about this, one day.  (31-oct-12; oe)
;;;
two_one_tmt := token_mapping_rule &
[ +INPUT < [ +ID [ LIST #front, LAST #middle ], +FROM #from, +ONSET #onset ],
           [ +ID [ LIST #middle, LAST #back ], +TO #to ] >,
  +OUTPUT < [ +ID [ LIST #front, LAST #back ],
              +FROM #from, +TO #to, +ONSET #onset ] >,
  +CONTEXT <>,
  +POSITION "I1<I2, O1@I1, O1@I2" ].

three_one_tmt := token_mapping_rule &
[ +INPUT < [ +ID [ LIST #front, LAST #middle1 ], +FROM #from, +ONSET #onset ],
           [ +ID [ LIST #middle1, LAST #middle2 ] ],
           [ +ID [ LIST #middle2, LAST #back ], +TO #to ] >,
  +OUTPUT < [ +ID [ LIST #front, LAST #back ],
              +FROM #from, +TO #to, +ONSET #onset ] >,
  +POSITION "I1<I2, I2<I3, O1@I1, O1@I2, O1@I3" ].

four_one_tmt := token_mapping_rule &
[ +INPUT < [ +ID [ LIST #front, LAST #middle1 ], +FROM #from, +ONSET #onset ],
           [ +ID [ LIST #middle1, LAST #middle2 ] ],
           [ +ID [ LIST #middle2, LAST #middle3 ] ],
           [ +ID [ LIST #middle3, LAST #back ], +TO #to ] >,
  +OUTPUT < [ +ID [ LIST #front, LAST #back ],
              +FROM #from, +TO #to, +ONSET #onset ] >,
  +POSITION "I1<I2, I2<I3, I3<I4, O1@I1, O1@I2, O1@I3, O1@I4" ].

one_two_tmt := token_mapping_rule &
[ +INPUT < [ +ID #id, +FROM #from, +TO #to, +ONSET #onset ] >,
  +OUTPUT < [ +ID #id, +FROM #from, +TO #to, +ONSET #onset ],
            [ +ID #id, +FROM #from, +TO #to ] >,
  +CONTEXT <>,
  +POSITION "O1<O2, O1@I1, O2@I1" ].

one_three_tmt := token_mapping_rule &
[ +INPUT < [ +ID #id, +FROM #from, +TO #to, +ONSET #onset ] >,
  +OUTPUT < [ +ID #id, +FROM #from, +TO #to, +ONSET #onset ],
            [ +ID #id, +FROM #from, +TO #to ],
            [ +ID #id, +FROM #from, +TO #to ] >,
  +CONTEXT <>,
  +POSITION "O1<O2, O2<O3, O1@I1, O2@I1, O3@I1" ].

  +OUTPUT < [ +FORM #form, +CLASS #class, +TRAIT #trait, +PRED #pred,
              +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

one_one_form_tmt := one_one_tmt &
[ +INPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

one_one_trait_tmt := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].
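
;;
;; purely for illustration (not part of the grammar, and not an actual ERG
;; token mapping rule): a hypothetical instance built on one_one_form_tmt
;; above.  such a rule would rewrite the surface form of a single matching
;; token, while the supertype takes care of copying +CLASS, +TRAIT, +PRED,
;; +CARG, and +TNT, and of the +ID, +FROM, +TO, and +ONSET bookkeeping.  the
;; rule name and the two form strings are invented for this sketch.
;;
#|
colour_color_example_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM "colour" ] >,
  +OUTPUT < [ +FORM "color" ] > ].
|#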
two_two_trait_tmt := two_two_tmt &
[ +INPUT < [ +FORM #form1, +CLASS #class1, +PRED #pred1, +CARG #carg1, +TNT #tnt1 ],
           [ +FORM #form2, +CLASS #class2, +PRED #pred2, +CARG #carg2, +TNT #tnt2 ] >,
  +OUTPUT < [ +FORM #form1, +CLASS #class1, +PRED #pred1, +CARG #carg1, +TNT #tnt1 ],
            [ +FORM #form2, +CLASS #class2, +PRED #pred2, +CARG #carg2, +TNT #tnt2 ] >,
  +CONTEXT <> ].

two_one_initial_form_tmt := two_one_tmt &
[ +INPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ],
           [] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] > ].

two_one_final_form_trait_tmt := two_one_tmt &
[ +INPUT < [], [ +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +CLASS #class, +PRED #pred, +CARG #carg, +TNT #tnt ] > ].

three_one_initial_form_tmt := three_one_tmt &
[ +INPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ],
           [], [] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

three_one_center_form_tmt := three_one_tmt &
[ +INPUT < [], [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ],
           [] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

three_one_final_form_tmt := three_one_tmt &
[ +INPUT < [], [],
           [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

one_two_all_form_tmt := one_two_tmt &
[ +INPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ],
            [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

two_one_keep_brackets_tmt := two_one_tmt &
[ +INPUT < [ +TRAIT [ +UW #uw, +IT #it, +HD #hd, +LB #lb, +LD #ld ] ],
           [ +TRAIT [ +RB #rb, +RD #rd ] ] >,
  +OUTPUT < [ +TRAIT [ +UW #uw & +, +IT #it, +HD #hd, +LB #lb, +RB #rb,
                       +LD #ld, +RD #rd ] ] > ].

;;;
;;; a few relatively specialized token mapping rule types, for configurations
;;; that are instantiated with non-trivial frequency.
;;;
token_class_null_tnt_tmt := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS no_class, +TRAIT #trait, +PRED #pred,
             +CARG #carg, +TNT.+MAIN #main ] >,
  +OUTPUT < [ +FORM #form, +CLASS non_ne, +TRAIT #trait, +PRED #pred,
              +CARG #carg, +TNT null_tnt & [ +MAIN #main ] ] >,
  +CONTEXT <> ].

token_class_tmt := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS no_class, +TRAIT #trait, +PRED #pred,
             +CARG #carg, +TNT #tnt ] >,
  +OUTPUT < [ +FORM #form, +CLASS non_ne, +TRAIT #trait, +PRED #pred,
              +CARG #carg, +TNT #tnt ] >,
  +CONTEXT <> ].

token_case_tmt := token_mapping_rule &
[ +INPUT < [ +FORM #form, +CLASS #class, +TRAIT #trait, +PRED #pred, +TNT #tnt,
             +ONSET #onset ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class, +TRAIT #trait, +PRED #pred, +TNT #tnt,
              +ONSET #onset ] > ].

one_one_token_case_tmt := one_one_tmt & token_case_tmt &
[ +CONTEXT <> ].

tick_reset_tmt := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg,
             +TNT #tnt, +TICK + ] >,
  +OUTPUT < [ +FORM #form, +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg,
              +TNT #tnt, +TICK bool ] >,
  +CONTEXT <> ].

;;
;; the following rules are unusual, as they combine +IDs from both the context
;; and input elements; the contexts (punctuation marks) need to remain in the
;; chart until (re-)attached to all adjacent tokens, but eventually they will
;; be purged from the chart.
;; DPF 2016-03-03 - Propagate strong bracket constraints from the punctuation
;; mark to the output.
;;
#|
prefix_punctuation_tmt := token_mapping_rule &
[ +CONTEXT < [ +TRAIT.+HD #hd, +ID [ LIST #front, LAST #middle ], +FROM #from ] >,
  +INPUT < [ +CLASS #class, +TRAIT #trait & [ +HD #hd ], +PRED #pred, +CARG #carg,
             +ID [ LIST #middle, LAST #back ], +TO #to, +TNT #tnt ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg,
              +ID [ LIST #front, LAST #back ], +FROM #from, +TO #to, +TNT #tnt ] >,
  +POSITION "C1,
  +CONTEXT < [ +ID [ LIST #middle, LAST #back ], +TO #to ] >,
  +OUTPUT < [ +CLASS #class, +TRAIT #trait, +PRED #pred, +CARG #carg,
              +ID [ LIST #front, LAST #back ], +FROM #from, +TO #to, +TNT #tnt ] >,
  +POSITION "I1,
  +INPUT < [ +CLASS #class, +TRAIT [ +UW #uw, +IT #it, +RB #rb, +LD #ld, +RD #rd ],
             +PRED #pred, +CARG #carg, +ID [ LIST #middle, LAST #back ], +TO #to,
             +TNT #tnt ] >,
  +OUTPUT < [ +CLASS #class,
              +TRAIT [ +HD #ctxthd, +UW #uw, +IT #it, +LB #ctxtlb, +RB #rb,
                       +LD #ld, +RD #rd ],
              +PRED #pred, +CARG #carg, +ID [ LIST #front, LAST #back ],
              +FROM #from, +TO #to, +TNT #tnt ] >,
  +POSITION "C1
|#

prefix_punctuation_tmt := token_mapping_rule &
[ +CONTEXT < [ +TRAIT [ +HD #ctxthd, +LB #ctxtlb ],
               +ID [ LIST #front, LAST #middle ], +FROM #from ] >,
  +INPUT < [ +TRAIT [ +UW #uw, +IT #it, +RB #rb, +LD #ld, +RD #rd ],
             +PRED #pred, +CARG #carg, +ONSET #onset,
             +ID [ LIST #middle, LAST #back ], +TO #to ] >,
  +OUTPUT < [ +TRAIT [ +HD #ctxthd, +UW #uw, +IT #it, +LB #ctxtlb, +RB #rb,
                       +LD #ld, +RD #rd ],
              +PRED #pred, +CARG #carg, +ONSET #onset,
              +ID [ LIST #front, LAST #back ], +FROM #from, +TO #to ] >,
  +POSITION "C1<I1, O1@C1, O1@I1" ].

suffix_punctuation_tmt := token_mapping_rule &
[ +INPUT < [ +TRAIT [ +UW #uw, +IT #it, +LB #lb, +LD #ld, +RD #rd ],
             +PRED #pred, +CARG #carg, +ONSET #onset,
             +ID [ LIST #front, LAST #middle ], +FROM #from ] >,
  +CONTEXT < [ +ID [ LIST #middle, LAST #back ], +TO #to, +TRAIT.+RB #ctxtrb ] >,
  +OUTPUT < [ +TRAIT [ +UW #uw, +IT #it, +LB #lb, +RB #ctxtrb, +LD #ld, +RD #rd ],
              +PRED #pred, +CARG #carg, +ONSET #onset,
              +ID [ LIST #front, LAST #back ], +FROM #from, +TO #to ] >,
  +POSITION "I1<C1, O1@I1, O1@C1" ].

basic_ne_tmt := one_one_tmt &
[ +INPUT < [ +FORM #form, +CLASS non_ne,
             +TRAIT [ +LB #lb, +RB #rb, +LD #ld, +RD #rd ],
             +PRED #pred, +CARG #carg, +TNT.+MAIN #main ] >,
  +OUTPUT < [ +FORM #form, +CLASS named_entity,
              +TRAIT [ +LB #lb, +RB #rb, +LD #ld, +RD #rd ],
              +PRED #pred, +CARG #carg, +TNT null_tnt & [ +MAIN #main ] ] >,
  +CONTEXT <> ].

ne_tmt := basic_ne_tmt &
[ +OUTPUT < [ +TRAIT.+UW + ] > ].

basic_add_ne_tmt := token_mapping_rule &
[ +CONTEXT < [ +FORM #form, +CLASS non_ne,
               +TRAIT [ +LB #lb, +RB #rb, +LD #ld, +RD #rd ],
               +PRED #pred, +CARG #carg, +ONSET #onset,
               +ID #id, +FROM #from, +TO #to, +TNT.+MAIN #main ] >,
  +INPUT <>,
  +OUTPUT < [ +FORM #form, +CLASS named_entity,
              +TRAIT [ +LB #lb, +RB #rb, +LD #ld, +RD #rd ],
              +PRED #pred, +CARG #carg, +ONSET #onset,
              +ID #id, +FROM #from, +TO #to, +TNT null_tnt & [ +MAIN #main ] ] >,
  +POSITION "O1@C1" ].

add_ne_tmt := basic_add_ne_tmt &
[ +OUTPUT < [ +TRAIT.+UW + ] > ].

;;; Type used initially for moving bracket leftward, but maybe more uses
;;; will be found.  The rule adds a near-copy for each of the first two
;;; context tokens, and two copies of the third context token, which follows
;;; the first two.
;;; The two and two are needed for the left-bracket rule
;;; because we propose both a generic and a native token for capitalized words
;;;
three_four_tmt := token_mapping_rule &
[ +CONTEXT < [ +FORM #form1, +CLASS #class1a, +ID #id, +FROM #from, +TO #to,
               +PRED #pred1, +CARG #carg1, +TNT #tnt1a, +ONSET #onset ],
             [ +FORM #form1, +CLASS #class1b, +ID #id, +FROM #from, +TO #to,
               +PRED #pred1, +CARG #carg1, +TNT #tnt1b ],
             [ +FORM #form2, +ID #id2, +FROM #from2, +TO #to2,
               +PRED #pred2, +CARG #carg2, +TNT #tnt2 ] >,
  +OUTPUT < [ +FORM #form1, +CLASS #class1a, +ID #id, +FROM #from, +TO #to,
              +PRED #pred1, +CARG #carg1, +TNT #tnt1a, +ONSET #onset ],
            [ +FORM #form1, +CLASS #class1b, +ID #id, +FROM #from, +TO #to,
              +PRED #pred1, +CARG #carg1, +TNT #tnt1b ],
            [ +FORM #form2, +CLASS #class1a, +ID #id2, +FROM #from2, +TO #to2,
              +PRED #pred2, +CARG #carg2, +TNT #tnt2 ],
            [ +FORM #form2, +CLASS #class1b, +ID #id2, +FROM #from2, +TO #to2,
              +PRED #pred2, +CARG #carg2, +TNT #tnt2 ] >,
  +POSITION "C1