;; From erg/tmr/punctuation.tdl
;;
;; NOTE: every rule in this section is currently disabled, either with `;'
;; line comments or by being wrapped in a block comment.

;; Remove straight quote, since it is not yet being replaced for EPGY
;;ditch_apostrophe_tmr := token_mapping_rule &
;;[ +INPUT < [ +FORM ^[[({“"‘]*[’][])}”",;.!?]*$, +CLASS anti_apostrophe ] >,
;;  +OUTPUT < > ].

;; Leave comma as separate token, to help with token counts for CoNLL
#|
suffix_punctuation_tmr := suffix_punctuation_tmt &
[ +INPUT < [ +FORM ^(.*[^])}”",;.!?].*)$ ] >,
  +CONTEXT < [ +FORM ^([])}”,⸴;.!?]’?)$ ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}${C1:+FORM:1}" ] > ].

ditch_punctuation_tmr := token_mapping_rule &
[ +INPUT < [ +FORM ^[[({“‘"]|[])}”".!?]+’?$ ] >,
  +OUTPUT < >,
  +CONTEXT < > ].
|#

#|
card_or_year_ne_tmr := basic_ne_tmt &
  [ +INPUT < [ +FORM ^([012]?[0-9][0-9]{2}|[3-9][0-9])(\.|-|\*)?$ ] >,
    +OUTPUT < [ +CLASS card_or_year_or_time_ne, +ONSET c-or-v-onset ] > ].
|#

; Enable sensitivity to onset for |an 11-year effort|
;
; NOTE(review): in the disabled card_or_dom_ne_tmr pattern below, the commas
; inside the bracket expressions would presumably be matched as literal
; commas, and the doubled bar adds an empty alternative -- confirm the
; intended pattern before re-enabling this rule.
#|
card_or_dom_ne_tmr := basic_ne_tmt &
  [ +INPUT < [ +FORM ^([2-7,9]|1[0,2-7,9]||2[0-9]|3[01])$ ] >,
    +OUTPUT < [ +CLASS card_or_dom_ne, +ONSET c-onset ] > ].

card_or_dom_voc_ne_tmr := basic_ne_tmt &
  [ +INPUT < [ +FORM ^(8|8-|11|11-|18|18-)$ ] >,
    +OUTPUT < [ +CLASS card_or_dom_ne, +ONSET v-onset ] > ].
|#