;;; -*- Mode: tdl; Coding: utf-8; -*-

;;
;; convert (PTB-style) ASCII directional quotes to Unicode characters.  when
;; running from an actual PTB file, these are what we would see as input.
;;
opening_single_quote_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM "`" ] >,
  +OUTPUT < [ +FORM "‘" ] > ].

opening_double_quote_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM "``" ] >,
  +OUTPUT < [ +FORM "“" ] > ].

closing_double_quote_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM "''" ] >,
  +OUTPUT < [ +FORM "”" ] > ].

;;
;; a token made up of nothing but one or more hyphens is rewritten to the
;; placeholder form _DASH_.
;;
dash_hack_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM ^--*$ ] >,
  +OUTPUT < [ +FORM "_DASH_" ] > ].

;;
;; a token that is a lone `;' or `:' is rewritten to a full stop.  any
;; positional restriction comes from one_one_sfinal_form_tmt, which is defined
;; outside this file (presumably sentence-final only -- confirm there).
;;
final_semicolon_hack_tmr := one_one_sfinal_form_tmt &
[ +INPUT < [ +FORM ^[;:]$ ] >,
  +OUTPUT < [ +FORM "." ] > ].

;;
;; a lone `:' is rewritten to "_colon_," when the context token has the form
;; `FR' and the position constraint anchored at `^' holds.
;;
;; NOTE(review): this rule reached review damaged.  every +POSITION string
;; broke off directly after `"^', and the text that followed it (apparently
;; everything from the next `<' up to and including the following `>', i.e.
;; the end of the rule plus the name, type and +INPUT of the next one) was
;; lost.  the result was a single definition with unterminated strings and
;; repeated +OUTPUT / +CONTEXT / +POSITION attributes, which cannot be read
;; as TDL.  only the first variant is restored here; the tail of its
;; +POSITION value is a reconstruction modelled on the "I1<$" syntax used by
;; slash_hack_tmr below -- confirm against version control.
;;
interview_hack_tmr := one_one_generic_tmt &
[ +INPUT < [ +FORM ^[:]$ ] >,
  +OUTPUT < [ +FORM "_colon_," ] >,
  +CONTEXT < [ +FORM ^FR$ ] >,
  +POSITION "^<C1<I1" ].

;;
;; TODO(review): surviving fragments of the definitions that originally
;; followed interview_hack_tmr.  their names, types and +INPUT lists are
;; missing, so they are parked here verbatim instead of being guessed at;
;; restore them from version control.
;;
;;   ... +OUTPUT < [ +FORM "_colon_," ] >,
;;       +CONTEXT <[ +TNT.+TAGS <"NE"> ]>,
;;       +POSITION "^ ...
;;
;;   ... +OUTPUT < [ +FORM "_colon_," ] >,
;;       +CONTEXT <[ +TNT.+TAGS <"NE"> ], [ +TNT.+TAGS <"NE"> ]>,
;;       +POSITION "^ ...
;;
;;   ... +OUTPUT < [ +FORM "," ] > ].
;;

;;
;; forms matching (t)eure plus an optional final s, m, n or r get an `e'
;; inserted after `eu': the output is <group 1>euere<group 2>.
;;
euer_hack_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM ^(t?)eure([smnr])?$ ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}euere${I1:+FORM:2}" ] > ].

;;
;; two input tokens, the first one to three digits and the second exactly
;; three digits optionally followed by any of . , ? !, are fused into a
;; single token (digits of both, then the trailing punctuation).
;;
digit_hack_tmr := two_one_final_form_tmt &
[ +INPUT < [ +FORM ^([0-9]{1,3})$ ],
           [ +FORM ^([0-9]{3})([.,?!]*)$ ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}${I2:+FORM:1}${I2:+FORM:2}" ] > ].

;;
;; a lone `/' satisfying the position constraint "I1<$" is deleted (the
;; +OUTPUT list is empty).  presumably `$' is the end of the chart, so this
;; affects a trailing slash only -- confirm against the chart mapping docs.
;;
slash_hack_tmr := one_null_form_tmt &
[ +INPUT < [ +FORM ^[/]$ ] >,
  +OUTPUT < >,
  +POSITION "I1<$" ].

;;
;; drop a token consisting only of opening square brackets, curly braces or
;; opening directional quotes; the output carries just the form of the second
;; input token.
;;
ditch_prefix_punctuation_tmr := two_one_final_form_tmt &
[ +INPUT < [ +FORM ^([[{“‘]+)$ ],
           [ +FORM ^(.+)$ ] >,
  +OUTPUT < [ +FORM "${I2:+FORM:1}" ] > ].

;;
;; mirror image of the above: drop a token consisting only of closing square
;; brackets, curly braces, closing double quotes or ASCII quote marks; the
;; output carries just the form of the first input token.
;;
ditch_suffix_punctuation_tmr := two_one_initial_form_tmt &
[ +INPUT < [ +FORM ^(.+)$ ],
           [ +FORM ^([]}”"']+)$ ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}" ] > ].

;;
;; glue a token consisting only of opening punctuation onto the front of the
;; second input token.  the character class is the one used by
;; ditch_prefix_punctuation_tmr plus `('.
;;
prefix_punctuation_tmr := two_one_final_form_tmt &
[ +INPUT < [ +FORM ^([[({“‘]+)$ ],
           [ +FORM ^(.+)$ ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}${I2:+FORM:1}" ] > ].
;;
;; glue a token consisting only of closing brackets, closing double quotes or
;; the punctuation marks , ; . ! ? onto the end of the first input token.
;;
suffix_punctuation_tmr := two_one_initial_form_tmt &
[ +INPUT < [ +FORM ^(.+)$ ],
           [ +FORM ^([])}”",;.!?]+)$ ] >,
  +OUTPUT < [ +FORM "${I1:+FORM:1}${I2:+FORM:1}" ] > ].

;;
;; any token whose form contains at least one upper-case character has its
;; form replaced by the result of applying lc() to the whole form.
;;
downcase_tmr := one_one_form_tmt &
[ +INPUT < [ +FORM ^(.*[[:upper:]].*)$ ] >,
  +OUTPUT < [ +FORM "${lc(I1:+FORM:1)}" ] > ].