;;; -*- Mode: tdl; Coding: utf-8; -*-

;;
;; Punctuation affixation rules.  Each entry names a rule, gives one or more
;; %suffix or %prefix pattern pairs, and ends with the type that the rule
;; instantiates.  Within a pair, the first element is matched against the
;; stem and the second is the corresponding surface form, so e.g. |(!. !..)|
;; appends a period.  The letter sets (|!.|, |!p|, |!q|, |!f|) and the rule
;; types are not defined in this file -- presumably they come from the
;; grammar's letter-set declarations and type files; confirm there before
;; changing a pattern.
;;
;; Every comment has to sit on a line of its own (or follow the final period
;; of a definition), because |;| comments extend to the end of the line.
;;

w_period_plr :=
%suffix (!. !..)
punctuation_period_rule.

w_qmark_plr :=
%suffix (!p !p\?)
punctuation_qmark_rule.

; Same affix pattern as w_qmark_plr; only the rule type differs.
w_qqmark_plr :=
%suffix (!p !p\?)
punctuation_clmark_qmark_rule.

; Same affix pattern as w_bang_plr below, but shares its rule type with
; w_qqmark_plr.
w_qmark-bang_plr :=
%suffix (!p !p\!)
punctuation_clmark_qmark_rule.

w_comma_plr :=
%suffix (!q !q,)
punctuation_comma_rule.

w_bang_plr :=
%suffix (!p !p\!)
punctuation_bang_rule.

w_semicol_plr :=
%suffix (!. !.;)
punctuation_semicol_rule.

; For robustness, where typo leaves two semicolons
w_double_semicol_plr :=
%suffix (!. !.;;)
punctuation_semicol_rule.

w_rparen_plr :=
%suffix (!p !p\))
punctuation_rparen_rule.

; For robustness, where mis-editing left comma preceding right paren
w_comma-rp_plr :=
%suffix (!p !p,\))
punctuation_rparen_rule.

; NOTE(review): the literal |(| in this pattern is not backslash-escaped,
; unlike the |\)| in w_rparen_plr -- confirm that all processors in use read
; it as intended.
w_lparen_plr :=
%prefix (!f (!f)
punctuation_lparen_rule.

; Square brackets reuse the parenthesis rule types.
w_rbrack_plr :=
%suffix (!p !p])
punctuation_rparen_rule.

w_lbrack_plr :=
%prefix (!f [!f)
punctuation_lparen_rule.

;;
;; _fix_me_
;; how many variants of quote marks to include depends on what decisions we
;; make in pre-processing.  as of September 2008, we have opted for a total of
;; six, viz. UniCode directional quotes (|“|, |”|, |‘|, and |’|), as well as
;; straight ASCII quotes (|"| and |'|).  when an input text makes use of good
;; directional quotes we preserve those (to not introduce ambiguity).  also, by
;; default we disambiguate quotes in pre-processing, based on proximity to
;; token boundaries, i.e. in processing inputs following standard orthography
;; we only see proper opening and closing quotes (of which the latter can also
;; be apostrophes or units of measure).  however, there also is a configuration
;; for `messed up' inputs, e.g. ones that were pre-tokenized, so we no longer
;; stand a chance of disambiguating quotes.  in this mode, straight quotes are
;; ambiguous between opening or closing ones.
;; finally, if only for robustness (or use without the full pre-processors),
;; allow some variants that we do not expect to see in the ideal world:
;; |``|, |''|, and |`|.
;;

; Closing double quotes: directional |”|, straight |"|, and two apostrophes.
w_dqright_plr :=
%suffix (!q !q”) (!q !q") (!q !q'')
punctuation_dqright_rule.

; Opening double quotes: directional |“|, straight |"|, and two backticks.
w_dqleft_plr :=
%prefix (!f “!f) (!f "!f) (!f ``!f)
punctuation_dqleft_rule.

; Closing single quotes: directional |’| and straight |'|.
w_sqright_plr :=
%suffix (!q !q’) (!q !q')
punctuation_sqright_rule.

;; DPF 20-feb-10 - Let's try doing without the straight quote as left
;; punctuation, since it creates spurious ambiguity for |They'd arrived.|
;; DPF 06-mar-10 - But now we need it for e.g. |The 'cat' arrived.|
;; _fix_me_
;; in fact, this ambiguity will disappear once we fully move to supporting the
;; UniCode apostrophe, i.e. convert the lexicon to include |don’t|, |’d|, and
;; |o’clock|.  at present, we hack around these by `normalizing' (in the wrong
;; direction) apostrophes to straight typewriter quotes.        (23-mar-10; oe)
;;
w_sqleft_plr :=
%prefix (!f ‘!f) (!f '!f) (!f `!f)
punctuation_sqleft_rule.

w_hyphen_plr :=
%suffix (!p !p-)
punctuation_hyphen_rule.

; Add punct_comma_informal_rule for e.g. comma-marked subjects and hd-cmp_u_c heads
; but don't use for generation.
; Note that the pattern here uses the |!.| letter set, whereas w_comma_plr
; uses |!q|.
w_comma-nf_plr :=
%suffix (!. !.,)
punctuation_comma_informal_rule.

; Italics are marked in the input by |¦i| (opening) and |i¦| (closing).
w_italleft_plr :=
%prefix (!f ¦i!f)
punctuation_italleft_rule.

; Besides the |i¦| marker, this rule also lists the same three closing
; double-quote variants as w_dqright_plr.
w_italright_plr :=
%suffix (!q !qi¦) (!q !q”) (!q !q") (!q !q'')
punctuation_italright_rule.

; The two drop rules match the same italics markers as w_italleft_plr and
; the first pair of w_italright_plr, but instantiate the punct_drop_* types.
w_drop-ileft_plr :=
%prefix (!f ¦i!f)
punct_drop_italleft_rule.

w_drop-iright_plr :=
%suffix (!q !qi¦)
punct_drop_italright_rule.