;;; -*- Mode: tdl; Coding: utf-8; -*-

;;
;; Punctuation affix rules.  Every entry below has the same four parts: the
;; rule name, a %suffix or %prefix directive listing one or more pattern pairs,
;; the rule type it instantiates, and an RNAME label.  In each pair the second
;; element is the first one with the punctuation mark(s) added.  The letter
;; sets (!., !p, !q, !f) and the rule types are not defined in this part of
;; the file.
;;
;; Layout matters: `;' comments run to the end of the line, so each definition
;; must start on a line of its own, and each %suffix / %prefix directive is
;; kept on a line of its own (some patterns contain a literal `;').
;;

w_period_plr :=
%suffix (!. !..)
punctuation_period_rule &
[ RNAME lpp ].

w_qmark_plr :=
%suffix (!p !p\?)
punctuation_qmark_rule &
[ RNAME lpq ].

;; Same orthographic pattern as w_qmark_plr; only the rule type differs.
w_qqmark_plr :=
%suffix (!p !p\?)
w_qqmark_rule &
[ RNAME lpqq ].

;; Same orthographic pattern as w_bang_plr; only the rule type differs.
w_qmark-bang_plr :=
%suffix (!p !p\!)
w_qmark-bang_rule &
[ RNAME lpcq ].

w_comma_plr :=
%suffix (!q !q,)
punctuation_comma_rule &
[ RNAME lpcm ].

w_bang_plr :=
%suffix (!p !p\!)
punctuation_bang_rule &
[ RNAME lpx ].

;; The `;' in the pattern below is part of the pattern, not a comment.
w_semicol_plr :=
%suffix (!. !.;)
w_semicol_rule &
[ RNAME lpsc ].

; For robustness, where typo leaves two semicolons
w_double_semicol_plr :=
%suffix (!. !.;;)
w_double_semicol_rule &
[ RNAME lpds ].

w_rparen_plr :=
%suffix (!p !p\))
w_rparen_rule &
[ RNAME lprp ].

; For robustness, where mis-editing left comma preceding right paren
w_comma-rp_plr :=
%suffix (!p !p,\))
w_comma-rp_rule &
[ RNAME lpcp ].

w_lparen_plr :=
%prefix (!f \(!f)
w_lparen_rule &
[ RNAME lplp ].

;; Three closing marks share this rule: |]|, |}|, and |||.
w_rbrack_plr :=
%suffix (!p !p]) (!p !p}) (!p !p|)
w_rbrack_rule &
[ RNAME lprb ].

;; Three opening marks share this rule: |[|, |{|, and |||.
w_lbrack_plr :=
%prefix (!f [!f) (!f {!f) (!f |!f)
w_lbrack_rule &
[ RNAME lplb ].

;;
;; _fix_me_
;; how many variants of quote marks to include depends on what decisions we
;; make in pre-processing. as of September 2008, we have opted for a total of
;; six, viz. UniCode directional quotes (|“|, |”|, |‘|, and |’|), as well as
;; straight ASCII quotes (|"| and |'|). when an input text makes use of good
;; directional quotes we preserve those (to not introduce ambiguity). also, by
;; default we disambiguate quotes in pre-processing, based on proximity to
;; token boundaries, i.e. in processing inputs following standard orthography
;; we only see proper opening and closing quotes (of which the latter can also
;; be apostrophes or units of measure). however, there also is a configuration
;; for `messed up' inputs, e.g. ones that were pre-tokenized, so we no longer
;; stand a chance of disambiguating quotes.
;; in this mode, straight quotes are ambiguous between opening or closing
;; ones. finally, if only for robustness (or use without the full
;; pre-processors), allow some variants that we do not expect to see in the
;; ideal world: |``|, |''|, and |`|.
;;

;; Right double quote: |”|, straight |"|, and two apostrophes |''|.
w_dqright_plr :=
%suffix (!q !q”) (!q !q") (!q !q'')
punctuation_dqright_rule &
[ RNAME lpdr ].

;; Left double quote: |”|, |“|, straight |"|, and two backticks |``|.
;; NOTE(review): the first pattern accepts the closing mark |”| as an opening
;; quote; presumably deliberate (robustness) -- confirm.
w_dqleft_plr :=
%prefix (!f ”!f) (!f “!f) (!f "!f) (!f ``!f)
punctuation_dqleft_rule &
[ RNAME lpdl ].

;; Right single quote: |’| and straight |'|.
w_sqright_plr :=
%suffix (!q !q’) (!q !q')
punctuation_sqright_rule &
[ RNAME lpsr ].

;; DPF 20-feb-10 - Let's try doing without the straight quote as left
;; punctuation, since it creates spurious ambiguity for |They'd arrived.|
;; DPF 06-mar-10 - But now we need it for e.g. |The 'cat' arrived.|
;; _fix_me_
;; in fact, this ambiguity will disappear once we fully move to supporting the
;; UniCode apostrophe, i.e. convert the lexicon to include |don’t|, |’d|, and
;; |o’clock|. at present, we hack around these by `normalizing' (in the wrong
;; direction) apostrophes to straight typewriter quotes. (23-mar-10; oe)
;;
w_sqleft_plr :=
%prefix (!f ‘!f) (!f '!f) (!f `!f)
punctuation_sqleft_rule &
[ RNAME lpsl ].

w_hyphen_plr :=
%suffix (!p !p-)
w_hyphen_rule &
[ RNAME lphr ].

;; DPF 2012-08-29 - Recent REPP changes now attach three dots to preceding
;; token without a space, when the dots are between two tokens,
;; so add variant of hyphen suffix rule to accommodate.
;;
w_threedot_plr :=
%suffix (!p !p...)
w_threedot_rule &
[ RNAME lptd ].

w_asterisk_plr :=
%suffix (!p !p\*)
w_asterisk_rule &
[ RNAME lpax ].

;; NOTE(review): this %prefix rule uses letter set !p, whereas the other
;; %prefix rules in this section (w_dqleft_plr, w_sqleft_plr) use !f --
;; confirm against the letter-set definitions that this is intended.
w_asterisk_pre_plr :=
%prefix (!p \*!p)
w_asterisk_pre_rule &
[ RNAME lpaxp ].

; Add punct_comma_informal_rule for e.g. comma-marked subjects and
; hd-cmp_u_c heads but don't use for generation.
w_comma-nf_plr :=
%suffix (!. !.,)
punctuation_comma_informal_rule &
[ RNAME lpcr ].

;; DPF 2012-02-18 - Tried adding double quotes to enable foreign-word analysis
;; but this leads to a lot of ambiguity whenever double quotes appear.
;; More research is needed.
;;
;; %prefix (!f ¦i!f) (!f ”!f) (!f “!f) (!f "!f) (!f ``!f)
;; %suffix (!q !qi¦) (!q !q”) (!q !q") (!q !q'')
;;
;; The two pattern lists above are commented out; they add double-quote
;; variants to the |¦i| / |i¦| patterns.  The active rules below use only the
;; |¦i| prefix and the |i¦| suffix.
;;

w_italleft_plr :=
%prefix (!f ¦i!f)
punctuation_italleft_rule &
[ RNAME lpil ].

w_italright_plr :=
%suffix (!q !qi¦)
punctuation_italright_rule &
[ RNAME lpir ].

;; Same orthographic pattern as w_italleft_plr; only the rule type differs.
w_drop-ileft_plr :=
%prefix (!f ¦i!f)
punct_drop_italleft_rule &
[ RNAME lpxdl ].

;; Same orthographic pattern as w_italright_plr; only the rule type differs.
w_drop-iright_plr :=
%suffix (!q !qi¦)
punct_drop_italright_rule &
[ RNAME lpxdr ].