;;; -*- Mode: tdl; Coding: utf-8; -*-

;;
;; Punctuation affix rules.  Every entry has the same four parts:
;;
;;   <name> :=
;;   %suffix or %prefix, followed by one or more parenthesised pattern pairs
;;   <supertype> &
;;   [ RNAME <short label> ].
;;
;; In the pairs that use a |!x| symbol, the second pattern repeats the first
;; with the punctuation mark attached (at the end for %suffix, at the front
;; for %prefix).  Several marks are preceded by a backslash inside the
;; patterns (|\?|, |\!|, |\(|, |\)|).
;;
;; NOTE(review): the symbols |!p|, |!f|, |!k|, |!.| and the |*| pattern are
;; not defined in this part of the file; presumably they are letter sets or
;; wildcards declared elsewhere -- confirm there before editing a pattern.
;; NOTE(review): each %suffix / %prefix specification is kept on a line of
;; its own; presumably some TDL readers consume it line by line, so confirm
;; before re-flowing these entries.
;;

;
; Period affixed to end of word, e.g. |cat.|
;
w_period_plr :=
%suffix (!. !..)
punctuation_period_rule &
[ RNAME lpp ].

;
; Double question mark affixed to end of word, e.g. |cat??|
;
w_qqmark_plr :=
%suffix (!p !p\?\?)
w_qqmark_rule &
[ RNAME lpqq ].

;
; Question mark affixed to end of word, e.g. |cat?|
;
w_qmark_plr :=
%suffix (!p !p\?)
punctuation_qmark_rule &
[ RNAME lpq ].

;
; Question mark followed by exclamation point affixed, e.g. |cat?!|
;
w_qmark-bang_plr :=
%suffix (!p !p\?\!)
w_qmark-bang_rule &
[ RNAME lpcq ].

;
; Question mark followed by period affixed, e.g. |cat?.|
;
; NOTE(review): RNAME |lpcq| is also the label of w_qmark-bang_plr just
; above -- confirm whether a distinct label was intended here.
;
w_qmark-period_plr :=
%suffix (!p !p\?.)
w_qmark-period_rule &
[ RNAME lpcq ].

;
; Comma affixed to end of word, e.g. |cat,|
;
w_comma_plr :=
%suffix (* ,)
punctuation_comma_rule &
[ RNAME lpcm ].

;
; `Sandwich' comma affixed to end of word, e.g. |cat⸴|
; The mark in the pattern is |⸴|, not the ASCII comma used by w_comma_plr
; (NOTE(review): looks like U+2E34 RAISED COMMA -- confirm).
;
w_comma-sdwch_plr :=
%suffix (* ⸴)
punctuation_comma_sandwich_rule &
[ RNAME lpcs ].

;
; Exclamation point affixed to end of word, e.g. |cat!|
;
w_bang_plr :=
%suffix (!p !p\!)
punctuation_bang_rule &
[ RNAME lpx ].

;
; Semicolon affixed to end of word, e.g. |cat;|
; The |;| inside the pattern is part of the pattern, not a comment start.
;
w_semicol_plr :=
%suffix (!k !k;)
w_semicol_rule &
[ RNAME lpsc ].

; For robustness, where typo leaves two semicolons
;
; Robust double semicolon affixed to end of word, e.g. |cat;;|
;
w_double_semicol_plr :=
%suffix (!. !.;;)
w_double_semicol_rule &
[ RNAME lpds ].

;
; Right parenthesis affixed to end of word, e.g. |cat)|
;
w_rparen_plr :=
%suffix (!p !p\))
w_rparen_rule &
[ RNAME lprp ].

; For robustness, where mis-editing left comma preceding right paren
;
; Comma wrongly placed before right parenthesis, e.g. |cat,)|
;
w_comma-rp_plr :=
%suffix (!p !p,\))
w_comma-rp_rule &
[ RNAME lpcp ].

;
; Left parenthesis prefixed to word, e.g. |(cat|
;
w_lparen_plr :=
%prefix (!f \(!f)
w_lparen_rule &
[ RNAME lplp ].

;
; Right square bracket affixed to end of word, e.g. |cat]|
; The same rule also lists a closing brace and a vertical bar as
; alternative marks (second and third pair).
;
w_rbrack_plr :=
%suffix (!p !p]) (!p !p}) (!p !p|)
w_rbrack_rule &
[ RNAME lprb ].

;
; Left square bracket prefixed to word, e.g. |[cat|
; The same rule also lists an opening brace and a vertical bar as
; alternative marks (second and third pair).
;
w_lbrack_plr :=
%prefix (!f [!f) (!f {!f) (!f |!f)
w_lbrack_rule &
[ RNAME lplb ].
;;
;; _fix_me_
;; how many variants of quote marks to include depends on what decisions we
;; make in pre-processing.  as of September 2008, we have opted for a total of
;; six, viz. Unicode directional quotes (|“|, |”|, |‘|, and |’|), as well as
;; straight ASCII quotes (|"| and |'|).  when an input text makes use of good
;; directional quotes we preserve those (to not introduce ambiguity).  also, by
;; default we disambiguate quotes in pre-processing, based on proximity to
;; token boundaries, i.e. in processing inputs following standard orthography
;; we only see proper opening and closing quotes (of which the latter can also
;; be apostrophes or units of measure).  however, there also is a configuration
;; for `messed up' inputs, e.g. ones that were pre-tokenized, so we no longer
;; stand a chance of disambiguating quotes.  in this mode, straight quotes are
;; ambiguous between opening and closing ones.  finally, if only for robustness
;; (or use without the full pre-processors), allow some variants that we do not
;; expect to see in the ideal world: |``|, |''|, and |`|.
;;

;
; Double quote affixed to end of word, e.g. |cat"|
; Marks listed: |”|, |"|, and two straight single quotes |''|.
;
w_dqright_plr :=
%suffix (!q !q”) (!q !q") (!q !q'')
punctuation_dqright_rule &
[ RNAME lpdr ].

;
; Double quote prefixed to word, e.g. |"cat|
; Marks listed: |”|, |“|, |"|, and two backquotes |``|.
;
w_dqleft_plr :=
%prefix (!f ”!f) (!f “!f) (!f "!f) (!f ``!f)
punctuation_dqleft_rule &
[ RNAME lpdl ].

;
; Single quote affixed to end of word, e.g. |cat'|
; Marks listed: |’| and |'|.
;
w_sqright_plr :=
%suffix (!q !q’) (!q !q')
punctuation_sqright_rule &
[ RNAME lpsr ].

;; DPF 20-feb-10 - Let's try doing without the straight quote as left
;; punctuation, since it creates spurious ambiguity for |They'd arrived.|
;; DPF 06-mar-10 - But now we need it for e.g. |The 'cat' arrived.|
;; _fix_me_
;; in fact, this ambiguity will disappear once we fully move to supporting the
;; Unicode apostrophe, i.e. convert the lexicon to include |don’t|, |’d|, and
;; |o’clock|.
;; at present, we hack around these by `normalizing' (in the wrong
;; direction) apostrophes to straight typewriter quotes.  (23-mar-10; oe)
;;

;
; Single quote prefixed to word, e.g. |'cat|
; Marks listed: |‘|, |'|, |`|, and each of those three preceded by a
; straight double quote (|"‘|, |"'|, |"`|).
;
; NOTE(review): the pair |(!f '!f)| appears twice, in second and in last
; position.  It is left untouched here because the order of the pairs may
; be significant to whatever reads them; confirm whether the last pair is a
; deliberate repeat or was meant to carry a different mark (e.g. |’|).
;
w_sqleft_plr :=
%prefix (!f ‘!f) (!f '!f) (!f `!f) (!f "‘!f) (!f "'!f) (!f "`!f) (!f '!f)
punctuation_sqleft_rule &
[ RNAME lpsl ].

;
; Hyphen affixed to end of word, e.g. |cat-|
;
w_hyphen_plr :=
%suffix (!p !p-)
w_hyphen_rule &
[ RNAME lphr ].

;
; Hyphen prefixed to word, e.g. |-rw|
;
w_lefthyphen_plr :=
%prefix (!f -!f)
w_lefthyphen_rule &
[ RNAME lphl ].

;; DPF 2012-08-29 - Recent REPP changes now attach three dots to preceding
;; token without a space, when the dots are between two tokens,
;; so add variant of hyphen suffix rule to accommodate.
;;

;
; Three dots affixed to end of word, e.g. |cat...|
;
w_threedot_plr :=
%suffix (!p !p...)
w_threedot_rule &
[ RNAME lptd ].

;
; Asterisk affixed to end of word, e.g. |cat*|
;
w_asterisk_plr :=
%suffix (!p !p\*)
w_asterisk_rule &
[ RNAME lpax ].

;
; Asterisk prefixed to word, e.g. |*cat|
;
; NOTE(review): this %prefix rule uses |!p|, whereas the other %prefix rules
; visible in this block use |!f| -- confirm that this is intended.
;
w_asterisk-pre_plr :=
%prefix (!p \*!p)
w_asterisk_pre_rule &
[ RNAME lpaxp ].

; Add punct_comma_informal_rule for e.g. comma-marked subjects and
; hd-cmp_u_c heads but don't use for generation.
;
; Nonformal comma affixed to end of word, e.g. |cat,|
;
w_comma-nf_plr :=
%suffix (* ,)
punctuation_comma_informal_rule &
[ RNAME lpcr ].

;; DPF 2012-02-18 - Tried adding double quotes to enable foreign-word analysis
;; but this leads to a lot of ambiguity whenever double quotes appear.  More
;; research is needed.
;; %prefix (!f ¦i!f) (!f ”!f) (!f “!f) (!f "!f) (!f ``!f)
;; %suffix (!q !qi¦) (!q !q”) (!q !q") (!q !q'')

;
; Italics mark prefixed to word, e.g. |⌊/cat|
; The pattern uses the mark |⌊/|; the disabled variant above writes the
; opening italics mark as |¦i| instead.
;
w_italleft_plr :=
%prefix (!f ⌊/!f)
punctuation_italleft_rule &
[ RNAME lpil ].

;
; Italics mark affixed to end of word, e.g. |cat/⌋|
; The pattern uses the mark |/⌋|; the disabled variant above writes the
; closing italics mark as |i¦| instead.
;
w_italright_plr :=
%suffix (!r !r/⌋)
punctuation_italright_rule &
[ RNAME lpir ].

;
; Ignored italics mark prefixed to word, e.g. |⌊/cat|
; Same pattern as w_italleft_plr, but with supertype
; punct_drop_italleft_rule.
;
w_drop-ileft_plr :=
%prefix (!f ⌊/!f)
punct_drop_italleft_rule &
[ RNAME lpxdl ].
;
; Ignored italics mark affixed to end of word, e.g. |cat/⌋|
; This is a %suffix rule using the mark |/⌋|, with supertype
; punct_drop_italright_rule.
;
w_drop-iright_plr :=
%suffix (!r !r/⌋)
punct_drop_italright_rule &
[ RNAME lpxdr ].

;; DPF 2014-04-24
;; Spencer Rarrick proposed the clever idea of introducing an underspecified
;; punctuation rule that will effectively only get used in generation (given
;; the choice of unlikely character sequence for the suffix), and which
;; enables packing in generation to pack the period and question-mark edges,
;; reducing the number of edges in the packed generator chart, and still
;; producing the right end results since edges produced with this underspecified
;; variant will lack an adequately specific value for SF to match the input
;; MRS's requirement, and hence will be discarded in unpacking, in favor of
;; whichever of period or qmark matches the input MRS.
;; MONITOR: Let's keep an eye on this efficiency-motivated rule, to see that
;; it earns its keep and does not cause trouble.
;; DPF 2014-07-24 - Unfortunately, it is not clear how to block this rule's
;; output for MRS input where the SF value is underspecified, as with the
;; MRS produced by parsing a no-punct sentence like |Kim arose| so let's
;; comment it out for now.
;;
;; The entry below is disabled: it sits inside a #| ... |# block comment.
;;
#|
w_generic_clause_plr :=
%suffix (!p !pzzzz)
punctuation_clause_rule &
[ RNAME lgcr ].
|#