;; Delimiters (strong and weak brackets)

;; These constraints enable the propagation and discharge of strong and weak
;; delimiters, where the strong ones come from some external source for
;; (optionally labeled) bracketing, and the weak ones are introduced in
;; token mapping for hyphens as in
;; |Santa Clara-based|.

;; Add features in ORTH for left and right delimiters (RB, LB for (strong)
;; brackets), with diff-list values.
;; NOTE(review): LD and RD are presumably the corresponding left/right
;; features for the weak delimiters -- confirm against the token-mapping rules.
;;
orthog :+
  [ LB bracket_list,
    RB bracket_list,
    LD bracket_list,
    RD bracket_list ].

;; On root_orth all four delimiter features are constrained to bracket_null.
root_orth :+
  [ LB bracket_null,
    RB bracket_null,
    LD bracket_null,
    RD bracket_null ].

;; DPF 2013-06-20 - We have added support for introducing bracketing
;; and syntactic dependency constraints onto tokens in preprocessing,
;; recorded in their +TRAIT feature.  These constraints are propagated
;; here onto attributes of the feature structure which is visible to the
;; syntactic rules.
;;
;; The +LB (left bracket) and +RB (right bracket) attributes take as
;; value a (possibly empty) list of ctypes, elements in the hierarchy
;; of construction types defined in the file `ctypes.tdl'.  The basic
;; idea is that a well-formed phrase cannot cross brackets, and each
;; construction will unify its own ctype with that required by a
;; bracket pair (where the bracket ctype can be more or less
;; construction-specific).  Such bracket constraints could be supplied
;; by a preprocessor such as a named-entity recognizer or a chunk
;; parser.  At present we have two types of token-mapping rules that
;; introduce brackets: (1) weak brackets to keep hyphenated expressions such
;; as |snow-covered| together, avoiding spurious analyses that would
;; associate the left and right sides of the hyphen to different
;; phrases; and (2) some placeholder token-mapping rules for
;; named entities such as |[New York] [Stock Exchange]|, where eventually
;; we would expect to get these labeled bracketed expressions from some
;; external resource.
;;
;; Propagate delimiters from tokens to ORTH, both for strong and weak brackets:
;; (1) strong brackets (LB, RB) let us constrain constructions over a phrase
;; based on external guidance via the insertion in the input string of the
;; special tokens ⌊(⌋ and ⌊)⌋ as in
;; |We admire ⌊(⌋ the cat ⌊)⌋ in the garden.|
;; (2) weak delimiters (LD, RD) are copied from the tokens in the same way;
;; NOTE(review): presumably these are the hyphen-induced weak brackets
;; introduced in token mapping -- confirm.
;;
;; DPF 2019-11-28 - With punct clitics, for multi-words, need to propagate
;; +LB from last token, since that is where the token-mapping stamps a nonempty
;; bracket for a following punct token.
;; DPF 2020-02-15 - Similarly need to keep +RB from first token, for left
;; punct marks as in |(such as|
;;
;; Hence the strong brackets are read "crosswise": RB from the first token
;; and LB from the last token, while the weak delimiters keep LD on the first
;; token and RD on the last.  For a single-token word the first and last
;; token coincide, so the distinction only matters for multi-word entries.
;;
basic_word :+
  [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ],
    TOKENS [ +LIST.FIRST.+TRAIT [ +RB #rb, +LD #ld ],
             +LAST.+TRAIT [ +LB #lb, +RD #rd ] ] ].

;; Lexical rules: identify delimiter constraints on dtr and mother

lex_rule_supermost :+
  [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ],
    ARGS.FIRST.ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ] ].

;; Every phrase is required to have bracket_null LD and RD values.
;; (This addendum was previously stated twice verbatim; once suffices.)

phrase :+
  [ ORTH [ LD bracket_null, RD bracket_null ] ].

;; Unary phrases: identify delimiter constraints on dtr and mother.

basic_basic_unary_phrase :+
  [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ],
    ARGS < [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ] ] > ].

n-adj_phrase :+
  [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ],
    ARGS < [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ] ] > ].

basic_npadv_mod_phrase :+
  [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ],
    ARGS < [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ] ] > ].

;; Normal headed binary phrases: block crossing of delimiter brackets, and
;; propagate from dtrs to mother.
;; The mother takes its left-side values (LB, LD) from the first daughter and
;; its right-side values (RB, RD) from the second daughter; the inward-facing
;; values (RB/RD of the first daughter, LB/LD of the second) must be
;; bracket_null, so no delimiter can sit between the two daughters.

no_inner_delim_phrase := sign &
  [ ORTH [ LB #lb, RB #rb, LD #ld, RD #rd ],
    ARGS < [ ORTH [ LB #lb, RB bracket_null,
                    LD #ld, RD bracket_null ] ],
           [ ORTH [ LB bracket_null, RB #rb,
                    LD bracket_null, RD #rd ] ] > ].

basic_head_initial :+ no_inner_delim_phrase.
binary_headed_phrase :+ no_inner_delim_phrase.
basic_head_filler_phrase :+ no_inner_delim_phrase.
;; Further binary rule types that inherit the no_inner_delim_phrase
;; constraints.

basic_head_comp_or_marker_phrase :+ no_inner_delim_phrase.
basic_head_spec_phrase :+ no_inner_delim_phrase.
subjh_rule_basic :+ no_inner_delim_phrase.
basic_head_mod_phrase_simple :+ no_inner_delim_phrase.
;n_n_cmpnd_phr :+ no_inner_delim_phrase.
basic_n_n_cmpnd_phr :+ no_inner_delim_phrase.
fw_sequence_left_phrase :+ no_inner_delim_phrase.
fw_sequence_right_phrase :+ no_inner_delim_phrase.
basic_run_on_rule :+ no_inner_delim_phrase.
min_coord_phr :+ no_inner_delim_phrase.
np_pred_phr :+ no_inner_delim_phrase.
binary_frag_rule :+ no_inner_delim_phrase.
bridge_binary_phrase :+ no_inner_delim_phrase.
basic_appos_phr :+ no_inner_delim_phrase.
binary_measure_np_phrase :+ no_inner_delim_phrase.
punct_marker_hfinal_nobrk_rule :+ no_inner_delim_phrase.
punct_marker_hinit_nobrk_rule :+ no_inner_delim_phrase.

;; Special head_final binary phrases: Pull delimiters on dtrs outward to
;; left and right perimeters of mother, by appending values of dtrs.
;; DPF 2018-06-12 - Exclude internal strong brackets.
;;
;; Each of the four features is a diff-list append: the mother's LIST is the
;; first daughter's LIST, the first daughter's LAST is the second daughter's
;; LIST, and the second daughter's LAST is the mother's LAST.
;; NOTE(review): in the portion of the file visible here, the only addenda
;; that use this type are inside the block comment below.

perimeter_delims_phrase := phrase &
  [ ORTH [ LB [ LIST #lbhead, LAST #lbtail ],
           RB [ LIST #rbhead, LAST #rbtail ],
           LD [ LIST #ldhead, LAST #ldtail ],
           RD [ LIST #rdhead, LAST #rdtail ] ],
    ARGS < [ ORTH [ LB [ LIST #lbhead, LAST #lbjoin ],
                    RB [ LIST #rbhead, LAST #rbjoin ],
                    LD [ LIST #ldhead, LAST #ldjoin ],
                    RD [ LIST #rdhead, LAST #rdjoin ] ] ],
           [ ORTH [ LB [ LIST #lbjoin, LAST #lbtail ],
                    RB [ LIST #rbjoin, LAST #rbtail ],
                    LD [ LIST #ldjoin, LAST #ldtail ],
                    RD [ LIST #rdjoin, LAST #rdtail ] ] ] > ].

#|
basic_np_name_cmpnd_phr :+ perimeter_delims_phrase.
basic_n_v-or-adj_cmpnd_phr :+ perimeter_delims_phrase.
adj_v_cmpnd_phr :+ perimeter_delims_phrase.
binary_measure_np_phrase :+ perimeter_delims_phrase.
num_noun_sequence_phrase :+ perimeter_delims_phrase.
|#

;; These compound and sequence rules currently take the no_inner_delim_phrase
;; constraints instead.

basic_basic_n_v-or-adj_cmpnd_phr :+ no_inner_delim_phrase.
adj_v_cmpnd_phr :+ no_inner_delim_phrase.
adv_v_cmpnd_phr :+ no_inner_delim_phrase.
num_noun_sequence_phrase :+ no_inner_delim_phrase.
;; Note: Also added constraints in roots.tdl, which cannot be included here.