;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; in January 2009, the ERG assumptions about handling of unknown words (in
;;; PET) changed fundamentally.  this file preserves a number of PET settings
;;; that are obsolete in the new, chart mapping universe (which requires the
;;; use of suitable PET binaries, compiled off the `cm' SVN branch).  there is
;;; no expectation, expressed or implied, that the settings below still make
;;; sense with the current version of the ERG.  instead, to hopefully ease the
;;; transition, documenting which settings have been obsoleted might help in
;;; adapting external tools (preparing input to the ERG and PET).  this file
;;; will disappear before too long.                            (21-jan-09; oe)
;;;

;;
;; the following two turn on (procedural) `characterization', i.e. code that
;; will destructively modify the AVMs of new edges, to record the surface
;; positions (as passed in from an external tokenizer) in each new relation,
;; i.e. those that have no characterization information yet.  this mechanism
;; is brittle (both in the LKB and PET), and we hope to eventually replace it
;; in the new chart mapping universe: there, the grammarian has full control
;; over how to `pick up' information from input tokens, and characterization
;; can be accomplished without a specialized procedure `behind the scenes'.
;;
mrs-cfrom-path := "SYNSEM.LOCAL.CONT.RELS.LIST.CFROM".
mrs-cto-path := "SYNSEM.LOCAL.CONT.RELS.LIST.CTO".

;;
;; path to LABEL for relation-to-word mapping, absolute and within a relation
;;
label-path := "SYNSEM.LKEYS.--KEYREL.WLINK".
label-path-tail := "WLINK".

;;;
;;; generic lexical entries for unknown words: basically, for each unknown
;;; token in the input all generic entries are postulated.  optionally, there
;;; are two devices to filter out generic entries: suffix-based and by virtue
;;; of POS tag information.  generic entries that require a certain suffix
;;; (`generic-le-suffixes') only fire if the input form has the suffix.
;;; if the input word has one or more POS tags associated to it, these are
;;; looked up in the `posmapping' table: this table is a list of pairs (tag,
;;; gle) where `gle' is the name of one of the generic items in `generic-les'.
;;; a non-empty `posmapping' table will filter all generic entries that are not
;;; explicitly licensed by a POS tag.
;;;
generic-lexentry-status-values := generic-lex-entry.

;;;
;;; some generic lexical entries require inflectional marking.  this mechanism
;;; is a filter on which generic entries proposed by other means can survive:
;;; generic entries listed here will only be postulated if the required suffix
;;; can be matched against the input token.
;;;
;;; when using only generic entries licensed by a POS tag, the suffix filter
;;; really does not make a lot of sense anymore.                (6-jun-03; oe)
;;;
;;; (the whole `generic-le-suffixes' setting below is disabled by the
;;; surrounding block comment.)
;;;
#|
generic-le-suffixes :=
  $generic_trans_verb_pres3sg "S"
  $generic_trans_verb_past "ED"
  $generic_trans_verb_psp "ED"
  $generic_trans_verb_prp "ING"
  $generic_pl_noun "S"
  ;;
  ;; when running without a POS tagger, effectively disable a few generics
  ;;
  $generic_adj_compar "_block_"
  $generic_adj_superl "_block_"
.
|#

;;; DPF 17-sept-07
;;; Tried doing without the adjective, since TNT appears to mostly guess both
;;; an adjective and a noun, and our generic mass-count noun can almost always
;;; do the work of the adjective.  This would avoid large amounts of spurious
;;; ambiguity for most occurrences of these pairs.
;;; But unfortunately TNT doesn't always guess both, so we need JJ when it's
;;; the only guess.  Maybe we can effect this with the new token-mapping
;;; machinery ...
;;;
;;; table of (tag, generic entry) pairs, one pair per line.  the second group
;;; of pairs, inside the block comment, is disabled.
;;;
posmapping :=
  UpperAndLowerCase $genericname
  UpperAndLowerCaseInitial $genericname
  JJ $generic_adj
  JJR $generic_adj_compar
  JJS $generic_adj_superl
  CD $generic_number
  NN $generic_mass_count_noun
  NNS $generic_pl_noun
  NNPS $generic_pl_noun
  NNP $genericname
  FW $generic_mass_noun
  RB $generic_adverb
  VB $generic_trans_verb_bse
  VBD $generic_trans_verb_past
  VBG $generic_trans_verb_prp
  VBN $generic_trans_verb_psp
  VBP $generic_trans_verb_presn3sg
  VBZ $generic_trans_verb_pres3sg
#|
  JJ $generic_adj
  JA $generic_adj
  JB $generic_adj
  JBR $generic_adj_compar
  JBT $generic_adj_superl
  JJT $generic_adj_superl
  NN $generic_mass_noun
  NN1 $generic_sg_noun
  NN2 $generic_pl_noun
  NP1 $genericname
  NP2 $genericname
  NNSB $generic_title_noun
  NNSB1 $generic_title_noun
  NNSB2 $generic_title_noun
  RR $generic_adverb
  VV0 $generic_trans_verb_bse
  VV0 $generic_trans_verb_presn3sg
  VVD $generic_trans_verb_past
  VVD $generic_trans_verb_psp
  VVN $generic_trans_verb_psp
  VVN $generic_trans_verb_past
  VVG $generic_trans_verb_prp
  VVZ $generic_trans_verb_pres3sg
|#
.

;;;
;;; the setting `pos-completion' enables an additional mechanism to do with
;;; processing of generic lexical entries: whenever we receive POS information
;;; as part of the input, we check to see whether the built-in lexical entries
;;; suffice to satisfy the POS annotations: each lexical entry retrieved for an
;;; input token
;;;
;;;   NOTE(review): the example input token that stood here appears to have
;;;   been lost; presumably it carried POS tags `pos_1' ... `pos_n' -- TODO
;;;   restore from the original file.
;;;
;;; is mapped to an application-specific POS tag, using the `type-to-pos' map,
;;; and checking the type of each lexical entry for subsumption against the
;;; left-hand side of each `type-to-pos' rule.  some or all POS annotations
;;; from the input may be `satisfied' under this mapping by built-in lexical
;;; entries, e.g. for the example above, there may be lexical entries whose
;;; type maps to `pos_1' and `pos_3'; unless all POS annotations are satisfied
;;; after all built-in lexical entries have been processed, the remaining POS
;;; categories are processed by the regular `posmapping' look-up.
;;; note that, as a side effect, an empty `type-to-pos' map will always result
;;; in having all generic lexical entries activated (modulo the filter
;;; described above), even for input tokens that were found in the native
;;; lexicon.
;;;
;;; (both `pos-completion' and `type-to-pos' below are disabled by the
;;; surrounding block comment.)
;;;
#|
pos-completion.
type-to-pos :=
  basic_noun_word NN
  basic_noun_word NNS
  basic_noun_word NNP
  basic_pronoun_word NN
  basic_pronoun_word NNS
  basic_pronoun_word NNP
.
|#

;;
;; 'preprocessor': FSPP rule file
;;
preprocessor := "preprocessor.fsr".

; insert surface form of ersatzes at end of this path
;
; NOTE(review): both paths below go through `KEYREL', whereas `label-path'
; in this file uses `--KEYREL' -- confirm this difference is intended.
ersatz-carg-path := "SYNSEM.LKEYS.KEYREL.CARG".
mrs-carg-path := "SYNSEM.LKEYS.KEYREL.CARG".