;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2012 -- 2012 Stephan Oepen (oe@ifi.uio.no);
;;; see `LICENSE' for conditions.
;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; an experiment: template-based named entity detection, where we conjecture
;;; there is no ambiguity about the correct analysis of a sequence of tokens.
;;; presumably, we will be conservative about putting entries on this list, to
;;; stay clear of problems like: [when i met] |Dan| |Flickinger| [called.]
;;;
;;; the NER rules use the bracketing machinery to fully determine the internal
;;; structure of the phrase; furthermore, they require that all components be
;;; analyzed in terms of generic lexical entries, i.e. fully vanilla proper
;;; nouns.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; `Dow Jones Industrial Average': four +FORM-matched input tokens, four
;; output tokens.  reading +LB and +RB as lists of left and right brackets,
;; the output sketches [[[Dow Jones] Industrial] Average], with every bracket
;; carrying the npnm label.
;;
;; NOTE(review): this longer pattern is listed ahead of its two-token prefix
;; `Dow Jones' below; presumably rule order is significant, so the original
;; ordering is kept.
;;
dow_jones_industrial_average_tmr := four_four_ner_tmt &
[ +INPUT  < [ +FORM "Dow" ],
            [ +FORM "Jones" ],
            [ +FORM "Industrial" ],
            [ +FORM "Average" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm, npnm, npnm > ] ],
            [ +TRAIT [ +LB <>, +RB < npnm > ] ],
            [ +TRAIT [ +LB <>, +RB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].

;;
;; `Dow Jones': a single npnm bracket pair around both tokens.
;;
dow_jones_tmr := two_two_ner_tmt &
[ +INPUT  < [ +FORM "Dow" ],
            [ +FORM "Jones" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].

;;
;; `Wall Street Journal': two npnm brackets open on the first token; one
;; closes after `Street', the other after `Journal', i.e. roughly
;; [[Wall Street] Journal].
;;
wall_street_journal_tmr := three_three_ner_tmt &
[ +INPUT  < [ +FORM "Wall" ],
            [ +FORM "Street" ],
            [ +FORM "Journal" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm, npnm > ] ],
            [ +TRAIT [ +LB <>, +RB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].

;;
;; `Securities and Exchange Commission': the only rule in this group that
;; mixes bracket labels.  ctnp and npnm open on `Securities', hmkn opens on
;; `and', hmkn and ctnp close on `Exchange', and npnm closes on `Commission'
;; -- presumably [npnm [ctnp Securities [hmkn and Exchange]] Commission];
;; TODO confirm the nesting order implied by the list order.
;;
securities_and_exchange_commission_tmr := four_four_ner_tmt &
[ +INPUT  < [ +FORM "Securities" ],
            [ +FORM "and" ],
            [ +FORM "Exchange" ],
            [ +FORM "Commission" ] >,
  +OUTPUT < [ +TRAIT [ +LB < ctnp, npnm > ] ],
            [ +TRAIT [ +LB < hmkn >, +RB <> ] ],
            [ +TRAIT [ +LB <>, +RB < hmkn, ctnp > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].
;;
;; `New York Stock Exchange': the last two tokens are matched by regular
;; expressions that accept either an upper- or lower-case initial letter.
;; reading +LB and +RB as lists of left and right brackets, the output
;; sketches [npn [npnm New York] [nnc stock exchange]] -- TODO confirm the
;; nesting order implied by the list order.
;;
;; NOTE(review): this longer pattern is listed ahead of its two-token prefix
;; `New York' below; presumably rule order is significant, so the original
;; ordering is kept.
;;
new_york_stock_exchange_tmr := four_four_ner_tmt &
[ +INPUT  < [ +FORM "New" ],
            [ +FORM "York" ],
            [ +FORM ^[Ss]tock$ ],
            [ +FORM ^[Ee]xchange$ ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm, npn > ] ],
            [ +TRAIT [ +LB <>, +RB < npnm > ] ],
            [ +TRAIT [ +LB < nnc >, +RB <> ] ],
            [ +TRAIT [ +RB < nnc, npn > ] ] > ].

;;
;; two-token place names starting in `New': each rule wraps its token pair
;; in a single npnm bracket pair.
;;
new_york_tmr := two_two_ner_tmt &
[ +INPUT  < [ +FORM "New" ],
            [ +FORM "York" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].

new_mexico_tmr := two_two_ner_tmt &
[ +INPUT  < [ +FORM "New" ],
            [ +FORM "Mexico" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].

new_jersey_tmr := two_two_ner_tmt &
[ +INPUT  < [ +FORM "New" ],
            [ +FORM "Jersey" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].

new_hampshire_tmr := two_two_ner_tmt &
[ +INPUT  < [ +FORM "New" ],
            [ +FORM "Hampshire" ] >,
  +OUTPUT < [ +TRAIT [ +LB < npnm > ] ],
            [ +TRAIT [ +RB < npnm > ] ] > ].