;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2009 -- 2011 Stephan Oepen (oe@ifi.uio.no); 
;;; see `LICENSE' for conditions.
;;;

;;
;; now, with +CLASS information available, optionally make any token a proper
;; NE that is (a) capitalized and not initial, (b) spelled in mixed case
;; (|LinGO|), or (c) initial all-caps (a sub-set of capitalized).  note that
;; we want these rules to also fire on single-character tokens, as e.g. in
;; |the A Team| or |the I and J columns| (dan may tame the |I| proper name by
;; adding a non-nominative native lexical entry for it).
;;
;; _fix_me_
;; the ERG lexicon includes a few entries (e.g. titles like |Mr.| and |Jr.|)
;; with capitalized orthography.  currently capitalized NEs are about the only
;; class of generics that can survive alongside a native entry (in the lexical
;; filtering phase), hence it might make sense to prune unwanted tokens here,
;; even though that means knowledge about the ERG lexicon is applied at token
;; mapping time already?                                       (23-jan-09; oe)
;;
;; come to think of it, i suspect the |I| special case mentioned above would
;; fall into this class too?                                    (6-aug-11; oe)
;;
;; case (a): +CONTEXT requires an alphanumeric token that is not initial
;; (+INITIAL -) and whose +CASE is `capitalized'; +OUTPUT specifies a token of
;; +CLASS proper_ne.  everything else is inherited from add_ne_tmt, which is
;; defined elsewhere.
capitalized_name_tmr := add_ne_tmt &
[ +OUTPUT < [ +CLASS proper_ne ] >,
  +CONTEXT < [ +CLASS alphanumeric &
                      [ +CASE capitalized, +INITIAL - ] ] > ].

;; case (b): +CONTEXT requires an alphanumeric token whose +CASE is `mixed'
;; (e.g. |LinGO|), with no constraint on +INITIAL; +OUTPUT specifies a token of
;; +CLASS proper_ne.  everything else is inherited from add_ne_tmt, which is
;; defined elsewhere.
mixed_name_tmr := add_ne_tmt &
[ +OUTPUT < [ +CLASS proper_ne ] >,
  +CONTEXT < [ +CLASS alphanumeric &
                      [ +CASE mixed ] ] > ].

;; case (c): +CONTEXT requires an alphanumeric token that is initial
;; (+INITIAL +) and whose +CASE is `capitalized+upper'; +OUTPUT specifies a
;; token of +CLASS proper_ne.  everything else is inherited from add_ne_tmt,
;; which is defined elsewhere.
upper_name_tmr := add_ne_tmt &
[ +OUTPUT < [ +CLASS proper_ne ] >,
  +CONTEXT < [ +CLASS alphanumeric &
                      [ +CASE capitalized+upper, +INITIAL + ] ] > ].

;;;
;;; digit plus fraction: |2 3/8|, |-12 1/2|
;;;
;;; _fix_me_
;;; these are quite interesting: the grammar has a compositional analysis of
;;; the structure, hence there is no (longer any) need to match them as an NE
;;; (thus reducing the inventory of initial-to-internal tokenization changes).
;;; however, for improved efficiency, we would like to put a pair of brackets
;;; around the pair of tokens.  to make things more interesting, these are one
;;; of the few cases challenging the assumptions we would like to make about
;;; bracketings around hyphenated expressions, e.g. |2 1/2-year| or |2–3 1/2|.
;;; hence, we want to try and move internal brackets to the periphery.  to do
;;; this generically, i believe i need to convert +LB and +RB to difference
;;; lists, so i can actually append them.
;;;
;;; matches a card_ne token with +TICK - immediately followed by a frct_ne
;;; token and rewrites only their bracket traits:
;;;   - first output token:  +LB is `hcp' prepended to the LB of the second
;;;     input token, +RB is emptied, and +TICK becomes +;
;;;   - second output token: +LB is emptied and +RB is `hcp' prepended to the
;;;     RB of the first input token.
;;; +UW is carried over unchanged on both tokens.  presumably the +TICK flip
;;; keeps the rule from re-applying to its own output -- confirm against
;;; two_two_trait_tmt, which is defined elsewhere.
digit_fraction_tmr := two_two_trait_tmt &
[ +INPUT < [ +CLASS card_ne, +TICK -,
             +TRAIT [ +RB #rb1, +UW #uw1 ] ],
           [ +CLASS frct_ne,
             +TRAIT [ +LB #lb2, +UW #uw2 ] ] >,
  +OUTPUT < [ +TICK +,
              +TRAIT [ +LB < hcp . #lb2 >, +RB <>, +UW #uw1 ] ],
            [ +TRAIT [ +LB <>, +RB < hcp . #rb1 >, +UW #uw2 ] ] > ].

;;; Special case for digit |1| (e.g. |1 2/3|) which is a native lexical entry 
;;; since it has singular agreement
;;;
;;; same bracket rewiring as for a card_ne followed by a fraction, but the
;;; first token is selected by its surface form "1" rather than by +CLASS:
;;;   - first output token:  +LB is `hcp' prepended to the LB of the second
;;;     input token, +RB is emptied, and +TICK goes from - to +;
;;;   - second output token: +LB is emptied and +RB is `hcp' prepended to the
;;;     RB of the first input token.
;;; +UW is carried over unchanged on both tokens.
one_fraction_tmr := two_two_trait_tmt &
[ +INPUT < [ +FORM "1", +TICK -,
             +TRAIT [ +RB #rb1, +UW #uw1 ] ],
           [ +CLASS frct_ne,
             +TRAIT [ +LB #lb2, +UW #uw2 ] ] >,
  +OUTPUT < [ +TICK +,
              +TRAIT [ +LB < hcp . #lb2 >, +RB <>, +UW #uw1 ] ],
            [ +TRAIT [ +LB <>, +RB < hcp . #rb1 >, +UW #uw2 ] ] > ].

;;; Similarly, a rule that moves a bracket leftward for |$20-per-share|, where
;;; by this point the affected tokens are |$ 20-|
;;;
;;; matches a currency symbol (by +FORM regex, +TICK -) immediately followed by
;;; a card_ne token whose +LB list starts with `ctype', and moves that whole
;;; left-bracket list onto the currency token:
;;;   - first output token:  +LB is the second input token's LB (`ctype' plus
;;;     its tail), +RB is emptied, and +TICK becomes +;
;;;   - second output token: +LB is emptied and +RB is kept as it was.
;;; +UW is carried over unchanged on both tokens.
;;; NOTE(review): #rb1 is bound on the input but not reused on the output, so
;;; any right brackets on the currency token are discarded -- confirm that this
;;; is intended.
curr_digit_tmr := two_two_trait_tmt &
[ +INPUT < [ +FORM ^((?:AU?|CA?|HK|NZ|US)?\$|S[Kk]r)$,
             +TICK -,
             +TRAIT [ +RB #rb1, +UW #uw1 ] ],
           [ +CLASS card_ne,
             +TRAIT [ +LB < ctype . #lb2 >, +RB #rb2, +UW #uw2 ] ] >,
  +OUTPUT < [ +TICK +,
              +TRAIT [ +LB < ctype . #lb2 >, +RB <>, +UW #uw1 ] ],
            [ +TRAIT [ +LB <>, +RB #rb2, +UW #uw2 ] ] > ].

#|
curr_digit_tmr := bracket_move_left_tmr &
[ +CONTEXT < [ +CLASS card_ne ] >,
  +INPUT < [ +FORM ^((?:AU?|CA?|HK|NZ|US)?\$|S[Kk]r)$ ] > ].
|#