;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2009 -- 2018 Stephan Oepen (oe@ifi.uio.no); 
;;; see `LICENSE' for conditions.
;;;

;;
;; now, with +CLASS information available, optionally make any token a proper
;; NE that is (a) capitalized and not initial, (b) spelled in mixed case
;; (|LinGO|), or (c) initial all-caps (a sub-set of capitalized).  note that
;; we want these rules to also fire on single-character tokens, as e.g. in
;; |the A Team| or |the I and J columns| (dan may tame the |I| proper name by
;; adding a non-nominative native lexical entry for it).
;;
;; _fix_me_
;; the ERG lexicon includes a few entries (e.g. titles like |Mr.| and |Jr.|)
;; with capitalized orthography.  currently capitalized NEs are about the only
;; class of generics that can survive alongside a native entry (in the lexical
;; filtering phase), hence it might make sense to prune unwanted tokens here,
;; even though that means knowledge about the ERG lexicon is applied at token
;; mapping time already?                                       (23-jan-09; oe)
;;
;; come to think of it, i suspect the |I| special case mentioned above would
;; fall into this class too?                                    (6-aug-11; oe)
;;

;; case (a): a capitalized token that is not initial (+INITIAL -) licenses a
;; token of class proper_ne.  the matched token is listed under +CONTEXT, not
;; +INPUT; what else gets carried over to the new token is determined by
;; add_ne_tmt, which is defined elsewhere.
capitalized_name_tmr := add_ne_tmt &
[ +OUTPUT < [ +CLASS proper_ne ] >,
  +CONTEXT < [ +CLASS alphanumeric & [ +CASE capitalized, +INITIAL - ] ] > ].

;; DPF 2020-06-01 - We don't want +INITIAL + to remain on output, since we
;; want generic names to be able to occur in initial position, and in the
;; educ version of the grammar this +INITIAL property is handled by a special
;; rule that for some reason does not want to apply to generic names.
#|
mixed_name_tmr := add_ne_tmt &
[ +CONTEXT < [ +CLASS alphanumeric & [ +CASE mixed ] ] >,
  +OUTPUT < [ +CLASS proper_ne ] > ].   

upper_name_tmr := add_ne_tmt &
[ +CONTEXT < [ +CLASS alphanumeric & 
                      [ +INITIAL +, +CASE capitalized+upper ] ] >,
  +OUTPUT < [ +CLASS proper_ne ] > ].   
|#
;; case (b): a token spelled in mixed case licenses a token of class
;; proper_ne; this rule itself puts no constraint on +INITIAL.  built on
;; basic_add_ne_tmt (defined elsewhere), with +UW + set on the output token.
mixed_name_tmr := basic_add_ne_tmt &
[ +OUTPUT < [ +CLASS proper_ne, +TRAIT [ +UW + ] ] >,
  +CONTEXT < [ +CLASS alphanumeric & [ +CASE mixed ] ] > ].

;; case (c): an initial token (+INITIAL +) whose +CASE is capitalized+upper
;; licenses a token of class proper_ne.  built on basic_add_ne_tmt (defined
;; elsewhere), with +UW + set on the output token.
upper_name_tmr := basic_add_ne_tmt &
[ +OUTPUT < [ +CLASS proper_ne, +TRAIT [ +UW + ] ] >,
  +CONTEXT < [ +CLASS alphanumeric &
                      [ +CASE capitalized+upper, +INITIAL + ] ] > ].

;;;
;;; digit plus fraction: |2 3/8|, |-12 1/2|
;;;
;;; _fix_me_
;;; these are quite interesting: the grammar has a compositional analysis of
;;; the structure, hence there is no (longer any) need to match them as an NE
;;; (thus reducing the inventory of initial-to-internal tokenization changes).
;;; however, for improved efficiency, we would like to put a pair of brackets
;;; around the pair of tokens.  to make things more interesting, these are one
;;; of the few cases challenging the assumptions we would like to make about
;;; bracketings around hyphenated expressions, e.g. |2 1/2-year| or |2–3 1/2|.
;;; hence, we want to try and move internal brackets to the periphery.  to do
;;; this generically, i believe i need to convert +LB and +RB to difference
;;; lists, so i can actually append them.
;;;
;;; we no longer believe this pattern to be universally valid, so the rule
;;; has been discarded.
;;;

#|
;;; Similarly, added bracket-leftward rule for |$20-per-share| where by
;;; now the affected tokens are |$ 20-|
;;; DPF 2017-11-20 - But these two are now handled instead by leaving a
;;; single token in the chart for the whole hyphenated expression (in addition
;;; to the split-up tokens), so this rule is not needed either.
;;;
curr_digit_tmr := two_two_trait_tmt &
[ +INPUT < [ +FORM ^((?:AU?|CA?|HK|NZ|US)?\$|S[Kk]r)$,
             +TRAIT [ +UW #uw1, +RB #rb1 ], +TICK - ],
           [ +CLASS card_ne,
             +TRAIT [ +UW #uw2, +LB < ctype . #lb2 >, +RB #rb2 ] ] >,
  +OUTPUT < [ +TRAIT [ +UW #uw1, +LB < ctype . #lb2 >, +RB <> ], +TICK + ],
            [ +TRAIT [ +UW #uw2, +LB <>, +RB #rb2 ] ] > ].

curr_digit_tmr := bracket_move_left_tmr &
[ +CONTEXT < [ +CLASS card_ne ] >,
  +INPUT < [ +FORM ^((?:AU?|CA?|HK|NZ|US)?\$|S[Kk]r)$ ] > ].
|#