;;; -*- Mode: tdl; Coding: utf-8; -*-
;;;
;;; Copyright (c) 2009 -- 2010 Stephan Oepen (oe@ifi.uio.no);
;;; see ‘LICENSE’ for conditions.
;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; a second pass at lightweight NEs, now that we have further split up tokens
;;; at hyphens and dashes.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;
;;; numerals, including some sub-sets (days of the month or years).  in some of
;;; the rules |1| is deliberately excluded, because (unlike everyone else) it
;;; requires singular agreement.  hence, we assume that all usages of |1| need
;;; be accounted for in the native lexicon, including as hours or days of the
;;; month.
;;;

;;
;; (candidate) days of the month: |2| to |9|, |10| to |29|, |30|, and |31|
;;
card_or_dom_ne_tmr := ne_tmt &
[ +INPUT
  < [ +FORM ^([2-9]|[1-2][0-9]|3[01])$ ] >,
  +OUTPUT
  < [ +CLASS card_or_dom_ne ] > ].

;;
;; (candidate) years: |950|, |1805|, |1957|, |2005|, et al., i.e. three
;; digits with an optional leading |1| or |2|.
;;
;; DPF 2012-12-09 - Allow for trailing hyphen (or decimal point) in order to
;; get e.g. |1990-model car|
;;
card_or_year_ne_tmr := ne_tmt &
[ +INPUT
  < [ +FORM ^[12]?[0-9]{3}(\.|-)?$ ] >,
  +OUTPUT
  < [ +CLASS card_or_year_ne ] > ].

;;
;; any sequence of digits, with optional sign and optional (trailing) decimal
;; point; single-digit |0| and |1| and forms with a leading zero do not match.
;; to accept measure phrases like |900-pound|, where at this point |900-| is
;; one token, allow an optional trailing hyphen.
;;
card_ne_1_tmr := ne_tmt &
[ +INPUT
  < [ +FORM ^[-+±~]?([2-9]|[1-9][0-9][0-9]*)(\.|-)?$ ] >,
  +OUTPUT
  < [ +CLASS card_ne ] > ].

;;
;; floating point numbers, with optional sign and at least one decimal.  again
;; for measures phrases, allow optional hyphen: |1.5-inch pianist|.
;; DPF 2012-05-27 - Generalized this class to also allow treatment of |8.10|
;; as a proper name, as in |Ubuntu 8.10|.
;;
;; DPF 2012-09-19 - In some contexts such as the WSJ, an expression like
;; |stocks rose 0.3 point| is accepted with a sub-one decimal value and a
;; singular measure noun.  So we split the old rule into two, one for
;; decimal numbers 1.0 or greater (and the negative equivalent), and the
;; other for decimals with a 0 before the decimal.
;;
;; the integer part is one non-zero digit followed by any number of digits,
;; so that values containing a zero in the integer part (|10.5|, |100.25|)
;; are matched too, while forms with a leading zero are left to the sub-one
;; rule that follows.
;;
card_ne_2_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±~]?[1-9][0-9]*\.[0-9]+$ ] >,
  +OUTPUT < [ +CLASS card_or_proper_ne ] > ].

;;
;; sub-one decimals: optional sign, zero or more |0| digits, a decimal point,
;; and at least one decimal digit (|0.3|, |.5|, |-0.25|).
;;
card_ne_3_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±~]?[0]*\.[0-9]+$ ] >,
  +OUTPUT < [ +CLASS card_or_decimal_ne ] > ].

;;
;; US-style or German separators, optional sign and decimals: e.g. |23,000.-|
;;
card_ne_4_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[-+±]?[1-9][0-9]{0,2}([,.][0-9]{3})+([,.]([0-9]*|-))?$ ] >,
  +OUTPUT < [ +CLASS card_ne ] > ].

;;
;; ordinals, mostly parallel to the numerals
;;
;; NOTE(review): as written, the day-of-month pattern does not match |11th|,
;; |12th|, or |13th| (these fall through to ord_ne_tmr), while it does match
;; forms such as |1th|, |31th|, and |32nd| -- confirm whether this is intended.
;;
ord_or_dom_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^([23]?(1st|2nd|3rd)|(2?[1-9]|1[04-9]|20|30|31)th)$ ] >,
  +OUTPUT < [ +CLASS ord_or_dom_ne ] > ].

;;
;; general ordinals: any digit prefix, ending in |1st|, |2nd|, or |3rd| (not
;; directly preceded by |1|), or in |11th|, |12th|, |13th|, or a final |0| or
;; |4| to |9| plus |th|.
;;
ord_ne_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[0-9]*((^|[^1])(1st|2nd|3rd)|(11|12|13|[04-9])th)$ ] >,
  +OUTPUT < [ +CLASS ord_ne ] > ].

;;
;; fractions, again, this time pulled apart from a hyphenated context; an
;; optional trailing hyphen or en dash is admitted.
;;
fraction_ne_2_tmr := ne_tmt &
[ +INPUT < [ +FORM ^[0-9]+(\.[0-9]*)?/[1-9][0-9]*[-–]?$ ] >,
  +OUTPUT < [ +CLASS frct_ne ] > ].

;;
;; _fix_me_
;; another pass at the decades, for cases like |mid-1800s|; see ‘ne1.tdl’ for
;; the original rules.  to clean these up, we would need something like the
;; ‘group call’ mechanism in REPP.                              (27-aug-11; oe)
;;
decade_ne_3_tmr := ne_tmt &
[ +INPUT < [ +FORM ^(?:1[0-9])?[0-9]0[sS]$ ] >,
  +OUTPUT < [ +CLASS plur_ne ] > ].
;;
;; two-token variant of the decade rule: the first input token is a two-digit
;; number ending in |0|, optionally prefixed by |1| and one more digit (|50|,
;; |1950|); the second is an apostrophe (straight or curly) followed by |s| or
;; |S|.  both must still be of class non_ne.  the single output token gets a
;; +FORM built from the first capture group of each input +FORM, is classified
;; as plur_ne, and re-uses +TRAIT, +PRED, +CARG, and +TNT.+MAIN of the first
;; input token.
;;
decade_ne_4_tmr := two_one_tmt &
[ +INPUT
  < [ +FORM ^((?:1[0-9])?[0-9]0)$,
      +TRAIT #trait,
      +CLASS non_ne,
      +PRED #pred,
      +CARG #carg,
      +TNT.+MAIN #main ],
    [ +FORM ^(['’][sS])$,
      +CLASS non_ne ] >,
  +OUTPUT
  < [ +FORM "${I1:+FORM:1}${I2:+FORM:1}",
      +TRAIT #trait,
      +CLASS plur_ne,
      +PRED #pred,
      +CARG #carg,
      +TNT null_tnt & [ +MAIN #main ] ] > ].