;;;-*- Mode: Lisp; Package: CGP -*-

(in-package :cgp)

#||

EAGLES morphosyntactic tagset, as encoded by CODE-EAGLES-TAGSET below.
Bracketed language names mark language-specific extensions.

1. N [noun]
   (i)   Type: 1. Common 2. Proper
   (ii)  Gender: 1. Masculine 2. Feminine 3. Neuter
   (iii) Number: 1. Singular 2. Plural
   (iv)  Case: 1. Nominative 2. Genitive 3. Dative 4. Accusative 5. Vocative
   (v)   Countability: 1. Countable 2. Mass
   (vi)  Definiteness: 1. Definite 2. Indefinite 3. Unmarked [Danish]
   (ii)  Gender: 4. Common [Danish, Dutch]
   (iv)  Case: 6. Vocative 7. Indeclinable [Greek]

2. V [verb]
   (i)    Person: 1. First 2. Second 3. Third
   (ii)   Gender: 1. Masculine 2. Feminine 3. Neuter
   (iii)  Number: 1. Singular 2. Plural
   (iv)   Finiteness: 1. Finite 2. Non-finite
   (v)    Verb form / Mood: 1. Indicative 2. Subjunctive 3. Imperative
          4. Conditional 5. Infinitive 6. Participle 7. Gerund 8. Supine
   (vi)   Tense: 1. Present 2. Imperfect 3. Future 4. Past
   (vii)  Voice: 1. Active 2. Passive
   (viii) Status: 1. Main 2. Auxiliary
   (ix)   Aspect: 1. Perfective 2. Imperfective
   (x)    Separability: 1. Non-separable 2. Separable
   (xi)   Reflexivity: 1. Reflexive 2. Non-reflexive
   (xii)  Auxiliary: 1. Have 2. Be
   (viii) Status: 3. Semi-auxiliary
   (xiii) Aux.-function: 1. Primary 2. Modal [English]
   (v)    Verb-form / Mood: 9. -Ing form [English]

3. AJ [adjective]
   (i)   Degree: 1. Positive 2. Comparative 3. Superlative
   (ii)  Gender: 1. Masculine 2. Feminine 3. Neuter
   (iii) Number: 1. Singular 2. Plural
   (iv)  Case: 1. Nominative 2. Genitive 3. Dative 4. Accusative
   (v)   Inflection-type: 1. Weak-Flection 2. Strong-Flection 3. Mixed
   (vi)  Use: 1. Attributive 2. Predicative
   (vii) NP Function: 1. Premodifying 2. Postmodifying 3. Head-function
   (iv)  Case: 5. Vocative 6. Indeclinable [Greek]

4. PD [pronoun/determiner]
   (i)    Person: 1. First 2. Second 3. Third
   (ii)   Gender: 1. Masculine 2. Feminine 3. Neuter
   (iii)  Number: 1. Singular 2. Plural
   (iv)   Possessive: 1. Singular 2. Plural
   (v)    Case: 1. Nominative 2. Genitive 3. Dative 4. Accusative
          5. Non-genitive 6. Oblique
   (vi)   Category: 1. Pronoun 2. Determiner 3. Both
   (vii)  Pron.-Type: 1. Demonstrative 2. Indefinite 3. Possessive
          4. Int./Rel. 5. Pers./Refl.
   (viii) Det.-Type: 1. Demonstrative 2. Indefinite 3. Possessive
          4. Int./Rel. 5. Partitive
   (ix)   Special Pronoun Type: 1. Personal 2. Reflexive 3. Reciprocal
   (x)    Wh-Type: 1. Interrogative 2. Relative 3. Exclamatory
   (xi)   Politeness: 1. Polite 2. Familiar
   (ii)   Gender: 4. Common [Danish]
   (v)    Case: 7. Prepositional [Spanish]
   (xii)  Strength 1. Weak 2. Strong [French, Dutch, Greek]

5. AT [article]
   (i)   Article-Type: 1. Definite 2. Indefinite
   (ii)  Gender: 1. Masculine 2. Feminine 3. Neuter
   (iii) Number: 1. Singular 2. Plural
   (iv)  Case: 1. Nominative 2. Genitive 3. Dative 4. Accusative
   -
   (i)   Article-Type: 3. Partitive [French]
   (ii)  Gender: 4. Common [Danish]
   (iv)  Case: 5. Vocative 6. Indeclinable [Greek]

6. AV [adverb]
   (i)   Degree: 1. Positive 2. Comparative 3. Superlative
   (ii)  Adverb-Type: 1. General 2. Degree
   (iii) Polarity: 1. Wh-type 2. Non-wh-type
   (iv)  Wh-type: 1. Interrogative 2. Relative 3. Exclamatory
   (ii)  Adverb-Type: 3. Particle 4. Pronominal [English, German, Dutch]

7. AP [adposition]
   (i) Type: 1. Preposition
   (i) Type: 2. Fused prep-art
   (i) Type: 3. Postposition 4. Circumposition [German, English]

8. C [conjunction]
   (i)   Type: 1. Coordinating 2. Subordinating
   (ii)  Coord-Type: 1. Simple 2. Correlative 3. Initial 4. Non-initial
   (iii) Subord.-type: 1. With-finite 2. With-infin. 3. Comparative [German]

9. NU [numeral]
   (i)   Type: 1. Cardinal 2. Ordinal
   (ii)  Gender: 1. Masculine 2. Feminine 3. Neuter
   (iii) Number: 1. Singular 2. Plural
   (iv)  Case: 1. Nominative 2. Genitive 3. Dative 4. Accusative
   (v)   Function: 1. Pronoun 2. Determiner 3. Adjective

10. I [interjection]

11. U [unique/unassigned]
   - -
   (i) Unique-type:
       1. Infinitive marker [German zu, Danish at, Dutch, English]
       2. Negative particle [English not, n't]
       3. Existential marker [English there, Danish der]
       4. Second negative particle [French pas]
       5. Anticipatory er [Dutch]
       6. Mediopassive voice marker se [Portuguese]
       7. Preverbal particle [Greek]

12. R [residual]
   (i)   Type: 1. Foreign word 2. Formula 3. Symbol 4. Acronym
         5. Abbreviation 6. Unclassified
   (ii)  Number: 1. Singular 2. Plural
   (iii) Gender: 1. Masculine 2. Feminine 3. Neuter

13. PU [punctuation]
   (i) 1. Period 2. Comma 3. Question mark ...etc. ...
   (i) 1. Sentence-final 2. Sentence-medial 3. Left-Parenthetical
       4. Right-Parenthetical

||#

(defun write-eagles-feature (stream features plist)
  "Walk PLIST, a flat list of alternating code strings and feature symbols.
For each pair write the code to STREAM when the symbol occurs in FEATURES,
otherwise write 0.  One character is therefore emitted per pair.
No other definition in this file calls this function."
  (loop for (num f) on plist by #'cddr
        do (if (find f features)
               (write-string (string num) stream)
               (write-string "0" stream))))

(defun code-eagles-tagset (stream pos &rest features)
  "Write the positional EAGLES tag for part of speech POS to STREAM.

POS is one of the symbols N V AJ PD AT AV AP C NU I U R PU; any other
value writes nothing.  FEATURES are feature symbols such as COMMON or
PLURAL.  The tag is the category name followed by one character per
attribute slot: the code of the first listed value found in FEATURES,
or 0 when none of the slot's values is present."
  (labels ((write-feature (plist)
             ;; Emit exactly one character for this slot: the code of the
             ;; first value of PLIST present in FEATURES, else "0".
             (let ((foundp nil))
               (loop for (num f) on plist by #'cddr
                     when (and (not foundp) (find f features))
                       do (write-string (string num) stream)
                          (setf foundp t))
               (unless foundp
                 (write-string "0" stream)))))
    (case pos
      ;; 1. N [noun]
      (N
       (write-string "N" stream)
       ;; (i) Type:
       (write-feature '("1" Common "2" Proper))
       ;; (ii) Gender:
       ;; COMMON-GENDER rather than COMMON, because COMMON is already the
       ;; noun type above.
       (write-feature '("1" Masculine "2" Feminine "3" Neuter "4" Common-gender))
       ;; (iii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iv) Case:
       ;; NOTE(review): the reference comment at the top of the file lists
       ;; the Greek noun values as 6. Vocative 7. Indeclinable, while this
       ;; table emits 6 for Indeclinable -- confirm which is intended.
       (write-feature '("1" Nominative "2" Genitive "3" Dative "4" Accusative
                        "5" Vocative "6" Indeclinable))
       ;; (v) Countability:
       (write-feature '("1" Countable "2" Mass))
       ;; (vi) Definiteness:
       (write-feature '("1" Definite "2" Indefinite "3" Unmarked)))
      ;; 2. V [verb]
      (V
       (write-string "V" stream)
       ;; (i) Person:
       (write-feature '("1" First "2" Second "3" Third))
       ;; (ii) Gender:
       (write-feature '("1" Masculine "2" Feminine "3" Neuter))
       ;; (iii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iv) Finiteness:
       (write-feature '("1" Finite "2" Non-finite))
       ;; (v) Verb form / Mood:
       (write-feature '("1" Indicative "2" Subjunctive "3" Imperative
                        "4" Conditional "5" Infinitive "6" Participle
                        "7" Gerund "8" Supine "9" ing-form))
       ;; (vi) Tense:
       (write-feature '("1" Present "2" Imperfect "3" Future "4" Past))
       ;; (vii) Voice:
       (write-feature '("1" Active "2" Passive))
       ;; (viii) Status:
       (write-feature '("1" Main "2" Auxiliary))
       ;; (ix) Aspect:
       (write-feature '("1" Perfective "2" Imperfective))
       ;; (x) Separability:
       (write-feature '("1" Non-separable "2" Separable))
       ;; (xi) Reflexivity:
       (write-feature '("1" Reflexive "2" Non-reflexive))
       ;; (xii) Auxiliary:
       (write-feature '("1" Have "2" Be))
       ;; (viii) Status:
       ;; Semi-auxiliary occupies its own slot here, in addition to the
       ;; Status slot written above.
       (write-feature '("3" Semi-auxiliary))
       ;; (xiii) Aux.-function: [English]
       (write-feature '("1" Primary "2" Modal)))
      ;; 3. AJ [adjective]
      (AJ
       (write-string "AJ" stream)
       ;; (i) Degree:
       (write-feature '("1" Positive "2" Comparative "3" Superlative))
       ;; (ii) Gender:
       (write-feature '("1" Masculine "2" Feminine "3" Neuter))
       ;; (iii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iv) Case:
       (write-feature '("1" Nominative "2" Genitive "3" Dative "4" Accusative
                        "5" Vocative "6" Indeclinable))
       ;; (v) Inflection-type:
       (write-feature '("1" Weak-Flection "2" Strong-Flection "3" Mixed))
       ;; (vi) Use:
       (write-feature '("1" Attributive "2" Predicative))
       ;; (vii) NP Function:
       (write-feature '("1" Premodifying "2" Postmodifying "3" Head-function)))
      ;; 4. PD [pronoun/determiner]
      (PD
       (write-string "PD" stream)
       ;; (i) Person:
       (write-feature '("1" First "2" Second "3" Third))
       ;; (ii) Gender:
       (write-feature '("1" Masculine "2" Feminine "3" Neuter "4" Common))
       ;; (iii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iv) Possessive:
       ;; Uses the same SINGULAR/PLURAL symbols as Number, so both slots
       ;; always carry the same value.
       (write-feature '("1" Singular "2" Plural))
       ;; (v) Case:
       (write-feature '("1" Nominative "2" Genitive "3" Dative "4" Accusative
                        "5" Non-genitive "6" Oblique "7" Prepositional))
       ;; (vi) Category:
       (write-feature '("1" Pronoun "2" Determiner "3" Both))
       ;; (vii) Pron.-Type:
       ;; Value 4 is spelled INT/REL (not INT./REL.) so that it matches the
       ;; symbol produced by CG-TO-EAGLES-FEATURES and
       ;; PENN-TO-EAGLES-FEATURES, as the Det.-Type slot already does.
       (write-feature '("1" Demonstrative "2" Indefinite "3" Possessive
                        "4" Int/Rel "5" Pers/Refl))
       ;; (viii) Det.-Type:
       (write-feature '("1" Demonstrative "2" Indefinite "3" Possessive
                        "4" Int/Rel "5" Partitive))
       ;; (ix) Special Pronoun Type:
       (write-feature '("1" Personal "2" Reflexive "3" Reciprocal))
       ;; (x) Wh-Type:
       (write-feature '("1" Interrogative "2" Relative "3" Exclamatory))
       ;; (xi) Politeness:
       (write-feature '("1" Polite "2" Familiar))
       ;; (xii) Strength: [French, Dutch, Greek]
       (write-feature '("1" Weak "2" Strong)))
      ;; 5. AT [article]
      (AT
       (write-string "AT" stream)
       ;; (i) Article-Type:
       (write-feature '("1" Definite "2" Indefinite "3" Partitive))
       ;; (ii) Gender:
       (write-feature '("1" Masculine "2" Feminine "3" Neuter "4" Common))
       ;; (iii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iv) Case:
       (write-feature '("1" Nominative "2" Genitive "3" Dative "4" Accusative
                        "5" Vocative "6" Indeclinable)))
      ;; 6. AV [adverb]
      (AV
       (write-string "AV" stream)
       ;; (i) Degree:
       (write-feature '("1" Positive "2" Comparative "3" Superlative))
       ;; (ii) Adverb-Type:
       (write-feature '("1" General "2" Degree "3" Particle "4" Pronominal))
       ;; (iii) Polarity:
       (write-feature '("1" Wh-type "2" Non-wh-type))
       ;; (iv) Wh-type:
       (write-feature '("1" Interrogative "2" Relative "3" Exclamatory)))
      ;; 7. AP [adposition]
      (AP
       (write-string "AP" stream)
       ;; (i) Type:
       (write-feature '("1" Preposition "2" Fused-prep-art
                        "3" Postposition "4" Circumposition)))
      ;; 8. C [conjunction]
      (C
       (write-string "C" stream)
       ;; (i) Type:
       (write-feature '("1" Coordinating "2" Subordinating))
       ;; (ii) Coord-Type:
       (write-feature '("1" Simple "2" Correlative "3" Initial "4" Non-initial))
       ;; (iii) Subord.-type:
       (write-feature '("1" With-finite "2" With-infin. "3" Comparative)))
      ;; 9. NU [numeral]
      (NU
       (write-string "NU" stream)
       ;; (i) Type:
       (write-feature '("1" Cardinal "2" Ordinal))
       ;; (ii) Gender:
       (write-feature '("1" Masculine "2" Feminine "3" Neuter))
       ;; (iii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iv) Case:
       (write-feature '("1" Nominative "2" Genitive "3" Dative "4" Accusative))
       ;; (v) Function:
       (write-feature '("1" Pronoun "2" Determiner "3" Adjective)))
      ;; 10. I [interjection]
      (I
       (write-string "I" stream))
      ;; 11. U [unique/unassigned]
      (U
       (write-string "U" stream)
       ;; (i) Unique-type:
       (write-feature '("1" Infinitive-marker            ;; [German zu, Danish at, Dutch, English]
                        "2" Negative-particle            ;; [English not, n't]
                        "3" Existential-marker           ;; [English there, Danish der]
                        "4" Second-negative-particle     ;; [French pas]
                        "5" Anticipatory-er              ;; [Dutch]
                        "6" Mediopassive-voice-marker-se ;; [Portuguese]
                        "7" Preverbal-particle)))        ;; [Greek]
      ;; 12. R [residual]
      (R
       (write-string "R" stream)
       ;; (i) Type:
       (write-feature '("1" Foreign-word "2" Formula "3" Symbol "4" Acronym
                        "5" Abbreviation "6" Unclassified))
       ;; (ii) Number:
       (write-feature '("1" Singular "2" Plural))
       ;; (iii) Gender:
       (write-feature '("1" Masculine "2" Feminine "3" Neuter)))
      ;; 13. PU [punctuation]
      (PU
       (write-string "PU" stream)
       ;; (i) "1" Period "2" Comma "3" Question mark ...etc. ...
       ;; (i)
       (write-feature '("1" Sentence-final "2" Sentence-medial
                        "3" Left-Parenthetical "4" Right-Parenthetical))))))

;; * A common noun, feminine, plural, countable, is represented: N122010

#+test
(print (with-output-to-string (stream)
         (code-eagles-tagset stream 'N 'common 'feminine 'plural 'countable)))

(defun cg-to-eagles-features (pos cg-features)
  "Translate a Constraint Grammar reading into EAGLES category and features.

POS is the CG part-of-speech symbol and CG-FEATURES the list of remaining
CG tag symbols.  The EAGLES category symbol is collected first, followed
by the EAGLES feature symbols of every CG tag that has a mapping here.
The result is whatever the COLLECTING form yields (that macro is defined
outside this file; presumably the list of collected items, in order).

NOTE(review): in the copy of this file that was reviewed, every CG tag
written in angle brackets had been stripped, leaving forms such as
(find ' cg-features) and CASE clauses without a key.  The angle-bracket
symbols below are reconstructed from the tag names of the Oslo-Bergen
tagger and MUST be checked against the upstream source; the CLB keys and
the two sentence-medial CASE keys are the least certain."
  (collecting
    (labels ((translate (plist)
               ;; PLIST alternates CG tags and their EAGLES equivalents; an
               ;; equivalent that is a list contributes all of its elements.
               (loop for (cg-f eagles-f) on plist by #'cddr
                     when (find cg-f cg-features)
                       do (if (consp eagles-f)
                              (collect-append eagles-f)
                              (collect eagles-f)))))
      (case pos
        (subst
         (collect 'N)
         (translate '(ent singular fl plural
                      mask masculine fem feminine nøyt neuter
                      ub indefinite be definite
                      appell common prop proper)))
        (adj
         ;; Ordinal numbers are tagged as adjectives in CG but are numerals
         ;; in EAGLES.
         (cond ((find '<ordenstall> cg-features) ; NOTE(review): reconstructed tag
                (collect 'NU)
                (collect 'ordinal)
                (translate '(ent singular fl plural
                             mask masculine fem feminine nøyt neuter)))
               (t
                (collect 'AJ)
                (translate '(pos positive komp comparative sup superlative
                             ent singular fl plural
                             mask masculine fem feminine nøyt neuter
                             ub indefinite be definite)))))
        (verb
         (collect 'V)
         (translate '(perf-part (participle past)
                      inf infinitive
                      pass passive
                      pres present
                      pret past
                      imp imperative
                      <aux1/perf_part> auxiliary   ; NOTE(review): reconstructed tag
                      <aux1/infinitiv> auxiliary)) ; NOTE(review): reconstructed tag
         ;; Anything not marked as an auxiliary is a main verb.
         (unless (or (find '<aux1/perf_part> cg-features)
                     (find '<aux1/infinitiv> cg-features))
           (collect 'main)))
        (det
         (cond ((find 'kvant cg-features)
                (collect 'NU)
                (collect 'cardinal))
               (t
                (collect 'PD)
                (collect 'determiner)))
         (translate '(dem demonstrative
                      mask masculine fem feminine nøyt neuter
                      ent singular fl plural
                      sp int/rel
                      poss possessive)))
        (pron
         (collect 'PD)
         (translate '(pers (pers/refl personal)
                      refl (pers/refl reflexive)
                      res (pers/refl reciprocal)
                      sp int/rel
                      ;; hum
                      \1 first \2 second \3 third
                      nom nominative akk accusative
                      mask masculine fem feminine nøyt neuter
                      ent singular fl plural
                      høflig polite)))
        (konj
         (collect 'C)
         (collect 'coordinating)
         ;;(translate '(clb ...))
         )
        (prep
         (collect 'AP)
         (collect 'preposition))
        (adv
         (collect 'AV))
        (clb
         (collect 'PU)
         ;; NOTE(review): all six keys in this list are reconstructed.
         (translate '(<punkt> sentence-final
                      <spm> sentence-final
                      <utrop> sentence-final
                      <semi> sentence-final
                      <kolon> sentence-final ;; ??
                      <komma> sentence-medial)))
        (<komma>                        ; NOTE(review): reconstructed key
         (collect 'PU)
         (collect 'sentence-medial))
        (<strek>                        ; NOTE(review): reconstructed key
         (collect 'PU)
         (collect 'sentence-medial))
        (inf-merke
         (collect 'U)
         ;; Must be spelled like the Unique-type value in
         ;; CODE-EAGLES-TAGSET, otherwise the tag is encoded as U0.
         (collect 'infinitive-marker))
        (sbu
         (collect 'C)
         (collect 'subordinating))
        (fork
         (collect 'R)
         (collect 'abbreviation)
         (translate '(ent singular fl plural
                      mask masculine fem feminine nøyt neuter)))
        (symb
         (collect 'R)
         (collect 'symbol))
        (ukjent
         (collect 'R)
         (collect 'foreign-word))
        (interj
         (collect 'I))))))

#+test
(print (cg-to-eagles-features 'verb '(perf-part)))

#+test
(print (cg-to-eagles-features 'subst '(mask appell ub ent)))

#+old ;; used for tree-tagger pre-3.1
(defun penn-to-eagles-features (penn-tag)
  (case penn-tag
    (JJ '(AJ positive))
    (JJR '(AJ comparative))
    (JJS '(AJ superlative))
    (RB '(AV))
    (RBR '(AV comparative))
    (RBS '(AV superlative))
    (CD '(NU cardinal))
    (CC '(C coordinating))
    (DT '(PD determiner))
    (EX '(U existential-marker))
    (FW '(R foreign-word))
    (UH '(I))
    (LS '(R unclassified))
    (MD '(V auxiliary modal))
    (NN '(N singular common))
    (NNS '(N plural common))
    (NP '(N singular proper))
    (NPS '(N plural proper))
    (RP '(AV)) ;; ??
    (PP '(PD pronoun pers/refl personal))
    (PP$ '(PD pronoun possessive))
    (WP$ '(PD pronoun int/rel))
    (PDT '(PD determiner)) ;; predeterminer
    (IN '((AP preposition) (C subordinating)))
    (SYM '(R symbol))
    (TO '((U infinitive-marker) (AP preposition)))
    (VB '((V non-finite infinitive)
          (V finite imperative)
          (V finite subjunctive)))
    (VBD '(V finite past))
    (VBG '((V non-finite gerund)
           (V non-finite participle present)))
    (VBN '(V non-finite participle past))
    (VBP '(V finite present)) ;; non-3.sg
    (VBZ '(V finite present third singular))
    (WDT '(PD determiner int/rel))
    (WRB '(AV wh-type))))

;; for tree-tagger 3.1:
;; From: http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html
;; The tagset used by the TreeTagger is a refinement of this tagset [sic: the Penn-Treebank tagset]
;; where the second letter of the verb part-of-speech tags
;; distinguishes between "be" verbs (B), "have" verbs (H) and other
;; verbs (V).
;; The "be" and "have" verbs are marked as auxiliary in EAGLES.
(defun penn-to-eagles-features (penn-tag)
  "Map PENN-TAG, a TreeTagger 3.1 (refined Penn Treebank) tag symbol, to EAGLES.

Returns a list whose first element is the EAGLES category symbol followed
by feature symbols.  For ambiguous tags the result is instead a list of
such lists, one per possible reading.  Returns NIL for tags without an
entry."
  (case penn-tag
    (JJ '(AJ positive))
    (JJR '(AJ comparative))
    (JJS '(AJ superlative))
    (RB '(AV))
    (RBR '(AV comparative))
    (RBS '(AV superlative))
    (CD '(NU cardinal))
    (CC '(C coordinating))
    (DT '(PD determiner))
    (EX '(U existential-marker))
    (FW '(R foreign-word))
    (UH '(I))
    (LS '(R unclassified))
    (MD '(V auxiliary modal))
    (NN '(N singular common))
    (NNS '(N plural common))
    (NP '(N singular proper))
    (NPS '(N plural proper))
    (RP '(AV)) ;; ??
    (PP '(PD pronoun pers/refl personal))
    (PP$ '(PD pronoun possessive))
    (WP$ '(PD pronoun int/rel))
    (PDT '(PD determiner)) ;; predeterminer
    (IN '((AP preposition) (C subordinating)))
    (SYM '(R symbol))
    ;; INFINITIVE-MARKER must match the spelling used in
    ;; CODE-EAGLES-TAGSET, otherwise the tag is encoded as U0.
    (TO '((U infinitive-marker) (AP preposition)))
    ;; Other verbs:
    (VV '((V non-finite infinitive)
          (V finite imperative)
          (V finite subjunctive)))
    (VVD '(V finite past))
    (VVG '((V non-finite gerund)
           (V non-finite participle present)))
    (VVN '(V non-finite participle past))
    (VVP '(V finite present)) ;; non-3.sg
    (VVZ '(V finite present third singular))
    ;; 'Be' verbs and 'have' verbs (same as above, but includes 'auxiliary' attribute)
    ((VB VH) '((V auxiliary non-finite infinitive)
               (V auxiliary finite imperative)
               (V auxiliary finite subjunctive)))
    ((VBD VHD) '(V auxiliary finite past))
    ((VBG VHG) '((V auxiliary non-finite gerund)
                 (V auxiliary non-finite participle present)))
    ((VBN VHN) '(V auxiliary non-finite participle past))
    ((VBP VHP) '(V auxiliary finite present)) ;; non-3.sg
    ((VBZ VHZ) '(V auxiliary finite present third singular))
    (WDT '(PD determiner int/rel))
    (WRB '(AV wh-type))))

;;; EOF