;    MULTEXT morphology tool
;    English morphology description for mmorph
;    Version 2.3, October 1995
;    Copyright (c) 1994,1995 ISSCO/SUISSETRA, Geneva, Switzerland

;; Rules for English inflection
;; Author: Graham Russell <russell@divsun.unige.ch>
;; Additions by: Susan Armstrong <susan@divsun.unige.ch>
;;
;; GJR ISSCO 20-1-95
;;
;; SA ISSCO last change 16-5-95
;;---------------------------------------------------------------

;; Declare the surface and lexical alphabets.  

@ Alphabets

;; Symbols allowed on the lexical side of a spelling pair.  Besides the
;; plain characters shared with the surface alphabet, it contains the
;; multi-character ("complex") symbols u_a u_e u_i u_o u_u x_o y_o ch qu.
;; These have no surface counterpart; the pair sets in "@ Pairs"
;; (lUnStr_sPlain, lComplex_sSimple, lO2_sOE, lO2_sO, ...) map them to
;; plain surface characters.
lexical : a b c d e f g h i j k l m n o p q r s t u v w x y z 
	  ; complex lexical symbols
          u_a u_e u_i u_o u_u x_o y_o ch qu 
	  "-" "'" "/" "." "_" "$" "%"
	  0 1 2 3 4 5 6 7 8 9 
	                
	  A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

;; Symbols allowed on the surface side: the same inventory as above
;; minus the complex lexical symbols.
surface : a b c d e f g h i j k l m n o p q r s t u v w x y z 
          "-" "'" "/" "." "_" "$" "%"
	  0 1 2 3 4 5 6 7 8 9 
	                
	  A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

;; We could have a spelling rule that allowed e.g. lexical ""
;; to match both surface "" and surface "a".

;;---------------------------------------------------------------
;; Declare the attributes we'll be using, together with their 
;; possible values.

@ Attributes

;; NUMber: SinGular and PLural
;;
num  : sg pl

;; PERson: 1st, 2nd, 3rd
;;
per  : 1 2 3

;; GENder: masculine, feminine, neuter
;;
gen  : m f n
    
;; VFM (Verb ForM) values:
;; fin  - "finite"; the tense is then carried separately by 'tns'
;;        (this is how rule VB1 and the affixes VB.pres/VB.pfin use it)
;; bse  - "base" (read, eat, walk, etc.)
;; prp  - "present participle" (reading, eating, walking, etc.)
;; psp  - "past participle" (read, eaten, walked, etc.)
;; pres - "present finite" (read, reads, eat, eats, etc.)
;; past - "past finite" (read, ate, walked, etc.)
;;
;; The two commented-out declarations below are earlier variants.  The
;; live one keeps 'pres' and 'past' alongside 'fin'; the grammar rules
;; in this file only use fin, bse, psp and prp.
;; NOTE(review): check whether the lexicon files still use vfm=pres|past.
;;
; vfm  : pres past bse psp prp 
; vfm  : fin bse psp prp 
vfm  : fin pres past bse psp prp 

;; TeNSe of a finite verb form: PRESent or PAST (paired with vfm=fin).
tns  : pres past

;; DEGree of adjectives: POSitive (base form), COMParative, and SUPerlative
;;
deg  : pos comp sup

;; INFLectable (i.e. complete word)?
;; Only referenced by the commented-out declaration of type V in this
;; file; no live type uses it.
;;
infl : yes no

;; Classification of (ir)regular items.
;;
;; A single attribute 'r' is shared by verbs, nouns and adjectives; the
;; meaning of a value depends on the category it appears on.  The
;; word-structure rules NPL, ADJ1, ADV, VB1 and VB2 (via the affix
;; entries) consult it to decide which suffixes a stem may take.
;;
;; Verbs:                              
;; r   regular: past tense in -(e)d - past participle in -(e)d
;; i1  regular past tense, irregular past participle
;; i2  irregular past tense and past participle
;; i3  dual form past tense and past participle
;;
;; Nouns:
;; r   regular plural in -(e)s
;; i   irregular/non-existent plural
;;
;; Adjectives:
;; r   comparative/superlative in -(e)r/-(e)st, adverb in -ly
;; r1  comparative/superlative in -(e)r/-(e)st, adverb in -ally (none?)
;; r2  comparative/superlative in -(e)r/-(e)st, no -ly adverb
;; i   no lexical comparative/superlative, adverb in -ly
;; i1  no lexical comparative/superlative, adverb in -ally
;; i2  no lexical comparative/superlative, no -(al)ly adverb
r  : r r1 r2 i1 i2 i3 i

;; TYP is where we put things we don't know what to call...
;; SA - added dummy to get any disjunction to print out
;; NOTE(review): the individual values are not documented here.  From
;; the grammar rules, 'c' is the value NPL requires of nouns and 'v' the
;; value VB1/VB2 require of verbs; the rest are used by the lexicon only.
typ : c p p1 p2 v a m per ref gen poss sub crd ind def ord card cmp dummy

;; type of complement for a subordinating conjunction:
;; finite (tensed sentence), infinitival, subjunctive, obligatory "that"
ctyp: f i s t

;; position of a coordinating conjunction:
;; initial (both, neither, either), medial (and, but, or, nor)
cpos: i m

;; CASE of a pronoun (only type Pro carries it): presumably
;; NOMinative and ACCusative.
case: nom acc

;; number, person and gender of the "possessor":
;;
p_num: sg pl
p_per: 1 2 3
p_gen: m f n

;; "function" of an adverb:
;; modifier (quickly, soon,...), specifier (very, too, how,...)
fn  : mod spe

;; q: questions only; r: relatives only (q|r for both, obviously)
wh  : no q r

;; POSition of an adposition: presumably PREposition vs. POSTposition.
;; (Only type Adp uses this attribute; the position of a coordinating
;; conjunction is 'cpos' above.)
pos : pre post

;; all types need to be declared with at least one attribute, 
;; which must have at least one value.  Use this when they are
;; not needed in the description
dummy : perhaps

;; TEMPORARY NEW ATTRIBUTES FOR FULL CORPUS COVERAGE
;; SA - ADDED char for single character symbols, acro(nym), code, for(eign)
;; and "s" for the possessive "'s" marker
;; ttyp for temporary type
ttyp : char acro code for s

;;---------------------------------------------------------------
;; Declare the "types" we'll be using, together with their 
;; attributes.  Attributes following "|" won't be printed out
;; SA added a few new (perhaps temporary) attributes 
@ Types

;; Major categories.  'r' (regularity class) is placed after "|" so it
;; steers the rules without being printed.
N	:   num typ gen | r
A	:   deg | r
;; Earlier declaration of V, kept for reference (it used 'infl'):
; V	:   vfm num per typ | infl r
V	:   tns vfm num per typ | r
Adv	:   deg fn wh
;; Affix types.  They have no Goal rule in "@ Grammar", so they only
;; ever appear on the right-hand side of a word-structure rule.
nsuf	:   num
adjsuf	:   deg 
; vsuf	:   num per vfm r
vsuf	:   tns num per vfm | r
advsuf  :   r
;; Minor categories; no word-structure rules build them.
Det     :   typ wh num p_num p_per p_gen
C	:   typ ctyp cpos
Pro	:   typ wh per num gen case p_num p_gen p_per
Num	:   typ
Adp	:   pos
;; 
;; SA - new
;; Catch-all categories for corpus coverage; each carries only the
;; temporary attribute 'ttyp'.
;;
ABBR	: ttyp
POSS	: ttyp
MISC	: ttyp
SYMB	: ttyp
;;---------------------------------------------------------------
;; Word-structure rules.

@ Grammar

;; Goal rules define what counts as a good complete word:
;; one rule per category, with an empty feature structure so that any
;; fully built item of that type is accepted.  The suffix types (nsuf,
;; adjsuf, vsuf, advsuf) deliberately have no goal rule.
;; 
GoalN:	    N[]
GoalA:	    A[]
GoalV:	    V[]
GoalAdv:    Adv[]
;; No other rules for subsequent categories:
GoalC:	    C[]
GoalPro:    Pro[]
GoalDet:    Det[]
GoalNum:    Num[]
GoalAdp:    Adp[]
;; SA added some new "words"
;; (note: the rule named GoalPOS is the goal for type POSS)
GoalABBR:	ABBR[]
GoalPOS:	POSS[]
GoalMISC:	MISC[]
GoalSYMB:	SYMB[]

;;
;; Plural nouns.  The RHS noun feature structure will not
;; unify with lexical entries marked as 'r=i', so nouns
;; with irregular or non-existent plurals are not accepted.
;;
;; The stem must be a singular noun with typ=c; its gender is copied
;; to the result through the shared variable $g.
;;
NPL : N[num=pl typ=c gen=$g] 
	<- N[num=sg typ=c gen=$g r=r] 
	   nsuf[num=pl]

;; affixes for this rule:
;; The suffix is always lexical "s"; surface variants such as -es and
;; -ies come from the spelling rules (surfonly_E_*, lex_Y_surf_IE, ...).
;;
N.plural	: "s"	    nsuf[num=pl]

;;
;; Comparative and superlative adjectives.  The value of 'deg' 
;; in the LHS and the suffix is unified  - and restricted to 
;; 'comp' or 'sup'
;;
;; The stem must be a positive-degree adjective whose regularity class
;; takes -(e)r/-(e)st.  According to the documentation of 'r' these are
;; r, r1 and r2; r1 was previously missing from the restriction.  The
;; i* classes ("no lexical comparative/superlative") stay excluded.
;; 
ADJ1 : A[deg=$deg=comp|sup] 
	<- A[deg=pos r=r|r1|r2] 
	   adjsuf[deg=$deg]

;; affixes: lexical "er"/"est"; stem-final changes (large+est->largest,
;; easy+er->easier, big+er->bigger) are left to the spelling rules.
ADJ.comparative :   "er"   adjsuf[deg=comp]
ADJ.superlative :   "est"  adjsuf[deg=sup]

;; Adverb from adjective.  The 'r' value in the stem controls 
;; whether this rule can apply, and if it does which suffix is
;; involved.  
;; The stem's 'r' is unified with the suffix's 'r' through $r:
;;   r or i    -> "ly"
;;   r1 or i1  -> "ally"
;; A stem with any other 'r' value (r2, i2, i3) matches neither suffix
;; entry, so no adverb is derived from it.
;; The result is always a positive-degree, non-wh modifier adverb.
ADV : Adv[fn=mod deg=pos wh=no]
	<- A[deg=pos r=$r]
	   advsuf[r=$r]
ADV.ly1 : "ly"	    advsuf[r=r|i]
ADV.ly2 : "ally"    advsuf[r=r1|i1]

;;
;; Various verb forms.  Again we restrict application by referring
;; to the value of 'r'.
;;
;; VB1 builds finite forms: tense, number and person are taken from the
;; suffix, and the stem's 'r' must unify with the suffix's 'r'.
;;
; VB1 : V[vfm=pres num=sg per=3 typ=v]
; 	<- V[vfm=bse typ=v]
; 	   vsuf[vfm=pres]
VB1 : V[vfm=fin tns=$tns num=$num per=$per typ=v]
	<- V[vfm=bse typ=v r=$r]
	   vsuf[vfm=fin tns=$tns num=$num per=$per r=$r]

;; VB2 builds the non-finite forms (present and past participle); the
;; verb form is taken from the suffix and restricted to prp or psp.
;;
; VB2 : V[vfm=$vfm typ=v]
; 	<- V[vfm=bse typ=v r=$r]
; 	   vsuf[vfm=$vfm!=pres r=$r]
VB2 : V[vfm=$vfm typ=v]
	<- V[vfm=bse typ=v r=$r]
	   vsuf[vfm=$vfm=prp|psp r=$r]

;; affixes
;;
;; Which classes take regular "ed" (see the documentation of 'r'):
;;   past participle : r  (regular) and i3 (dual form)
;;   past finite     : r  (regular), i1 (regular past tense) and
;;                     i3 (dual form)
;; The superseded entry VB.past.reg below gave "ed" to i3 for both psp
;; and past; i3 had been dropped from the past finite entry when vfm
;; was split into fin + tns, and is restored here.
;; "s" and "ing" leave 'r' unspecified and so apply to every class.
;;
; VB.pres	    :	"s"	vsuf[vfm=pres]
; VB.past.reg :	"ed"	vsuf[vfm=psp|past r=r|i3]
; VB.past.i1  :	"ed"	vsuf[vfm=past r=i1]
VB.pres	    :	"s"	vsuf[vfm=fin tns=pres per=3 num=sg]
VB.prp	    :	"ing"	vsuf[vfm=prp]
VB.psp	    :	"ed"	vsuf[vfm=psp r=r|i3]
VB.pfin	    :	"ed"	vsuf[vfm=fin tns=past r=r|i1|i3]

;;===============================================================
;; 
;; Spelling Rules
;; 
;;---------------------------------------------------------------
;; Define character classes and give them names.

@ Classes
        
;; CO: the consonant letters (y is not included).  Used as left context
;;     by surfonly_E_3, lex_Y_surf_I and lex_Y_surf_IE.
;; SC: s or c; used by surfonly_E_2 to recognise stem-final "sh"/"ch".
CO:     b c d f g h j k l m n p q r s t v w x z
SC:     s c

;;---------------------------------------------------------------
;; Define names for sets of pairs of characters.  
;; "X/Y" matches X in surface string and Y in lexical string.
;; "<X Y>/Z" matches surface string XY and lexical string Z.
;; "<>" matches the empty string.

@ Pairs
            
;; Lexical s/z realised as the doubled consonant plus "e"
;; (focus of lex_1_surf_2b: bus+s->busses).
l1_s2_SZ:   <s s e>/s <z z e>/z

;; One lexical consonant realised as two surface characters (c becomes
;; "ck").  Only referenced by the commented-out rule lex_1_surf_2a; the
;; live Gem_* rules use l1_s2_no_c and <c k>/c instead.
l1_s2:      <b b>/b <c k>/c <d d>/d <g g>/g <k k>/k <l l>/l 
            <m m>/m <n n>/n <p p>/p <r r>/r <t t>/t <v v>/v

;; Stem-final sibilant letters (left context of surfonly_E_1).
SXZ:        s/s x/x z/z 

;; We can "inherit" from other pair sets, provided that they have 
;; already been defined (i.e. before the compiler sees this):
;; (I_YSXZ is not referenced by any live rule in this file.)
I_YSXZ:     i/y SXZ

;; Suffix-initial e or i (right context of lexonly_E1b).
EI:         e/e i/i

;; Plain vowel character matches, inherited by "VV":
;; (lexical x_o and y_o surface as plain o; the unstressed u_V symbols
;; are deliberately left out)
V_no_u_V:   a/a e/e i/i o/o u/u o/x_o o/y_o

;; Lexical unstressed vowel characters matched with surface plain:
lUnStr_sPlain:   a/u_a e/u_e i/u_i o/u_o u/u_u

;; Complex lexical symbols corresponding to surface character
;; sequences:
;; (focus of lex_Odd_surf_Plain)
lComplex_sSimple:   lUnStr_sPlain <q u>/qu <c h>/ch

;; The general "VV" pair set includes the lexical "u_V" symbols,
;; representing the "unstressed Vs".  The doubling rules lex_1_surf_2b
;; and Gem_1..Gem_3 use V_no_u_V (which excludes them) for the vowel
;; before the doubled consonant; Gem_1 uses VV for the vowel after
;; the boundary.
VV:          lUnStr_sPlain V_no_u_V

;; Consonant pairs augmented with a match for <q u>/qu.  Lexical
;; "qu" behaves like a consonant and not a consonant-vowel 
;; sequence for the purpose of the doubling rules (lex_1_surf_2b,
;; Gem_1..Gem_3).  "?/qu" and "?/ch" pair lexical qu/ch with any
;; surface material, so lexical "ch" counts as a consonant here too.
CC:         b/b c/c d/d f/f g/g h/h j/j k/k l/l m/m n/n p/p q/q 
            r/r s/s t/t v/v w/w x/x z/z ?/qu ?/ch

;; Lexical x_o / y_o realised as surface "oe" (focus of surfonly_E_3).
lO2_sOE:    <o e>/x_o <o e>/y_o

;; Lexical x_o / y_o realised as plain surface "o" (focus of allow_lO2_sO).
lO2_sO:	    o/x_o o/y_o

;; Suffix-initial e or l (right context of lex_Y_surf_I).
LE:	    e/e l/l

;; Suffix-initial vowel letters other than a, plus y (right context of
;; Gem_2).
V_no_a:	    e/e i/i o/o u/u y/y

;; (LNR is not referenced by any live rule in this file.)
LNR:	    l/l n/n r/r

;; CC without l, n and r (right context of Gem_3).
;; NOTE(review): unlike CC this set has no ?/ch entry - confirm that
;; this is intended.
CC_no_LNR:  b/b c/c d/d f/f g/g h/h j/j k/k m/m p/p q/q s/s t/t 
	    v/v w/w x/x z/z ?/qu

;; Consonant doubling for every consonant handled by Gem_1; c is
;; excluded because it doubles as "ck" (Gem_2, Gem_3).  Unlike l1_s2
;; this set also doubles s and z.
l1_s2_no_c: <b b>/b <d d>/d <g g>/g <k k>/k <l l>/l <m m>/m <n n>/n 
	    <p p>/p <r r>/r <s s>/s <t t>/t <v v>/v <z z>/z

;; (SS_or_VV is not referenced by any live rule in this file.)
SS_or_VV:   VV s/s

;; Plain vowels other than u (left context of lexonly_E2a).
VV_no_u:    a/a e/e i/i o/o o/x_o o/y_o

;; Plain consonants other than q, without the complex qu/ch symbols
;; (left context of lexonly_E2b).
CC_no_q:    b/b c/c d/d f/f g/g h/h j/j k/k l/l m/m n/n p/p
            r/r s/s t/t v/v w/w x/x z/z

;; Consonants, unstressed vowels and the plain vowels a i o u y, i.e.
;; the listed pairs leave out plain e/e (left context of lexonly_E1b).
not_E:	    CC lUnStr_sPlain a/a i/i o/o u/u y/y
;;---------------------------------------------------------------
;; The spelling rules.
;; 
;; left_context - focus - right_context
;; "*" matches morpheme boundary
;; "?" matches anything

@ Spelling

;; Superseded by Gem_1..Gem_3 below, which split the doubled consonant
;; into "everything but c" and "c -> ck".
; lex_1_surf_2a:
; ;; big+er->bigger, hop+ing->hopping, tap+ed->tapped, etc.
; ;; Preceding vowel pair must not be e/u_e - see lexical entries
; ;; for "offer" and "prefer"
;     <=>	    CC V_no_u_V - l1_s2 - * VV

lex_1_surf_2b:
;; bus+s->busses, fez+s->fezzes, quiz+s->quizzes, etc
;; After consonant + plain (not unstressed) vowel, stem-final s/z is
;; doubled and followed by e before the suffix "s".  Note that the
;; morpheme boundary is part of the focus here.
    <=>	    CC V_no_u_V - l1_s2_SZ * - s/s

;; Consonant doubling after consonant + plain (not unstressed) vowel.
;; The vowel must not be a lexical u_V symbol, which is how the lexicon
;; can switch doubling off for a stem (see the note on "offer" and
;; "prefer" above).
Gem_1:
;; big+er->bigger, hop+ing->hopping, tap+ed->tapped, etc.
;; Any consonant except c, before a vowel-initial suffix.
    <=>	    CC V_no_u_V - l1_s2_no_c - * VV
Gem_2:
;; c is written "ck" before a suffix starting with e, i, o, u or y.
    <=>	    CC V_no_u_V - <c k>/c - * V_no_a
Gem_3:
;; c is written "ck" before a suffix starting with a, but only if that
;; a is followed by a consonant other than l, n or r.
    <=>	    CC V_no_u_V - <c k>/c - * a/a CC_no_LNR


lex_Odd_surf_Plain:
;; Complex lexical symbols match their associated surface sequences 
;; everywhere
;; (both contexts are empty, so the rule is unconditional)
    <=>	    - lComplex_sSimple - 


surfonly_E_1:
;; boss+s<->bosses, box+s<->boxes
;; A surface-only e is inserted between a stem ending in s, x or z and
;; the suffix "s".
    <=>	   SXZ * - e/<> - s/s

surfonly_E_2:
;; dish+s<->dishes, catch+s<->catches
;; Same insertion after stem-final "sh" or "ch".
    <=>	    SC h/h * - e/<> - s/s

;; The next three rules handle the lexical symbols x_o and y_o.  As the
;; comments on the rules indicate, "=>" only licenses a pairing in the
;; given context, while "<=" makes it the required one there.  Taken
;; together: both symbols may surface as "oe" after a consonant before
;; the suffix "s", both may surface as plain "o" anywhere, and y_o must
;; surface as "oe" before the suffix "s".

surfonly_E_3:
;; potato+s<->potatoes, avocado+s<->avocadoes (e optional)
    =>	    CO - lO2_sOE - * s 

allow_lO2_sO:
;; avocado+s<-> avocados, echo<->echo
    =>	    - lO2_sO -

block_lO2_nosE:
;; echo+s<->echoes (e not optional)
    <=	    - <o e>/y_o - * s

;; Earlier single-rule version, replaced by lexonly_E1a/lexonly_E1b:
; lexonly_E1:
; ;; large+est<->largest, agree+ed<->agreed
;     <=>	    - <>/e - * EI
lexonly_E1a:
;; agree+ed<->agreed
;; Stem-final lexical e preceded by another e is dropped before a
;; suffix starting with e (but not before one starting with i).
    <=>	  e/e  - <>/e - * e/e
lexonly_E1b:
;; large+est<->largest
;; Stem-final lexical e preceded by anything in not_E (consonants,
;; unstressed vowels, a i o u y) is dropped before a suffix starting
;; with e or i.
    <=>	   not_E - <>/e - * EI


;; Because of the behaviour of qu, we must split the next one into 
;; two parts, one for left contexts without a u/u pair, and one for
;; u/u preceded by a non-q: 
;; true+ly<->truly, but oblique+ly<->obliquely
;; Both parts drop a stem-final lexical e that follows a vowel, before
;; a suffix starting with l.
lexonly_E2a:
    <=>	    VV_no_u - <>/e - * l/l
lexonly_E2b:
    <=>	   CC_no_q u/u - <>/e - * l/l


lex_Y_surf_I:
;; easy+er<->easier, carry+ed<->carried, heavy+ly<->heavily
;; Lexical y after a consonant surfaces as i before a suffix starting
;; with e or l.  (The suffix "s" is handled by lex_Y_surf_IE.)
    <=>	    CO - i/y - * LE

lex_Y_surf_IE:
;; fry+s<->fries, carry+s<->carries
;; Lexical y after a consonant surfaces as "ie" before the suffix "s".
    <=>	    CO - <i e>/y - * s/s

lex_I_surf_Y:
;; lie+ing<->lying, etc.
;; Stem-final lexical "ie" surfaces as y before a suffix starting with i.
    <=>	    - y/i <>/e - * i/i

lex_LE_surf_0:
;; simple+ly<->simply
;; Stem-final lexical "le" after a consonant is dropped before the
;; suffix "ly".
    <=>	    CC - <>/l <>/e - * l y

lex_only_L:
;; full+ly<->fully
;; The second l of stem-final "ll" is dropped before a suffix starting
;; with l.
    <=>	    l/l - <>/l - * l/l
;;---------------------------------------------------------------

@ Lexicon

#include "lex.proper"

#include "lex.exp.N"

#include "lex.exp.A"

#include "lex.exp.V"

#include "lex.exp.minor"

;;---------------------------------------------------------------
