# Last edited on 2000-02-05 00:37:29 by stolfi

# Probabilistic grammar for non-empty mantle+crust suffixes of words with
# non-empty cores.

# The "core" consists of gallows letters (incl. platform gallows).
# The "suffix" is anything that follows the core.

# NS: top-level rule for the whole mantle+crust suffix.
#
# Suffixes may be pure crust (S), or start with a mantle suffix (N1S).
# There is also a small number (1.8%) of `bad' suffixes with crust
# letters before mantles; they are lumped under the symbol (NSBAD),
# which is not expanded in this part of the file (apparently a
# placeholder -- confirm).
#
# Format of each alternative: "COUNT PROB PRODUCTION", where PROB is
# COUNT divided by the sum of the counts of the rule (here 16804).
# The "." inside a production appears to denote concatenation.

NS:
   8487 0.50506 N1S
   8008 0.47655 S
    309 0.01839 (NSBAD)

# Non-empty mantle+crust suffixes with non-empty mantle suffix.
#
# The distribution of crust suffixes after a non-empty mantle suffix
# seems to depend mainly on whether the latter ends with isolated "e"
# or not. The symbols NE and NX generate these two subsets.
#
# The alternatives "NX.SX" and "NE.SE" are a mantle suffix followed by
# the matching crust-suffix distribution; the bare "NX" and "NE"
# alternatives are a mantle suffix with nothing after it.
#
# NOTE(review): the counts below sum to 8518 (which is the total used
# for the probabilities), whereas rule NS lists 8487 for N1S --
# presumably the two tallies were taken separately; confirm.

N1S:
   4614 0.54168 NX.SX
     17 0.00200 NX
   3873 0.45468 NE.SE
     14 0.00164 NE

# The following non-terminal is provided for debugging, and is not part
# of the suffix grammar.  It generates the distribution
# of non-empty crust suffixes for words with non-empty core,
# irrespective of the mantle suffix: it simply pools S (no mantle),
# SX (after NX) and SE (after NE), weighted by their counts.
#
# The S count equals the one in rule NS, and the SX count equals the
# sum of the counts in rule SX.
# NOTE(review): SE is listed here as 3859, while the counts in rule SE
# sum to 3849 -- confirm which is intended.

Sdb:
   8008 0.48639 S
   4597 0.27922 SX
   3859 0.23439 SE
    
# The following non-terminal is provided for debugging too.
# It generates the distribution of non-empty mantle suffixes
# for words with non-empty core.
#
# The NX count is the sum of the "NX.SX" and "NX" counts of rule N1S
# (4614 + 17), and the NE count is the sum of the "NE.SE" and "NE"
# counts (3873 + 14).  The (NBAD) count equals the (NSBAD) count in
# rule NS; like (NSBAD), (NBAD) is not expanded in this part of the
# file.

Ndb: 
   4631 0.52464 NX
   3887 0.44035 NE
    309 0.03501 (NBAD)

# There are two sets of crust suffixes after a non-empty mantle suffix,
# NX-loving (S1) and NE-loving (S2).  All four combinations do occur, but
# in different proportions.

# Distribution of non-empty crust suffixes after NX mantle suffix.
# After NX the S1 set is the more frequent choice (about 58%), but the
# S2 set also occurs.  Probabilities are relative to the total 4597.

SX:
   2661 0.57886 S1
   1936 0.42114 S2

# Distribution of non-empty crust suffixes after NE mantle suffix.
# After NE the S2 set is the more frequent choice (about 64%), but the
# S1 set also occurs.  Probabilities are relative to the total 3849.

SE:
   2449 0.63627 S2
   1400 0.36373 S1

# Distribution of non-empty crust suffixes that follow the core
# directly without any mantle suffix.
#
# The alternatives are grouped by their initial string; most groups
# come in pairs "X" / "X.T", where T is one of the tail non-terminals
# defined further down in this file:
#
#   T4   after "d", "ad", "od"
#   T3   after "l", "al", "oal", "ol", "ool", "ar", "or", "oar",
#        "oor", "r", "s", "as", "os"
#   T5   after "y", "oy"
#   SAI  alone, or after "o"
#
# Probabilities are relative to the sum of the counts listed here
# (7376).  The alternatives "ool.T3" and "oor.T3" have count 0 and
# probability 0.00000, so they take no probability mass.

S:
     80 0.01085 o
       
     85 0.01152 d.T4
     10 0.00136 d

      2 0.00027 ad
      9 0.00122 ad.T4

    214 0.02901 od.T4
     35 0.00475 od
       
     32 0.00434 l
      8 0.00108 l.T3

    723 0.09802 al
    192 0.02603 al.T3
      3 0.00041 oal
      2 0.00027 oal.T3

    601 0.08148 ol
    141 0.01912 ol.T3
      2 0.00027 ool
      0 0.00000 ool.T3
       
    785 0.10643 ar
     97 0.01315 ar.T3
    309 0.04189 or
     48 0.00651 or.T3
     10 0.00136 oar
      2 0.00027 oar.T3
      3 0.00041 oor
      0 0.00000 oor.T3
      1 0.00014 r
      1 0.00014 r.T3
       
      7 0.00095 s
      8 0.00108 s.T3
     10 0.00136 as
      2 0.00027 as.T3
     26 0.00352 os
      6 0.00081 os.T3
       
   1700 0.23048 y
     56 0.00759 y.T5
     20 0.00271 oy
      2 0.00027 oy.T5
       
   2100 0.28471 SAI
     22 0.00298 o.SAI
       
     11 0.00149 a
       
      3 0.00041 ay
      2 0.00027 iin
      4 0.00054 iir
      2 0.00027 il

# BUILDING BLOCKS:

# NX: non-empty mantle suffixes that do NOT end with an isolated "e"
# (they end with "ee", "eeee", or a CH element).  Used by rule N1S,
# where NX is followed by the SX crust distribution.
# Probabilities are relative to the sum of the counts listed (4611).

NX:
   2456 0.53264 ee
   1738 0.37692 CH
    198 0.04294 e.ee
     82 0.01778 e.CH

    101 0.02190 CH.ee
     24 0.00520 ee.CH
     10 0.00217 CH.CH
      2 0.00043 eeee

# NE: non-empty mantle suffixes that end with an isolated "e",
# optionally preceded by a CH element (itself optionally preceded by
# another "e").  Used by rule N1S, where NE is followed by the SE
# crust distribution.
# Probabilities are relative to the sum of the counts listed (3862).

NE:
   2743 0.71025 e
   1082 0.28017 CH.e
     37 0.00958 e.CH.e

# CH: the element used inside the NX and NE mantle suffixes; it is
# either "ch" (about 86%) or "sh" (about 14%).

CH: 
   2659 0.85581 ch
    448 0.14419 sh


# Crust suffixes that follow a non-empty mantle suffix and are more
# frequent after NX than NE (the "NX-loving" set; see rules SX and SE
# for how often S1 is chosen after each kind of mantle suffix).
#
# OR1 expands to "or" or "ar"; "OR1.T2" is an OR1 followed by a
# non-empty tail from rule T2.
# Probabilities are relative to the sum of the counts listed (3600).

S1:
   2200 0.61111 y
    700 0.19444 OR1
    314 0.08722 o
    128 0.03556 d
     97 0.02694 s
     44 0.01222 aiin
     27 0.00750 am
     19 0.00528 OR1.T2
     16 0.00444 oy
      9 0.00250 oar
      9 0.00250 odain
      9 0.00250 sy
      7 0.00194 r
      5 0.00139 n
      5 0.00139 odol
      4 0.00111 an
      4 0.00111 m
      3 0.00083 l

# Crust suffixes that follow a non-empty mantle suffix and are generally
# more frequent after NE than NX (the "NE-loving" set; see rules SX and
# SE for how often S2 is chosen after each kind of mantle suffix).
#
# OL1 expands to "ol", "al", "os" or "as"; "OL1.T2" is an OL1 followed
# by a non-empty tail from rule T2.
# Probabilities are relative to the sum of the counts listed (3571).

S2:
   1700 0.47606 dy
    900 0.25203 OL1
    372 0.10417 ody
    102 0.02856 dar
     87 0.02436 OL1.T2
     58 0.01624 dal
     57 0.01596 od
     52 0.01456 daiin
     40 0.01120 odaiin
     34 0.00952 odar
     27 0.00756 dain
     26 0.00728 odal
     23 0.00644 dam
     21 0.00588 om
     15 0.00420 ain
     14 0.00392 dair
     12 0.00336 dor
     10 0.00280 dol
      5 0.00140 ochy
      4 0.00112 ddy
      4 0.00112 g
      4 0.00112 odair
      4 0.00112 odam

# Non-empty crust suffixes after (NX.OL1|NE.OR1).
# In this grammar T2 is used as the tail of "OR1.T2" (in rule S1) and
# of "OL1.T2" (in rule S2); the same distribution serves both.
# Probabilities are relative to the sum of the counts listed (98).
T2:
     37 0.37755 y
     17 0.17347 dy
      9 0.09184 s
      7 0.07143 ol
      6 0.06122 aiin
      5 0.05102 or
      3 0.03061 d
      2 0.02041 ain
      2 0.02041 om
      2 0.02041 os
      1 0.01020 al
      1 0.01020 am
      1 0.01020 an
      1 0.01020 ar
      1 0.01020 g
      1 0.01020 l
      1 0.01020 o
      1 0.01020 ody

# Relative distribution of initial "ol", "al", "os", "as" in those
# crust suffixes that are more frequent after NE than after NX.
# OL1 is used in rule S2, alone and as the head of "OL1.T2".
# Probabilities are relative to the sum of the counts listed (872).
OL1:
    680 0.77982 ol
    110 0.12615 al
     78 0.08945 os
      4 0.00459 as

# Relative distribution of initial "ar", "or" in those
# crust suffixes that are more frequent after "NE"-type mantle suffix.
#
# NOTE(review): OR1 is used only in rule S1, which is described as the
# set that is more frequent after NX than after NE, so "NE" above may
# be a slip for "NX" -- confirm.
# Probabilities are relative to the sum of the counts listed (449).

OR1:
    355 0.79065 or
     94 0.20935 ar

# Non-empty things that follow initial "al" or "ar" in pure-crust
# suffixes.  In rule S the same tail is also attached to the other
# l/r/s-type heads: "l", "oal", "ol", "ool", "or", "oar", "oor", "r",
# "s", "as" and "os".
# Probabilities are relative to the sum of the counts listed (470).

T3:
    148 0.31489 y
     74 0.15745 dy
     40 0.08511 aiin
     27 0.05745 or
     26 0.05532 ar
     22 0.04681 al
     21 0.04468 ol
     15 0.03191 am
     10 0.02128 d
     10 0.02128 s
      9 0.01915 ain
      9 0.01915 ody
      7 0.01489 air
      5 0.01064 dal
      5 0.01064 o
      4 0.00851 daiin
      4 0.00851 om
      4 0.00851 os
      3 0.00638 airy
      3 0.00638 arar
      3 0.00638 dam
      3 0.00638 m
      3 0.00638 ory
      3 0.00638 r
      3 0.00638 sy
      2 0.00426 a
      2 0.00426 od
      2 0.00426 oiin
      2 0.00426 raiin
      1 0.00213 g

# Non-empty things that may follow initial
# "y" or "oy" in pure-crust suffixes (used in rule S as the tail of
# "y.T5" and "oy.T5").
# Probabilities are relative to the sum of the counts listed (46).
T5:
     20 0.43478 dy
      9 0.19565 d
      7 0.15217 daiin
      3 0.06522 l
      2 0.04348 aiin
      2 0.04348 dal
      2 0.04348 r
      1 0.02174 dain

# Pure-crust suffixes that start with [ao]i+[mngr] or [ao][mng].
# Used in rule S, alone and as the tail of "o.SAI".
#
# NOTE(review): the counts listed below sum to 2272, but the
# probabilities correspond to a total of 2278 (e.g. 1117/2278 =
# 0.49034), so the probabilities of this rule sum to about 0.9974
# rather than 1.  Presumably a few rare alternatives were dropped from
# the list -- confirm, and renormalize if the consumer requires it.
SAI:
   1117 0.49034 aiin
    654 0.28709 ain
    204 0.08955 am
    135 0.05926 air
     37 0.01624 oiin
     30 0.01317 an
     25 0.01097 om
     22 0.00966 aiir
     12 0.00527 aiiin
     11 0.00483 aim
      6 0.00263 ail
      4 0.00176 airam
      3 0.00132 ai
      3 0.00132 airar
      3 0.00132 airody
      3 0.00132 ais
      3 0.00132 oiiin

# Non-empty things that can follow initial "od" or "ad" in
# pure crust suffixes.  In rule S the same tail is also attached to a
# bare initial "d" ("d.T4").
# Probabilities are relative to the sum of the counts listed (297).
T4:
    190 0.63973 y
     39 0.13131 aiin
     19 0.06397 ar
     16 0.05387 al
      8 0.02694 ain
      5 0.01684 ol
      4 0.01347 air
      4 0.01347 aly
      3 0.01010 aiir
      3 0.01010 an
      2 0.00673 a
      2 0.00673 am
      2 0.00673 or