Decided to create another error-tolerant encoding even more 
  "lossy" than HOP.  This one collapses FSG A with O, 
  R with 2, S with T.  It also ignores spaces (periods):
  
    --- fsg2ecc ------------------------
    #! /n/gnu/bin/gawk -f

    # Recoding an interlinear file from the FSG alphabet to 
    # my Super-Lossy Fault-Tolerant encoding (ECC).
    #
    # Line handling:
    #   - blank lines, "#" comment lines, and lines that begin with a
    #     <...> tag containing neither "." nor ";" are copied unchanged;
    #   - lines that begin with a <...> tag containing a "." and ending
    #     in ";X" (X an uppercase letter) followed by a blank are recoded:
    #     the first 19 columns are kept verbatim, the rest is converted;
    #   - there is no default rule, so any other line produces no output.

    BEGIN {
      print "# Output of fsg2ecc - Stolfi's Semi-Analytic Fault-Tolerant alphabet"
    }

    # Blank lines and comment lines pass through untouched.
    /^ *$/ { print; next }
    /^ *#/ { print; next }
    # Tags without "." or ";" (presumably page/unit headers rather than
    # text lines -- confirm against the evt format) also pass through.
    /^<[^>.;]*>/ { print; next }

    # Text lines: "<...;X> " locator followed by the transcribed text.
    /^<[^>]*\.[^>]*;[A-Z]> / {
      # The text is taken to start at column 20; columns 1-19 are
      # re-attached unchanged when the line is printed below.
      curtxt = substr($0,20)

      # We discard  "%" and "!" since the conversion
      # will destroy synchronism anyway.
      gsub(/[%!]/, "", curtxt);

      # We also discard spaces ("." in the evt format),
      # since they are not reliable
      gsub(/[.]/, "", curtxt);

      # First, the conversion from FSG to JSA (Stolfi's super-analytic)
      #
      # Order matters: multi-letter FSG groups (IIIK, IIE, HZ, IK, ...)
      # are rewritten before the single letters they contain (I, K, E,
      # H, ...).  Every replacement consists of lowercase letters or "?",
      # so it cannot be matched again by the uppercase/digit patterns
      # that follow in this stage.
      gsub(/IIIK/, "iiiij",  curtxt);
      gsub(/IIIL/, "iiiiu",  curtxt);
      gsub(/IIIR/, "iiiis",  curtxt);
      gsub(/IIIE/, "iiiix",  curtxt);
      gsub(/IIE/,  "iiix",   curtxt);
      gsub(/IIR/,  "iiis",   curtxt);
      gsub(/IIK/,  "iiij",   curtxt);
      gsub(/HZ/,   "cqjc",   curtxt);
      gsub(/PZ/,   "cqgc",   curtxt);
      gsub(/DZ/,   "cljc",   curtxt);
      gsub(/FZ/,   "clgc",   curtxt);
      gsub(/IE/,   "iix",    curtxt);
      gsub(/IR/,   "iis",    curtxt);
      gsub(/IK/,   "iij",    curtxt);
      gsub(/2/,    "cs",     curtxt);
      gsub(/4/,    "q",      curtxt);
      gsub(/6/,    "cj",     curtxt);
      gsub(/7/,    "ig",     curtxt);
      gsub(/8/,    "cg",     curtxt);
      gsub(/A/,    "ci",     curtxt);
      gsub(/C/,    "c",      curtxt);
      gsub(/D/,    "lj",     curtxt);
      gsub(/E/,    "ix",     curtxt);
      gsub(/F/,    "lg",     curtxt);
      gsub(/G/,    "cy",     curtxt);
      gsub(/H/,    "qj",     curtxt);
      gsub(/I/,    "i",      curtxt);
      gsub(/K/,    "ij",     curtxt);
      gsub(/L/,    "iu",     curtxt);
      gsub(/M/,    "iiiu",   curtxt);
      gsub(/N/,    "iiu",    curtxt);
      gsub(/O/,    "o",      curtxt);
      gsub(/P/,    "qg",     curtxt);
      gsub(/R/,    "is",     curtxt);
      gsub(/S/,    "cc",     curtxt);  # Was "csc" in JSA
      gsub(/T/,    "cc",     curtxt);
      gsub(/V/,    "?",      curtxt);
      gsub(/Y/,    "?",      curtxt);

      # Now, the conversion from JSA to ECC:
      #
      # Order matters here too.  Taking each FSG letter in isolation,
      # the net effect of the rules below is:
      #   D, H -> "H"   ("lj" and "qj" both match [ql]j)
      #   F, P -> "P"   ("lg" and "qg" both match [ql]g)
      #   R, 2 -> "r"   ("is" and "cs")
      #   M, N -> "m"   ("iiu" -> "n", then any i's plus "n" -> "m")
      #   A, G, O -> "o" ("y" -> "i" turns "cy" into "ci"; "ci" -> "a";
      #                   "a" -> "o")
      #   S, T -> "cc"  (already merged by the FSG -> JSA stage)
      # "ii*r" is applied after "is" -> "r", so it catches groups such
      # as "iis" (FSG "IR"), which by then read "ir", and maps them to "w".

      gsub(/[ql]j/, "H",     curtxt);
      gsub(/[ql]g/, "P",     curtxt);
      gsub(/ij/,    "k",     curtxt);
      gsub(/ii*x/,  "e",     curtxt);
      gsub(/is/,    "r",     curtxt);
      gsub(/iiu/,   "n",     curtxt);
      gsub(/y/,     "i",     curtxt);
      gsub(/ci/,    "a",     curtxt);
      gsub(/cg/,    "8",     curtxt);
      gsub(/cs/,    "r",     curtxt);
      gsub(/ii*r/,  "w",     curtxt);
      gsub(/i*n/,   "m",     curtxt);
      gsub(/a/,     "o",     curtxt);

      # Re-attach the 19-column locator to the recoded text.
      print (substr($0,1,19) curtxt);
      next
    }
    ------------------------------------
  
    cat bio-m-evt.evt \
      | fsg2ecc \
      > bio-m-ecc.evt
      
    cat bio-m-ecc.evt \
      | make-consensus-interlin \
      > bio-x-ecc.evt
  
    cat bio-x-ecc.evt \
      | egrep '^<.*;J> ' \
      | sed \
          -e 's/{[^}]*}//g' \
      > bio-j-ecc.evt

    extract-words-from-interlin \
        -chars "8coqHPemrwk" \
        bio-j-ecc.evt \
        bio-j-ecc

     lines   words     bytes file        
    ------ ------- --------- ------------
      1605    1605     35644 bio-j-ecc.wds
       767     767     33204 bio-j-ecc.dic
       333     333     13811 bio-j-ecc-gut.wds
       333     333     13811 bio-j-ecc-gut.dic
       840     840      2445 bio-j-ecc-fun.wds
         2       2         5 bio-j-ecc-fun.dic
       432     432     19388 bio-j-ecc-bad.wds
       432     432     19388 bio-j-ecc-bad.dic
       
  Here are the statistics.  Keep in mind that 
  spaces were deleted, and here " " means line break.

    Digraph counts:

           TT           8     c     o     q     H     P     e     m     r     w     k
        ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
          333     .    39    15    51    89    24    38    11     .    66     .     .
      8  1166     4     2    92  1052     9     2     .     4     .     1     .     .
      c  4351     1   909  2389   585     1   183    18   232     3    30     .     .
      o  3864   189   113   211   261   576   972    41   683   402   384    10    22
      q   728     .     .    10   718     .     .     .     .     .     .     .     .
      H  1347     .     2   853   484     .     .     .     5     1     2     .     .
      P   109     .     1    75    33     .     .     .     .     .     .     .     .
      e   958    64    67   360   224    29   162    10    18     .    24     .     .
      m   406    24    24   188   148    13     1     .     2     .     6     .     .
      r   517    31     9   153   302    11     3     2     3     .     3     .     .
      w    10     .     .     5     5     .     .     .     .     .     .     .     .
      k    22    20     .     .     1     .     .     .     .     .     1     .     .
        ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
    TOT 13811   333  1166  4351  3864   728  1347   109   958   406   517    10    22

    Next-symbol probability (× 99):

             8  c  o  q  H  P  e  m  r  w  k
         -- -- -- -- -- -- -- -- -- -- -- --
          . 12  4 15 26  7 11  3  . 20  .  .
      c   . 21 54 13  .  4  .  5  .  1  .  .
      o   5  3  5  7 15 25  1 17 10 10  .  1
      8   .  .  8 89  1  .  .  .  .  .  .  .
      q   .  .  1 98  .  .  .  .  .  .  .  .
      H   .  . 63 36  .  .  .  .  .  .  .  .
      P   .  1 68 30  .  .  .  .  .  .  .  .
      w   .  . 50 50  .  .  .  .  .  .  .  .
      e   7  7 37 23  3 17  1  2  .  2  .  .
      m   6  6 46 36  3  .  .  .  .  1  .  .
      r   6  2 29 58  2  1  .  1  .  1  .  .
      k  90  .  .  5  .  .  .  .  .  5  .  .
         -- -- -- -- -- -- -- -- -- -- -- --
    TOT   2  8 31 28  5 10  1  7  3  4  0  0
    
  Note that "e", "m", and "r" have become more similar.
  It is curious that "8" and "q" have very similar 
  next-symbol statistics.  Also curious that P and H 
  become identical...

    Previous-symbol probability (× 99):

        TT     w  k  m  e  H  P  q  r  8  c  o
        -- -- -- -- -- -- -- -- -- -- -- -- --
         2  .  .  .  .  1  2 35 12 13  3  .  1
      o 28 56 99 99 98 71 71 37 78 74 10  5  7
      c 31  .  .  .  1 24 13 16  .  6 77 54 15
      8  8  1  .  .  .  .  .  .  1  .  .  2 27
      q  5  .  .  .  .  .  .  .  .  .  .  . 18
      H 10  .  .  .  .  1  .  .  .  .  . 19 12
      P  1  .  .  .  .  .  .  .  .  .  .  2  1
      e  7 19  .  .  .  2 12  9  4  5  6  8  6
      m  3  7  .  .  .  .  .  .  2  1  2  4  4
      r  4  9  .  .  .  .  .  2  1  1  1  3  8
      w  0  .  .  .  .  .  .  .  .  .  .  .  .
      k  0  6  .  .  .  .  .  .  .  .  .  .  .
        -- -- -- -- -- -- -- -- -- -- -- -- --

    Symbol entropy: 2.693
    
  An encouraging sign: with this encoding, all labels in f77v can be found in
  the text of the bio section, hand B.

  Let's try to discern word/syllable boundaries from the 
  line breaks, in this reduced encoding:

    cat bio-j-ecc-gut.wds \
      | tr -d '\012' \
      | enum-ngraphs -v n=2 \
      | egrep -v '\*' \
      > .bio-j-ecc-tt-2.grm
      
    cat .bio-j-ecc-tt-2.grm \
      | sed -e 's/^\(.\)\(.\)$/\1:\2/g' \
      > .bio-j-ecc-tt-1-1.grm

    cat .bio-j-ecc-tt-1-1.grm \
      | sort | uniq -c | expand \
      | compute-freqs \
      > .bio-j-ecc-tt-1-1.frq
     
  Digraph frequencies around line breaks, ignoring spaces:

    cat bio-j-ecc-gut.wds \
      | sed -e 's/^\(..\).*\(..\)$/\1\2/g' \
      | tr -s '\012' ':' \
      | enum-ngraphs -v n=3 \
      | egrep -v '\*' \
      | egrep '^.:.$' \
      > .bio-j-ecc-nl-1-1.grm
  
    cat .bio-j-ecc-nl-1-1.grm \
      | sort | uniq -c | expand \
      | compute-freqs \
      > .bio-j-ecc-nl-1-1.frq

    compare-freqs \
        .bio-j-ecc-tt-1-1.frq \
        .bio-j-ecc-nl-1-1.frq \
      | compute-count-ratio \
          -v nmin=10 -v mw=10 -v mc=40 \
      | sort +0.0 -0.2r +4 -5nr \
      > .bio-j-ecc-tt-nl-1-1.cmp
     
    cat .bio-j-ecc-tt-nl-1-1.cmp \
      | print-pattern-classes \
          -v rowchars='co8qHPwemrk' \
          -v colchars='co8qHPwemrk'
          
    Pattern classes:

          c  o  8  q  H  P  w  e  m  r  k
         -- -- -- -- -- -- -- -- -- -- --
     c | -- -- -- -? -- --  . -- -? --  .
     q | -- --  .  .  .  .  .  .  .  .  .
     H | -- -- -?  .  .  .  . -? -? -?  .
     P | -- -- -?  .  .  .  .  .  .  .  .
     w | -? -?  .  .  .  .  .  .  .  .  .
     8 | -- -- -? || -? -?  . -?  . -?  .
     o | -- || || ++ -- || -- -- -- ++ --
     e | -- -- || || -- ||  . ||  . ||  .
     m | -- -- || || -? +?  . +?  . ##  .
     r | -- -- || || +? +?  . -?  . +?  .
     k | -? +? +? +?  . +?  . -?  . +?  .

  Fixing the count ratio and classification as in previous manual
  classification experiment:
  
    --- compute-count-ratio-new ------------------------
    #! /n/gnu/bin/gawk -f
    # 
    # Usage: "$0 -v nmin=NNN -v mw=N.NNN -v mc=N.NNN"
    #
    # Computes the ratio of two counts for a list of patterns.
    # The input must be the output of compare-freqs, in the 
    # format " NT FT  NL FL  patt", where "NT","NL" are
    # two counts, and "FT","FL" the corresponding relative 
    # frequencies.  The output will have the format
    # " NT FT  NL FL  rat mk patt" where "rat=(NL)/(NT+2)".
    #
    # The "mk" field is a class code, assigned based on the 
    # ratio and its certainty, and the parameters "mw", "mc",
    # and "nmin", as follows:
    #
    #   rat >= 1/mw           "++" if NT >= nmin, else "+?"
    #   0.005 <= rat < 1/mw   "::" if NL >= nmin, else ":?"
    #   rat < 0.005           "??" if 2*NT < mc,
    #                         "-?" if NT < 2*mc,
    #                         "--" otherwise
    #
    # All three parameters must be given and nonzero; otherwise the
    # first data line triggers an error message and exit status 1.

    # classify(NT, NL, ratio, nmin, mw, mc)
    #   NT, NL  the two counts read from the input line
    #   ratio   NL/(NT+2), as computed by the caller
    #   nmin    minimum count for a "certain" break classification
    #   mw      a ratio of at least 1/mw is treated as a word break
    #   mc      count scale separating "??", "-?" and "--"
    # Returns the two-character class code.
    # NOTE(review): the "++" test uses NT while the "::" test uses NL;
    # confirm that this asymmetry is intended.
    function classify(NT, NL, ratio, nmin, mw, mc)
    {
      if (ratio >= 1.0/mw) 
        { if (NT >= nmin) 
            { return "++" }  # Probably word break
          else
            { return "+?" }  # unimportant but looks more like a word break
        }
      else if (ratio >= 0.005)
        { if (NL >= nmin)
            { return "::" }  # possible syllable break
          else
            { return ":?" }  # uncertain but looks more like syllable break
        }
      else 
        { if (2*NT < mc) 
            { return "??" }  # too rare, can't tell
          else if (NT < 2*mc) 
            { return "-?" }  # uncertain but looks more like non-break
          else 
            { return "--" }  # non-break
        }
    }

    # "##" header line: drop the "##", then reprint the first three
    # fields with the extra "RelFr" and "MK" column titles inserted.
    /^##/ { 
      $0 = substr($0, 3);
      printf "##%11.11s  %11.11s  RelFr  MK  %s\n", $1, $2, $3; next
    }

    # "# " header line (the dashes under the titles): same layout,
    # with dash fillers for the two inserted columns.
    /^# / { 
      $0 = substr($0, 3);
      printf "# %11.11s  %11.11s  -----  --  %s\n", $1, $2, $3; next
    }

    # Data line (anything containing digit "." digit):
    # fields are NT FT NL FL patt.
    /[0-9]\.[0-9]/ { 
      if (mw == 0)   { print "must define mw" > "/dev/stderr"; exit 1; }
      if (mc == 0)   { print "must define mc" > "/dev/stderr"; exit 1; }
      if (nmin == 0) { print "must define nmin" > "/dev/stderr"; exit 1; }
      NT = $1
      NL = $3
      # The +2 in the denominator keeps the ratio finite when NT is 0.
      rat = (NL/(NT+2));
      mark = classify(NT, NL, rat, nmin, mw, mc)
      printf "  %5d %5.3f  %5d %5.3f %6.3f  %s  %s\n", $1, $2, $3, $4, rat, mark, $5;
      next
    }
    ----------------------------------------------------
      
    compare-freqs \
        .bio-j-ecc-tt-1-1.frq \
        .bio-j-ecc-nl-1-1.frq \
      | compute-count-ratio-new \
          -v nmin=5 -v mw=8 -v mc=40 \
      | sort +0.0 -0.2r +4 -5nr \
      > .bio-j-ecc-tt-nl-1-1-new.cmp
     
    cat .bio-j-ecc-tt-nl-1-1-new.cmp \
      | print-pattern-classes \
          -v rowchars='qHPwco8rekm' \
          -v colchars='mwkco8eHPqr'

          m  w  k  c  o  8  e  H  P  q  r
         -- -- -- -- -- -- -- -- -- -- --
     q |  .  .  . ?? --  .  .  .  .  .  .
     H | ??  .  . -- -- ?? ??  .  .  . ??
     P |  .  .  . -? -? ??  .  .  .  .  .
     w |  .  .  . ?? ??  .  .  .  .  .  .
     c | ??  .  . -- -- -- -- -- ?? +? -?
     o | -- ?? -? :: :: ++ :: :: ++ :: ::
     8 |  .  .  . -- -- ?? ?? ?? +? ++ +?
     r |  .  .  . :? :? ++ ?? ++ ++ ++ ++
     e |  .  .  . :? :: :: ++ :? ++ ++ ++
     k |  .  .  . +? +? +? +?  . +? ++ ++
     m |  .  .  . -- :? :? +? ?? +? ++ ++

  Non-breaks:
  
    [qHPw]:.
    .:[mwk]
    [c]:[co8eHPr]
    [8]:[co]
    [m]:[c]
    
  "Word" breaks: 
  
    [8rk]:[8]
    [8erkm]:[eHPqr]
    [o]:[8P]
    [k]:[co]
    
  Possible "Syllable" breaks:
  
    all else.
    
  Recomputing with mw=5 instead of 8:
  
    compare-freqs \
        .bio-j-ecc-tt-1-1.frq \
        .bio-j-ecc-nl-1-1.frq \
      | compute-count-ratio-new \
          -v nmin=5 -v mw=5 -v mc=40 \
      | sort +0.0 -0.2r +4 -5nr \
      > .bio-j-ecc-tt-nl-1-1-new.cmp
     
    cat .bio-j-ecc-tt-nl-1-1-new.cmp \
      | print-pattern-classes \
          -v rowchars='qHPwco8rekm' \
          -v colchars='mwkco8eHPqr'


          m  w  k  c  o  8  e  H  P  q  r
         -- -- -- -- -- -- -- -- -- -- --
     q |  .  .  . ?? --  .  .  .  .  .  .
     H | ??  .  . -- -- ?? ??  .  .  . ??
     P |  .  .  . -? -? ??  .  .  .  .  .
     w |  .  .  . ?? ??  .  .  .  .  .  .
     c | ??  .  . -- -- -- -- -- ?? +? -?
     o | -- ?? -? :: :: :: :: :: ++ :: ::
     8 |  .  .  . -- -- ?? ?? ?? +? :? +?
     e |  .  .  . :? :: :: :? :? ++ ++ ++
     r |  .  .  . :? :? ++ ?? ++ ++ ++ ++
     k |  .  .  . +? +? +? +?  . +? ++ ++
     m |  .  .  . -- :? :? +? ?? +? ++ ++


  Non-breaks:
  
    [qHPw]:.
    .:[mwk]
    [c]:[Pr]
    [8]:[co]
    [m]:[c]
    
  "Word" breaks: 
  
    [8erkm]:[eHPqr]
    [8]:[8]
    [rkm]:[o8]
    [k]:[c]
    
  Possible "Syllable" breaks:
  
    all else (should check digraphs).
    
  Overall tetragram frequencies: 

    cat bio-j-ecc-gut.wds \
      | tr -d ' \012' \
      | enum-ngraphs -v n=4 \
      | egrep -v '\*' \
      | sed \
          -e 's/^\(..\)\(..\)$/\1:\2/g'  \
      > .bio-j-ecc-gut-tt-2-2.grm

    cat .bio-j-ecc-gut-tt-2-2.grm \
      | egrep -v '[qHPw]:.|.:[mwk]|[c]:[co8eHPr]|[8]:[co]|[m]:[c]' \
      | egrep -v '[8rk]:[8]|[8erkm]:[eHPqr]|[o]:[8P]|[k]:[co]' \
      | sort | uniq -c | expand \
      | compute-freqs \
      > .bio-j-ecc-gut-tt-2-2.frq

  Tetragram frequencies around line breaks, ignoring spaces:

    cat bio-j-ecc-gut.wds \
      | sed -e 's/^\(..\).*\(..\)$/\1\2/g' \
      | tr -s '\012' ':' \
      | enum-ngraphs -v n=5 \
      | egrep -v '\*' \
      | egrep '^..:..$' \
      > .bio-j-ecc-gut-nl-2-2.grm
  
    cat .bio-j-ecc-gut-nl-2-2.grm \
      | egrep -v '[qHPw]:.|.:[mwk]|[c]:[co8eHPr]|[8]:[co]|[m]:[c]' \
      | egrep -v '[8rk]:[8]|[8erkm]:[eHPqr]|[o]:[8P]|[k]:[co]' \
      | sort | uniq -c | expand \
      | compute-freqs \
      > .bio-j-ecc-gut-nl-2-2.frq

  Comparisons:

    compare-freqs \
        .bio-j-ecc-gut-tt-2-2.frq \
        .bio-j-ecc-gut-nl-2-2.frq \
      | compute-count-ratio-new \
          -v nmin=5 -v mw=8 -v mc=40 \
      | sort +0.0 -0.2r +4 -5nr \
      > .bio-j-ecc-gut-tt-nl-2-2-new.cmp

    cat .bio-j-ecc-gut-tt-nl-2-2-new.cmp \
      | print-pattern-classes

         oc  cc  8o 8c oH oP oe or om o8 oq oo ok ow qo qc ro rq Ho Hc eo ec rc e8 eq er ee eH eP r8 rH rP re rr ce cH cP cm co He H8 Hm 8P 8e 8r
         --  --  -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
    oo |  .  ??   .  . ??  . ??  .  .  .  .  .  .  .  .  . ??  . -? -- ?? -? ?? ?? ?? ?? ?? -? ??  .  . ??  .  .  . ??  .  .  .  . ??  .  .  .  .
    qo |  .  ??   .  . ??  . ?? ??  .  .  .  .  .  . ??  . ?? ?? -- -- ?? -? ?? ?? ?? ?? ?? ?? ??  .  .  .  .  .  . ??  .  .  . ??  .  .  .  .  .
    ko |  .  ??   .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    wo |  .   .   .  .  .  .  .  .  .  .  .  .  .  .  .  . ??  . ?? ??  .  .  .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
                                                                                                                                                 
    Ho | +?  ++   .  . :? ?? ?? +?  .  .  .  .  .  . ++ +? :? ?? ?? +? ?? ?? -? ??  .  .  . ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    Po |  .  ??   .  .  .  .  .  .  .  .  .  .  .  .  .  . ++  . ?? ?? ?? ?? ??  .  .  .  . ?? ??  . ??  .  .  .  . ??  .  .  .  .  .  .  .  .  .
    eo | +?  ++   .  . +?  . +?  . ??  .  .  .  .  . ++  . ++ ?? :? -? ?? -? ?? ?? ?? ?? ?? ??  . ??  . ??  . ??  .  .  .  .  .  .  .  .  .  .  .
    mo |  .  ??   .  .  .  .  .  .  .  .  .  .  .  . +?  . +? +? -? -? ?? ?? ?? ?? ?? ?? ?? ?? ??  .  .  .  .  .  . ??  .  .  .  .  . ??  .  .  .
    ro | +?  ??   .  . +?  . +?  . ?? +?  .  .  .  . +?  . :? ?? ?? :? :? -? ?? ?? ?? ?? ?? -? ?? ?? ?? ?? ?? ??  . ??  .  .  . ??  .  .  .  .  .
    8o | ++  :?   .  . :: ?? :? ?? ?? +?  . ??  .  . :: ?? ++ ?? ++ ++ ?? :? :? ?? ?? ??  . ?? ?? ?? ?? ??  . ?? ?? ?? +?  .  .  .  .  .  .  .  .
    co | +?  :?   .  . :? ?? :? ??  . ??  . ?? ??  . :: ?? :? ?? :? :? -? :? :? ?? ?? ?? ?? -? ?? ?? ?? ??  . ??  . ?? ??  .  .  .  .  .  .  .  .
                                                                                                                                                 
    oe | ++  --  :? ++ :? ?? :? -? ?? ?? ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . ?? ?? ??  .  .  .  .  .  .  .  .
    om | ++   .  :? :? :? ?? -? ??  . ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . ?? ??
    or | ++  :?   .  . -? ?? :? -? -? ?? ?? ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . -? ?? ?? ??  .  .  .  .  .  .  .
    ce | +?  :?  :? :? ?? ?? :? ?? ?? ?? ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . ?? ??  .  .  .  .  .  . ??  .  .
                                                                                                                                                 
    Hc |  .   .   .  .  .  .  .  .  .  .  .  .  .  . +?  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
                                                                                                                                                 
    cr |  .  ??   .  . ??  . ?? ?? ??  . ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . ??  .  .  . ??  .  .  .  .  .  .
    er |  .  ??   .  .  .  . ?? ?? ??  .  . +?  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .
    kr |  .   .   .  .  .  . ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    mr |  .   .   .  .  .  . ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .
    rr |  .  ??   .  .  .  . ?? ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    8e |  .  ??   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    He |  .  ??   .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    cc |  .   .   .  .  .  .  .  .  .  .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    ee |  .  ??   .  .  .  . ?? ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    ke |  .   .   .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    me |  .  ??   .  .  .  . ??  .  . ??  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    re |  .  ??   .  .  .  .  .  .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    Hm |  .   .   .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    cm |  .   .   .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    8r |  .   .   .  .  .  . ??  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
    Hr |  .   .   .  .  .  .  .  . ??  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
         --  --  -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
         oc  cc  8o 8c oH oP oe or om o8 oq oo ok ow qo qc ro rq Ho Hc eo ec rc e8 eq er ee eH eP r8 rH rP re rr ce cH cP cm co He H8 Hm 8P 8e 8r
    
    
  Note that :oH resembles :cH, could it be a misreading?
  
  From this table, the only reasonably certain entries are
  
    "Word" boundary:     
      eo:cc eo:ro eo:qo
      Ho:cc Ho:qo 
      Po:ro
      8o:oc 8o:ro 8o:Ho 8o:Hc 
      oe:oc oe:8c 
      om:oc 
      or:oc
    
    Non-boundary:  
      oo:Hc
      qo:Ho qo:Hc
      oe:cc
    
    "Syllable" boundary:  
      8o:qo  8o:oH
      co:qo 
  
  We could extend these to "don't care" cases as follows:
  
    "Word" boundary:                  
      [HPerm8c]o:o[crm8]
      [HPemr]o:(cc|qo|qc)
      [emr]o:o[HPeqokw]
      [Pem8o]o:r[oq]
      8o:H[oc]
      Ho:Hc
      (oe|om|or|ce):oc
      oe:8c
      Hc:qo

    "Syllable" boundary:
       [HP8c]o:o[HPeqokw] 
       [8c]o:(cc|qo|qc|ec|rc)
       Ho:ro
       eo:Ho
       ro:(ro|Hc|eo)
       co:(r[oq]|H[oc])
       (om|or|ce):(cc|8o|8c)
       oe:8o
       o[em]:oH
       (oe|or|ce):oe
          
    Non-break:
      ([cekmr8H]r|oo|qo|ko|wo):..
      ..:(e[8qreHP]|r[8HPer]|c[eHPmo]|8[Per])
      ([HPem]o|oe|or|om|ce):(eo|ec|rc|8o|8c)
      [r8c]o:8[oc]
      (oe|om|or|ce):(o[Pr8mqokw]|q[oc]|r[oq]|H[oc])
      (ro|Ho):(rq|Ho)
      (mo|Po):(Ho|Hc)
      ro:(ec|rc)
      co:eo
      eo:Hc
      om:oe
      or:oH
      ce:oH
      oe:cc

    cat bio-j-ecc-gut.wds \
      | sed -e 's/\(.\)/\1 /g' -e 's/ $//g' \
      | split-ecc-by-nl-patterns \
      | split-ecc-by-nl-patterns \
      | tr -d ' \-' | tr '+:' ' \-' \
      > .bio-j-ecc-gut-split.ecc
    
  Here is a sample of the result:
  
    8ocHcoe Hok ooHcco-eccco-Hce-8o-ccco-oHccco-qoHcc8o
    Pccc8o-qoHcc8o-oHomccc8o-qoHor-ccoe-oeccc8o-qoHo
    Pccc8o Hcc8o-qoHc8o-qoHc8o-qoHc8o-qoHc8o-qoHomoeccc8o
    rom qoHom qoe Hccoeo romccc8o r-o-eor-ccc8o-oHcc8o-qoHo
    Pccc8o-r-cccPcco-eccc8o ro 8ce-ccce-cco-Hoeccc8o-qoHok
    roecccc8o-qoeccc8o-qoe-o-Homccor ro-r-o-eo
    qoHccc8o-qoeccco-qoHo cccocHcco-qoHomor
    qoHomoe Hcco-qoe Ho-ro-romccccHcoeo r-oe
    8omoecccoe-8omoe qoeo 8o ro 8o
    Hccc8o Pccc8o-qoHcco-r-o-e-oe-8owccccHco-qoe ecccc8o-qoHcc8oe-oeccc8o
    qo 8omccccHo qoHco-qoHomcccHo qoHce-8omccc8o-oHce-oeccc8o-oHo-r-o-eok
    roe Hc8o-oHce-8o roHo-oHo-roHo-r-oe Homoe Hc8o
    qoHc8o 8o-ccccHo qoHc8o-qoHcc8o-qoHccc8oe-oe
    qoHcc8o-qoHcc8o-qoHc8o-qoHc8o-qoHcc8oe-8o
    occc8o-qoHcc8o-qoHcc8o-oe Hcc8o-oHco-Hoe-8o
    8ccc8o-qoHc8o-qoHcc8o-qoHcco-qoHcc8o 8or
    occc8o-cccHo-r-oe-8o-qoHomccHo-roHo-r-oe-8o
  
  Ditto, without "-"s:
  
    8ocHcoe Hok ooHccoecccoHce8occcooHcccoqoHcc8o
    Pccc8oqoHcc8ooHomccc8oqoHorccoeoeccc8oqoHo
    Pccc8o Hcc8oqoHc8oqoHc8oqoHc8oqoHc8oqoHomoeccc8o
    rom qoHom qoe Hccoeo romccc8o roeorccc8ooHcc8oqoHo
    Pccc8orcccPccoeccc8o ro 8ceccceccoHoeccc8oqoHok
    roecccc8oqoeccc8oqoeoHomccor roroeo
    qoHccc8oqoecccoqoHo cccocHccoqoHomor
    qoHomoe Hccoqoe HororomccccHcoeo roe
    8omoecccoe8omoe qoeo 8o ro 8o
    Hccc8o Pccc8oqoHccoroeoe8owccccHcoqoe ecccc8oqoHcc8oeoeccc8o
    qo 8omccccHo qoHcoqoHomcccHo qoHce8omccc8ooHceoeccc8ooHoroeok
    roe Hc8ooHce8o roHooHoroHoroe Homoe Hc8o
    qoHc8o 8occccHo qoHc8oqoHcc8oqoHccc8oeoe
    qoHcc8oqoHcc8oqoHc8oqoHc8oqoHcc8oe8o
    occc8oqoHcc8oqoHcc8ooe Hcc8ooHcoHoe8o
    8ccc8oqoHc8oqoHcc8oqoHccoqoHcc8o 8or
    occc8occcHoroe8oqoHomccHoroHoroe8o
  
  I have split Landini's file into one chunk per page
  
    csplit \
      --prefix 'chunk-' \
      --suffix '%03d.evt' \
      - '^# *$' '{*}'
  
  and then further edited it manually, splitting each 
  page into homogeneous "textual units" (all normal text,
  all labels, etc.)
  
  The files are L16/fNNN and L16/fNNN.L, where fNNN is the panel
  number (as in f85r1) and L is the location code within that panel.
  Files without location code contain general comments about the panel.
  
  See L16/README for a detailed description of the files and
  my editings.