Hacking at the Voynich manuscript - Side notes
002 Transforming Chinese into Voynichese

# Last edited on 2004-01-08 11:10:08 by stolfi

[ Originally Notes/012, renumbered to Notes/002 on 1999-01-31 ]
  
The goal here is to produce a "natural" phonetic encoding of Chinese
that looks like Voynichese, in particular one that produces
words that fit the prefix-midfix-suffix paradigm.

I. STATISTICS OF CHINESE ELEMENTS

  ln -s ../../Texts/PART/chin-mch.txt
  ln -s ../../Texts/PART/chin-tao.txt

  set sams = ( mch tao )
  set samc = "mch,tao"

  foreach sam ( ${sams} )
    cat chin/${sam}.txt \
      | egrep -v '^[#]' \
      | tr 'A-ZÜ' 'a-zü' \
      | tr -c 'a-zü0-9' '\012' \
      | egrep '.' \
      > chin/${sam}.wds
  end
  cat chin/{$samc}.wds > chin/tot.wds

  dicio-wc chin/???.wds

    lines   words     bytes file        
  ------- ------- --------- ------------
     3777    3777     18287 chin-mch.wds
     5442    5442     26736 chin-tao.wds
     9219    9219     45023 chin.wds

  foreach sam ( ${sams} )
    cat chin/${sam}.wds \
      | sort | uniq \
      > chin/${sam}.dic
  end
  cat chin/{$samc}.dic | sort | uniq > chin/tot.dic
  dicio-wc chin/{$samc}.dic chin/tot.dic 

    lines   words     bytes file        
  ------- ------- --------- ------------
      505     505      2556 chin/mch.dic
      468     468      2425 chin/tao.dic
      675     675      3462 chin/tot.dic

  bool 1-2 chin/{mch,tao}.dic > .mch-tao
  bool 2-1 chin/{mch,tao}.dic > .tao-mch
  bool 2.1 chin/{mch,tao}.dic > .tao.mch
  bool 2+1 chin/{mch,tao}.dic > .tao+mch

  dicio-wc chin/{mch,tao}.dic .{mch,tao}?{mch,tao}

    lines   words     bytes file        
  ------- ------- --------- ------------
      505     505      2556 chin/mch.dic
      468     468      2425 chin/tao.dic
      207     207      1037 .mch-tao
      675     675      3462 .tao+mch
      170     170       906 .tao-mch
      298     298      1519 .tao.mch

  cat chin/tot.dic \
    | tr -d '[0-9]' \
    | sort | uniq \
    > chin/tot-notone.dic
  dicio-wc chin/tot.dic chin/tot-notone.dic

    lines   words     bytes file        
  ------- ------- --------- ------------
      675     675      3462 chin/tot.dic
      324     324      1367 chin/tot-notone.dic

  foreach sam ( ${sams} )
    cat chin/${sam}.wds \
      | sort | uniq -c | expand \
      | sort +0 -1nr \
      | compute-freqs \
      > chin/${sam}.wfr
  end

  egrep '^[aeiouüwy]' chin/tot-notone.dic

  foreach sam ( ${sams} )
    echo " "; echo "${sam}"
    cat chin/${sam}.wds \
      | pinyin-factor-words \
      > chin/${sam}.fac
    cat chin/${sam}.fac \
      | egrep -v '^([@b-df-hj-np-tv-z]|[csz]h){[aeiouü]+:[0-4]}([@rn]|ng)$' \
      | head -20
  end
  cat chin/{$samc}.fac > chin/tot.fac
    
  dicio-wc chin/{$samc}.fac chin/tot.fac

      lines   words     bytes file        
    ------- ------- --------- ------------
       3777    3777     33135 chin/mch.fac
       5442    5442     47641 chin/tao.fac
       9219    9219     80776 chin/tot.fac

  mkdir chin/{pref,midf,tone,suff}

  foreach sam ( tot ${sams} )
    echo " "; echo "${sam}"
    cat chin/${sam}.fac \
      | sed -e 's/{.*$/-/g' \
      | sort | uniq -c | expand \
      | sort +0 -1nr \
      | compute-freqs \
      > chin/pref/${sam}.frq
    cat chin/${sam}.fac \
      | sed -e 's/^[a-z@]*{/-/g' -e 's/:[0-4]}[nrg@]*$/:/g' \
      | sort | uniq -c | expand \
      | sort +0 -1nr \
      | compute-freqs \
      > chin/midf/${sam}.frq
    cat chin/${sam}.fac \
      | sed -e 's/^[a-z@]*{[a-zü]*:/:/g' -e 's/}[nrg@]*$/-/g' \
      | sort | uniq -c | expand \
      | sort +0 -1nr \
      | compute-freqs \
      > chin/tone/${sam}.frq
    cat chin/${sam}.fac \
      | sed -e 's/^.*}/-/g' \
      | sort | uniq -c | expand \
      | sort +0 -1nr \
      | compute-freqs \
      > chin/suff/${sam}.frq
  end
  
  foreach elem ( pref midf tone suff )
    tabulate-frequencies -dir chin/${elem} -title ${elem} tot ${sams}
    cat chin/${elem}/all.cmp-cts
    cat chin/${elem}/all.cmp-frq
    cat chin/${elem}/all.cmp-top
  end

Prefix statistics:

     counts                         frequencies ×9999
  ----------------------------     ----------------------
     tot     mch     tao  pref  |   tot   mch   tao  pref  
  ------  ------  ------  ----  |  ----  ----  ----  ----  
    1793     568    1225  @-    |  1944  1503  2250  @-    
     887     364     523  sh-   |   961   963   960  sh-   
     834     197     637  zh-   |   904   521  1170  zh-   
     760     473     287  d-    |   823  1251   526  d-    
     512     157     355  b-    |   554   415   651  b-    
     450     209     241  x-    |   487   552   442  x-    
     435     215     220  j-    |   471   568   403  j-    
     413     178     235  g-    |   447   470   431  g-    
     384      90     294  q-    |   416   237   539  q-    
     340     128     212  r-    |   368   338   389  r-    
     339     198     141  h-    |   367   523   258  h-    
     312     166     146  z-    |   337   439   267  z-    
     264     131     133  t-    |   285   346   243  t-    
     262     173      89  l-    |   283   457   163  l-    
     241      90     151  m-    |   260   237   276  m-    
     202      74     128  ch-   |   218   195   234  ch-   
     195      91     104  f-    |   211   240   190  f-    
     186      43     143  s-    |   201   113   262  s-    
     156      90      66  n-    |   168   237   120  n-    
     131      75      56  k-    |   141   198   102  k-    
      82      35      47  c-    |    88    92    85  c-    
      41      32       9  p-    |    43    84    16  p-    

    9219    3777    5442  TOTAL | 

    sorted freqs ×99
    --------------------
    tot    mch    tao   
    ------ ------ ------
    19 @-  14 @-  22 @- 
     9 sh- 12 d-  11 zh-
     8 zh-  9 sh-  9 sh-
     8 d-   5 j-   6 b- 
     5 b-   5 x-   5 q- 
     4 x-   5 h-   5 d- 
     4 j-   5 zh-  4 x- 
     4 g-   4 g-   4 g- 
     4 q-   4 l-   3 j- 
     3 r-   4 z-   3 r- 
     3 h-   4 b-   2 m- 
     3 z-   3 t-   2 z- 
     2 t-   3 r-   2 s- 
     2 l-   2 f-   2 h- 
     2 m-   2 m-   2 t- 
     2 ch-  2 n-   2 ch-
     2 f-   2 q-   1 f- 
     1 s-   1 k-   1 l- 
     1 n-   1 ch-  1 n- 
     1 k-   1 s-   1 k- 
     0 c-   0 c-   0 c- 
     0 p-   0 p-   0 p- 

Midfix statistics:

     counts                        |   frequencies ×9999
  ------------------------------   |  ------------------------
     tot     mch     tao  midf     |   tot   mch   tao  midf   
  ------  ------  ------  ----     |  ----  ----  ----  ----   
    2413     830    1583  -i:      |  2616  2197  2908  -i:    
    1505     718     787  -e:      |  1631  1900  1445  -e:    
     979     237     742  -u:      |  1061   626  1362  -u:    
     721     302     419  -a:      |   781   799   769  -a:    
     593     224     369  -ia:     |   642   592   677  -ia:   
     385     181     204  -uo:     |   417   478   374  -uo:   
     357      99     258  -uei:    |   386   261   473  -uei:  
     282     103     179  -ü:      |   305   272   328  -ü:    
     265     117     148  -iou:    |   286   309   271  -iou:  
     255     171      84  -ai:     |   276   452   153  -ai:   
     245     120     125  -o:      |   265   317   229  -o:    
     215      85     130  -ao:     |   232   224   238  -ao:   
     182      84      98  -ua:     |   196   221   179  -ua:   
     137      74      63  -ou:     |   148   195   115  -ou:   
     128      92      36  -ei:     |   138   243    65  -ei:   
     117      75      42  -ie:     |   126   198    76  -ie:   
     115      76      39  -iao:    |   124   200    71  -iao:  
     115      78      37  -ue:     |   124   206    67  -ue:   
     104      67      37  -üe:     |   112   176    67  -üe:   
      47      17      30  -üa:     |    50    44    54  -üa:   
      45      16      29  -io:     |    48    41    52  -io:   
      14      11       3  -uai:    |    14    28     5  -uai:  
    9219    3777    5442  TOTAL

    tot      mch      tao     
    -------- -------- --------
    25 -i:   21 -i:   28 -i:  
    16 -e:   18 -e:   14 -e:  
    10 -u:    7 -a:   13 -u:  
     7 -a:    6 -u:    7 -a:  
     6 -ia:   5 -ia:   6 -ia: 
     4 -uo:   4 -uo:   4 -uei:
     3 -uei:  4 -ai:   3 -uo: 
     3 -ü:    3 -o:    3 -ü:  
     2 -iou:  3 -iou:  2 -iou:
     2 -ai:   2 -ü:    2 -ao: 
     2 -o:    2 -uei:  2 -o:  
     2 -ao:   2 -ei:   1 -ua: 
     1 -ua:   2 -ao:   1 -ai: 
     1 -ou:   2 -ua:   1 -ou: 
     1 -ei:   2 -ue:   0 -ie: 
     1 -ie:   1 -iao:  0 -iao:
     1 -iao:  1 -ie:   0 -ue: 
     1 -ue:   1 -ou:   0 -üe: 
     1 -üe:   1 -üe:   0 -ei: 
     0 -üa:   0 -üa:   0 -üa: 
     0 -io:   0 -io:   0 -io: 
     0 -uai:  0 -uai:  0 -uai:

Tone statistics:

       tot     mch     tao  tone       tot   mch   tao  tone 
    ------  ------  ------  ----      ----  ----  ----  ---- 
      2966    1174    1792  :4-       3216  3107  3292  :4-  
      2379     837    1542  :2-       2580  2215  2833  :2-  
      1830     634    1196  :1-       1984  1678  2197  :1-  
      1595     697     898  :3-       1729  1844  1649  :3-  
       449     435      14  :0-        486  1151    25  :0-  
      9219    3777    5442  TOTAL

      tot    mch    tao   
      ------ ------ ------
      31 :4- 30 :4- 32 :4-
      25 :2- 21 :2- 28 :2-
      19 :1- 18 :3- 21 :1-
      17 :3- 16 :1- 16 :3-
       4 :0- 11 :0-  0 :0-

Paired midfix:tone statistics:


     counts                        |   frequencies ×9999
  ------------------------------   |  ------------------------
     tot     mch     tao  midf     |   tot   mch   tao  midf   
  ------  ------  ------  ----     |  ----  ----  ----  ----   
     714     291     423  -i:4-    |   773   769   776  -i:4- 
     654     176     478  -i:1-    |   708   465   877  -i:1- 
     589     161     428  -e:2-    |   638   425   785  -e:2- 
     568     158     410  -i:2-    |   615   417   752  -i:2- 
     499      84     415  -u:4-    |   540   221   762  -u:4- 
     410     149     261  -i:3-    |   444   393   479  -i:3- 
     336     333       3  -e:0-    |   363   881     5  -e:0- 
     280     119     161  -a:1-    |   303   314   295  -a:1- 
     239      93     146  -a:4-    |   258   245   267  -a:4- 
     236      63     173  -u:2-    |   255   166   317  -u:2- 
     227      56     171  -ia:1-   |   245   147   313  -ia:1-
     220      96     124  -e:4-    |   238   253   227  -e:4- 
     191      58     133  -e:3-    |   206   153   243  -e:3- 
     190      71     119  -ia:4-   |   205   187   218  -ia:4-
     174      72     102  -iou:3-  |   188   190   186  -iou:3-
     170      81      89  -uei:4-  |   183   213   163  -uei:4-
     169      70      99  -e:1-    |   182   184   181  -e:1- 
     162      64      98  -u:3-    |   175   168   179  -u:3- 
     146       8     138  -uei:2-  |   157    20   253  -uei:2-
     136      42      94  -ao:4-   |   147   110   172  -ao:4-
     133      34      99  -uo:4-   |   143    89   181  -uo:4-
     132      46      86  -a:2-    |   142   121   157  -a:2- 
     122      57      65  -ia:2-   |   131   150   118  -ia:2-
     116      85      31  -ai:4-   |   125   224    56  -ai:4-
      96      45      51  -ü:4-    |   103   118    93  -ü:4- 
      81      40      41  -ua:4-   |    87   105    74  -ua:4-
      81      24      57  -uo:3-   |    87    63   104  -uo:3-
      80      51      29  -uo:2-   |    86   134    52  -uo:2-
      79      60      19  -uo:1-   |    85   158    34  -uo:1-
      75      19      56  -u:1-    |    80    49   102  -u:1- 
      74       8      66  -ü:2-    |    79    20   120  -ü:2- 
      73      33      40  -o:1-    |    78    86    73  -o:1- 
      72      63       9  -üe:2-   |    77   166    16  -üe:2-
      71      39      32  -ü:3-    |    76   102    58  -ü:3- 
      70      59      11  -ai:2-   |    75   155    19  -ai:2-
      70      56      14  -ue:2-   |    75   147    25  -ue:2-
      67      56      11  -i:0-    |    72   147    19  -i:0- 
      66      44      22  -ie:3-   |    71   115    39  -ie:3-
      63      20      43  -o:4-    |    67    52    78  -o:4- 
      61      32      29  -iou:4-  |    65    84    52  -iou:4-
      58      32      26  -ou:4-   |    62    84    47  -ou:4-
      57      31      26  -a:3-    |    61    81    47  -a:3- 
      57      26      31  -o:2-    |    61    68    56  -o:2- 
      56      39      17  -iao:4-  |    60   102    30  -iao:4-
      55      31      24  -ao:3-   |    59    81    43  -ao:3-
      53      39      14  -ia:3-   |    56   102    25  -ia:3-
      48      37      11  -o:3-    |    51    97    19  -o:3- 
      44      35       9  -ei:4-   |    47    92    16  -ei:4-
      43      33      10  -ei:3-   |    46    86    17  -ei:3-
      42      14      28  -ai:3-   |    45    36    50  -ai:3-
       .       .       .  ...      |     .     .     .  ...   
    9219    3777    5442  TOTAL


    sorted freqs ×99
    --------------------
    tot        mch        tao       
    ---------- ---------- ----------
     7 -i:4-    8 -e:0-    8 -i:1-  
     7 -i:1-    7 -i:4-    7 -e:2-  
     6 -e:2-    4 -i:1-    7 -i:4-  
     6 -i:2-    4 -e:2-    7 -u:4-  
     5 -u:4-    4 -i:2-    7 -i:2-  
     4 -i:3-    3 -i:3-    4 -i:3-  
     3 -e:0-    3 -a:1-    3 -u:2-  
     3 -a:1-    2 -e:4-    3 -ia:1- 
     2 -a:4-    2 -a:4-    2 -a:1-  
     2 -u:2-    2 -ai:4-   2 -a:4-  
     2 -ia:1-   2 -u:4-    2 -uei:2-
     2 -e:4-    2 -uei:4-  2 -e:3-  
     2 -e:3-    1 -iou:3-  2 -e:4-  
     2 -ia:4-   1 -ia:4-   2 -ia:4- 
     1 -iou:3-  1 -e:1-    1 -iou:3-
     1 -uei:4-  1 -u:3-    1 -e:1-  
     1 -e:1-    1 -u:2-    1 -uo:4- 
     1 -u:3-    1 -üe:2-   1 -u:3-  
     1 -uei:2-  1 -uo:1-   1 -ao:4- 
     1 -ao:4-   1 -ai:2-   1 -uei:4-
     1 -uo:4-   1 -e:3-    1 -a:2-  
     1 -a:2-    1 -ia:2-   1 -ü:2-  
     1 -ia:2-   1 -i:0-    1 -ia:2- 
     1 -ai:4-   1 -ia:1-   1 -uo:3- 
     1 -ü:4-    1 -ue:2-   1 -u:1-  
     0 -ua:4-   1 -uo:2-   0 -ü:4-  
     0 -uo:3-   1 -a:2-    0 -o:4-  
     0 -uo:2-   1 -ü:4-    0 -ua:4- 
     0 -uo:1-   1 -ie:3-   0 -o:1-  
     0 -u:1-    1 -ao:4-   0 -ü:3-  
     0 -ü:2-    1 -ua:4-   0 -ai:4- 
     0 -o:1-    1 -ia:3-   0 -o:2-  
     0 -üe:2-   1 -iao:4-  0 -ü:1-  
     0 -ü:3-    1 -ü:3-    0 -iou:4-
     0 -ai:2-   0 -o:3-    0 -uo:2- 
     0 -ue:2-   0 -ei:4-   0 -ai:3- 
     0 -i:0-    0 -uo:4-   0 -a:3-  
     0 -ie:3-   0 -ei:3-   0 -ou:4- 
     0 -o:4-    0 -o:1-    0 -üe:1- 
     0 -iou:4-  0 -iou:4-  0 -uei:1-
     0 -ou:4-   0 -ou:4-   0 -ao:3- 
     0 -a:3-    0 -a:3-    0 -ie:3- 
     0 -o:2-    0 -ao:3-   0 -io:4- 
     0 -iao:4-  0 -o:2-    0 -ua:3- 
     0 -ao:3-   0 -iao:3-  0 -ou:3- 
     0 -ia:3-   0 -uo:3-   0 -ua:1- 
     0 -o:3-    0 -o:4-    0 -uo:1- 
     0 -ei:4-   0 -ou:3-   0 -üa:2- 
     0 -ei:3-   0 -u:1-    0 -iao:4-
     0 -ai:3-   0 -ua:2-   0 -ua:2- 

Suffix statistics

     counts                        |   frequencies ×9999
  ------------------------------   |  ------------------------
     tot     mch     tao  suff     |   tot   mch   tao  suff
  ------  ------  ------  ----     |  ----  ----  ----  ----
    6452    2628    3824  -@       |  6998  6957  7026  -@  
    1322     613     709  -n       |  1433  1622  1302  -n  
    1276     514     762  -ng      |  1383  1360  1399  -ng 
     169      22     147  -r       |   182    57   269  -r  
    9219    3777    5442  TOTAL
   
    sorted freqs ×99
    --------------------
    tot    mch    tao   
    ------ ------ ------
    69 -@  68 -@  69 -@ 
    14 -n  16 -n  13 -ng
    13 -ng 13 -ng 12 -n 
     1 -r   0 -r   2 -r 

II. STATISTICS OF VOYNICHESE ELEMENTS

  cat ../008/prefs-all.frq \
    | compute-freqs \
    > voyn-pref.frq

  cat ../008/midfs-all.frq \
    | compute-freqs \
    > voyn-midf.frq

  cat ../008/suffs-all.frq \
    | compute-freqs \
    > voyn-suff.frq

III. PAIRING CHINESE AND VOYNICHESE COMPONENTS BY FREQUENCY:

Let's see if we can match the Voynichese prefixes to 
Chinese consonants:

  pr -m -t -w 80 voyn-pref.frq chin/pref/tot.frq | expand

   1859 0.3984 -                           1793 0.1945 @-
   1296 0.2778 qo-                          887 0.0962 sh-
    607 0.1301 o-                           834 0.0905 zh-
    255 0.0547 ol-                          760 0.0824 d-
    209 0.0448 l-                           512 0.0555 b-
    108 0.0231 y-                           450 0.0488 x-
     75 0.0161 d-                           435 0.0472 j-
     45 0.0096 r-                           413 0.0448 g-
     36 0.0077 qol-                         384 0.0417 q-
     29 0.0062 s-                           340 0.0369 r-
     23 0.0049 q-                           339 0.0368 h-
     21 0.0045 sol-                         312 0.0338 z-
     12 0.0026 dy-                          264 0.0286 t-
      8 0.0017 sal-                         262 0.0284 l-
      7 0.0015 so-                          241 0.0261 m-
      6 0.0013 dal-                         202 0.0219 ch-
      6 0.0013 olo-                         195 0.0212 f-
      5 0.0011 a-                           186 0.0202 s-
      5 0.0011 dol-                         156 0.0169 n-
      4 0.0009 al-                          131 0.0142 k-
      4 0.0009 lo-                           82 0.0089 c-
      4 0.0009 or-                           41 0.0044 p-
      .....

That's no good -- there are too many Voynichese prefixes,
and their frequencies drop too fast.

Let's try Chinese tones and Voynichese prefixes:

  pr -m -t -w 80 voyn-pref.frq chin/tone/tot.frq | expand

     voynichese           chinese      
   ----------------     ---------------         
   1859 0.3984 -        2966 0.3217 :4-
   1296 0.2778 qo-      2379 0.2581 :2-
    607 0.1301 o-       1830 0.1985 :1-
    255 0.0547 ol-      1595 0.1730 :3-
    209 0.0448 l-        449 0.0487 :0-
    108 0.0231 y-        
     75 0.0161 d-                       
     45 0.0096 r-                       
     36 0.0077 qol-                     
     29 0.0062 s-                       
     23 0.0049 q-                       
     21 0.0045 sol-                     
     .............

Let's assume that Voyn y- and o- are allographs of the same prefix, 
ol- and l- and d- and r- are separate words.  We can get roughly the
correct frequencies by the following mapping: 

    voynichese            chinese       
    ----------            ------------      
    0.3984 -              0.3704 :04-    
    0.2778 qo-            0.2581 :2-    
    0.1532 o-,y-          0.1985 :1-
    0.0700 ol-,qol-,sol-  0.1730 :3- 

The other Voynichese prefixes can be interpreted as 
separate words.

<<<STOPPED HERE>>>

Let's now look at Chinese consonants and Voynichese
midfixes.  

    pr -m -t -w 80 voyn-midf.frq chin/pref/tot.frq  | expand

    824 0.1766 -k-                         1793 0.1945 @-
    588 0.1260 -che-                        887 0.0962 sh-
    514 0.1102 -she-                        834 0.0905 zh-
    387 0.0829 -kee-                        760 0.0824 d-
    354 0.0759 -t-                          512 0.0555 b-
    347 0.0744 -ke-                         450 0.0488 x-
    179 0.0384 -te-                         435 0.0472 j-
    121 0.0259 -ch-                         413 0.0448 g-
    113 0.0242 -tee-                        384 0.0417 q-
    105 0.0225 -shee-                       340 0.0369 r-
     95 0.0204 -chee-                       339 0.0368 h-
     83 0.0178 -sh-                         312 0.0338 z-
     58 0.0124 -pche-                       264 0.0286 t-
     49 0.0105 -chckh-                      262 0.0284 l-
     38 0.0081 -kch-                        241 0.0261 m-
     38 0.0081 -p-                          202 0.0219 ch-
     33 0.0071 -tche-                       195 0.0212 f-
     31 0.0066 -sheckh-                     186 0.0202 s-
     28 0.0060 -tch-                        156 0.0169 n-
     27 0.0058 -kche-                       131 0.0142 k-
     26 0.0056 -chcth-                       82 0.0089 c-
     25 0.0054 -checkh-                      41 0.0044 p-
     25 0.0054 -shckh-                  
     24 0.0051 -shek-                   
     22 0.0047 -kshe-                   
     20 0.0043 -ee-                     
     17 0.0036 -checth-                 
     17 0.0036 -chek-                   


We should merge Voynichese k with t, and include -d- and -- from the
unifixes. 
  
  To get the right proportions at the top frequency, we need to merge
  Chinese d- and j-.


   voynichese                chinese       
   ------------------------  --------------------------
   1123 0.196  --            473 0.125  d-
   1216 0.212  -k-           402 0.106  y-
    588 0.102  -che-         364 0.096  sh-
    582 0.101  -kch-         215 0.057  j-              
    526 0.092  -ke-          209 0.055  x- 
    514 0.089  -she-         198 0.052  h- 
    301 0.052  -d-           197 0.052  zh-
    170 0.030  -s-           178 0.047  g- 
    143 0.025  -kche-        173 0.046  l- 
    141 0.025  -ch-          166 0.044  z- 
    105 0.018  -shee-        157 0.042  b- 
     95 0.017  -chee-        131 0.035  t- 
     87 0.015  -chckh-       130 0.034  r- 
     83 0.014  -sh-           91 0.024  f- 
     42 0.007  -checkh-       90 0.024  m- 
     40 0.004  -ckh-          90 0.024  n- 
     39 0.007  -kshe-         90 0.024  q- 
     37 0.006  -shckh-        75 0.020  k- 
     31 0.005  -sheckh-       74 0.020  ch-
     24 0.004  -shek-         43 0.011  s- 
     17 0.003  -chek-         35 0.009  c- 
                              32 0.008  p- 
                              
Let's try Chinese midfixes (vowel groups) and Voynichese midfixes:

    pr -m -t -w 80 voyn-midf.frq chin/midf/tot.frq  | expand

    824 0.1766 -k-                         2413 0.2617 -i:
    588 0.1260 -che-                       1505 0.1632 -e:
    514 0.1102 -she-                        979 0.1062 -u:
    387 0.0829 -kee-                        721 0.0782 -a:
    354 0.0759 -t-                          593 0.0643 -ia:
    347 0.0744 -ke-                         385 0.0418 -uo:
    179 0.0384 -te-                         357 0.0387 -uei:
    121 0.0259 -ch-                         282 0.0306 -ü:
    113 0.0242 -tee-                        265 0.0287 -iou:
    105 0.0225 -shee-                       255 0.0277 -ai:
     95 0.0204 -chee-                       245 0.0266 -o:
     83 0.0178 -sh-                         215 0.0233 -ao:
     58 0.0124 -pche-                       182 0.0197 -ua:
     49 0.0105 -chckh-                      137 0.0149 -ou:
     38 0.0081 -kch-                        128 0.0139 -ei:
     38 0.0081 -p-                          117 0.0127 -ie:
     33 0.0071 -tche-                       115 0.0125 -iao:
     31 0.0066 -sheckh-                     115 0.0125 -ue:
     28 0.0060 -tch-                        104 0.0113 -üe:
     27 0.0058 -kche-                        47 0.0051 -üa:
     26 0.0056 -chcth-                       45 0.0049 -io:
     25 0.0054 -checkh-                      14 0.0015 -uai:
     25 0.0054 -shckh-                  
     24 0.0051 -shek-                   
     22 0.0047 -kshe-                   
     20 0.0043 -ee-                     
     17 0.0036 -checth-                 
     17 0.0036 -chek-  
     .................

Suggestive, but not really a match.

Finally, let's compare Chinese terminations and Voynichese suffixes
(including unifixes minus -d):

Chinese vowels in termination:

    613 0.092 -n
    514 0.078 -ng
     20 0.003 -r
     
   1772 0.267 -i-, yi-
   1075 0.162 -u-, w-
   1034 0.156 -e-
    970 0.146 -a-
    623 0.094 -o-
      9 0.001 -ü-

  Micro-elements of the Voynichese suffixes and unifixes:

   1817 0.363 -dy
   1325 0.265 -y
     25 0.005 -am

    630 0.126 -aiin
    595 0.119 -ol
    369 0.074 -or
     86 0.017 -ain
     51 0.010 -d
     12 0.002 -air

     48 0.010 -s
     10 0.002 -r
  
  We can't get a good match: the Chinese terminators -ng -n -r
  are much rarer than the Voynichese terminators -dy -y -am.
  
  We can try perhaps
  
   1772 0.267 -i-, yi-     1817 0.363 -dy    
   1075 0.162 -u-, w-      1325 0.265 -y     
   1034 0.156 -e-           630 0.126 -aiin  
    970 0.146 -a-           595 0.119 -ol    
    623 0.094 -o-           369 0.074 -or    
    613 0.092 -n             86 0.017 -ain   
    514 0.078 -ng            51 0.010 -d     
     20 0.003 -r             25 0.005 -am    
      9 0.001 -ü-            12 0.002 -air   
  
  Maybe we should have left y- as a separate Chinese consonant.

  Well, let's try it:    
    
    cat chin/${sam}.wds \
      | sed -f chin/to-voyn.sed \
      | head -200 \
      | fmt -w 60
    
Future message to the list:

    ------------------------------------------------------------
    For example, consider this sample of pseudo-voynichese:
    
      osol qokcholy qocheaiin qokaiiny qokainaiin ocheaiin
      qocheaindy qokcheolaiin oddol odaiindy okchaiinaindy
      qosaiin kor olory kcholor kaiinain dory chekaiin
      chekaiindy kaiinain sheaiin daiin daiin chain chain daiin
      checkhaiinainy shckhol cheordy qokcheainaiin qosheor
      kaiinaindy qocheaind kchaiindy daiin dor kchaiinaindy
      kaindy qochckhol qodol kor kaiinain sheeaiindy osaiin
    
    For the "code" and plaintext, see
    http://www.dcc.unicamp.br/~stolfi/voynich/Notes/014/chin/to-voyn.sed
    http://www.dcc.unicamp.br/~stolfi/voynich/Notes/014/chin/mch.txt
    This code is still not perfect --- too many "aiin"s in the middle
    of words, and words that are too long --- but it is a somewhat
    "natural" encoding of the original language, gives roughly correct
    voynichese letter statistics, and has a fairly clear p-m-s
    structure.  Of course this trick does not prove that Voynichese
    is Chinese, but merely shows that 
    ------------------------------------------------------------