# Markov-generated pseudo-Vietnamese text.
# Last edited on 2004-03-01 04:50:09 by stolfi

LINKS

  # Link format-words-filled (used by the pipelines below) into the
  # current directory.  The path is quoted so that it stays a single
  # argument even if STOLFIHOME contains spaces or glob characters.
  ln -s "${STOLFIHOME}/voynich/work/format-words-filled"

PREPARING THE DATA

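  The Markov monkey requires a model text. We use the 
  Pentateuch word file main.wds (presumably derived from main.src),
  minus comments and @-directives: only the second field of the
  "a" and "p" lines inside the {vN} sections is kept, and the
  {au} and {tt} sections are skipped.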
  
    # Build the model token list temp.tks from the Pentateuch word file.
    #   - a "$ ...{vN}" header line switches output on;
    #   - a "$ ...{au}" or "$ ...{tt}" header line switches it off;
    #   - any other line whose one-character tag (first character
    #     followed by a blank) is neither "a" nor "p" is dropped;
    #   - for the remaining lines the second field is printed while
    #     output is on.
    # The braces are written as [{] and [}] so that they always match
    # literally instead of being parsed as regexp interval operators.
    # The input is redirected rather than piped, so a missing input file
    # aborts the command instead of producing an empty temp.tks.
    gawk \
        ' /^[$] .*[{]v[0-9]+[}] *$/ { ok = 1; next; } \
          /^[$] .*[{](au|tt)[}] *$/ { ok = 0; next; } \
          /^[^ap] / { next; } \
          (ok) { print $2; } \
        ' \
      < ../../viet/ptt/main.wds \
      > temp.tks
  
  Now do a short trial run (200 tokens) of a Markov monkey of order 3:
  
    # Trial run: feed temp.tks to char-monkey with order=3 and
    # tokens=200, then pass its output through format-words-filled.
    char-monkey \
        -v order=3 \
        -v tokens=200 \
      < temp.tks \
      | format-words-filled
  
  Now for real, generate some 40000 tokens:
  
    # Rebuild main.src from scratch: first the contents of pref.src,
    # then the output of char-monkey (order=3, tokens=40000) after it
    # has gone through format-words-filled and insert-controls.
    rm -f main.src
    ( cat pref.src ; \
      char-monkey \
          -v order=3 \
          -v tokens=40000 \
        < temp.tks \
        | format-words-filled \
        | insert-controls \
    ) > main.src