#! /bin/bash
# Last edited on 2023-05-15 19:47:34 by stolfi

cmd=${0##*/}
usage="${cmd} [-format {FORMAT}] [-show {SHOW}]"

# Exit on first error/abort:
set -e

# Option defaults; either may be overridden on the command line.
format="svg"  # Value passed after "-format" to the plotting script.
show=0        # Value passed after "-show" to the plotting script.

# parse_options ARG...
#   Consumes leading "-show VALUE" and "-format VALUE" pairs, storing VALUE
#   in the global `show` or `format` respectively.  Parsing stops at the
#   first argument that does not start with "-".
#   Any unknown option, an option lacking its value, or any leftover
#   positional argument prints a diagnostic to stderr and exits with
#   status 1 (this script accepts no positional arguments).
parse_options() {
  # Glob match (==), not regex (=~): the pattern must anchor at the start
  # of the argument so that non-option words end the option loop.
  while [[ ( $# -gt 0 ) && ( "$1" == -* ) ]]; do
    if [[ ( $# -ge 2 ) && ( "$1" == "-show" ) ]]; then
      show="$2"; shift 2
    elif [[ ( $# -ge 2 ) && ( "$1" == "-format" ) ]]; then
      format="$2"; shift 2
    else
      # Diagnostics go to stderr so that stdout carries only plot names.
      echo "bad option \"$1\"" 1>&2; echo "usage: ${usage}" 1>&2; exit 1
    fi
  done

  if [[ $# -ne 0 ]]; then
    echo "usage: ${usage}" 1>&2; exit 1
  fi
}

parse_options "$@"
# All arguments were consumed by the parser; clear them at top level too.
set --

# Generates a set of Zipf plots comparing Voynichese, 
# Gruggish, and various other languages.

# Mode switch, edited by hand: 1 selects only the test plots,
# 0 selects only the normal plots.
xtra=0

# `norm` is the complement of `xtra`; every regular plot section
# gets its own on/off flag initialised from it.
norm=$(( 1 - xtra ))
for sect in voyn bibl euro semi asia synt; do
  printf -v "${sect}" '%s' "${norm}"
done
unset sect

# Test plots, produced only when `xtra` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh (presumably
# dataset path + legend text, then output file stem -- confirm there).
# The plot name is also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
if [[ ${xtra} -ne 0 ]]; then
  echo "### New plots ###" 1>&2

  # Test of line colors etc:
  oname="zipf-test-0"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/hea.2 "0 VMS Herbal A 2" \
      voyn/prs/heb.2 "1 VMS Herbal B 2" \
      voyn/prs/zod.1 "2 VMS Zodiac 1 - Prose" \
      voyn/prs/str.1 "3 VMS Stars 1 - Prose" \
      voyn/prs/tot.1 "4 VMS prose" \
      voyn/lab/tot.1 "5 VMS labels" \
      voyn/prs/cos.1 "6 VMS Cosmo 1 - Prose" \
      voyn/prs/unk.1 "7 VMS Unknown 1 - Prose" \
      voyn/prs/unk.2 "8 VMS Unknown 2 - Prose" \
      voyn/prs/unk.4 "9 VMS Unknown 4 - Prose" \
    "${oname}"

  # Testing key placement (short names):
  oname="zipf-test-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/cos.1 "VMS Cosmo 1 - Prose" \
      voyn/lab/tot.1 "VMS labels" \
    "${oname}"

  # Testing key placement (long names):
  oname="zipf-test-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/cos.1 "VMoynich MS Cosmological section 1 - Prose" \
      voyn/lab/tot.1 "VMoynich MS labels" \
    "${oname}"

  # The timing tests below feed the same dataset 1, 2, 4 and 8 times.

  # Test of timing (5k distinct words):
  oname="zipf-test-time-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/wow/tot.1 "Wells War of Worlds" \
    "${oname}"

  # Test of timing (10 k distinct words):
  oname="zipf-test-time-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
    "${oname}"

  # Test of timing (20 k distinct words):
  oname="zipf-test-time-3"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
    "${oname}"

  # Test of timing (40 k distinct words):
  oname="zipf-test-time-4"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
      engl/wow/tot.1 "Wells War of Worlds" \
    "${oname}"
fi

# Bible plots, produced only when `bibl` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh; the plot name is
# also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
if [[ ${bibl} -ne 0 ]]; then
  echo "### Bible plots ###" 1>&2

  # Comparison of the five books of the Vulgate Pentateuch:
  oname="zipf-laot-0"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      latn/ptt/gen.1 "Vulgate Genesis" \
      latn/ptt/exo.1 "Vulgate Exodus" \
      latn/ptt/num.1 "Vulgate Numeri" \
      latn/ptt/lev.1 "Vulgate Leviticus" \
      latn/ptt/deu.1 "Vulgate Deuteronomium" \
    "${oname}"

  # Comparison of the five books of the Hebrew Pentateuch (Tanakh):
  oname="zipf-heot-0"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      hebr/tav/gen.1 "Tanakh Genesis" \
      hebr/tav/exo.1 "Tanakh Exodus" \
      hebr/tav/num.1 "Tanakh Numeri" \
      hebr/tav/lev.1 "Tanakh Leviticus" \
      hebr/tav/deu.1 "Tanakh Deuteronomium" \
    "${oname}"

  # Comparison of the four Gospels of the Vulgate New Testament:
  oname="zipf-lant-0"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      latn/nwt/mat.1 "Vulgate Matthew" \
      latn/nwt/mrk.1 "Vulgate Mark" \
      latn/nwt/luk.1 "Vulgate Luke" \
      latn/nwt/joh.1 "Vulgate John" \
    "${oname}"

fi

# Voynichese plots, produced only when `voyn` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh; the plot name is
# also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
if [[ ${voyn} -ne 0 ]]; then
  echo "### Voynichese ###" 1>&2

  # Voynichese prose and labels:
  oname="zipf-voyn-0"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/tot.1 "VMS prose" \
      voyn/lab/tot.1 "VMS labels" \
    "${oname}"

  # Voynichese Biology, Herbal B, and Stars 2:
  oname="zipf-voyn-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/bio.1 "VMS Biology" \
      voyn/prs/heb.1 "VMS Herbal B 1" \
      voyn/prs/str.2 "VMS Stars 2 - Prose" \
    "${oname}"

  # Voynichese Herbal A and Pharma 1, 2:
  oname="zipf-voyn-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/hea.1 "VMS Herbal A 1" \
      voyn/prs/pha.1 "VMS Pharma 1 - Prose" \
      voyn/prs/pha.2 "VMS Pharma 2 - Prose" \
    "${oname}"

  # Voynichese Herbal A and B:
  oname="zipf-voyn-3"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/hea.1 "VMS Herbal A 1" \
      voyn/prs/heb.1 "VMS Herbal B 1" \
    "${oname}"

  # Voynichese Cosmo 2, 3:
  oname="zipf-voyn-4"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/cos.2 "VMS Cosmo 2 - Prose" \
      voyn/prs/cos.3 "VMS Cosmo 3 - Prose" \
    "${oname}"

  # Voynichese Cosmo 1 prose:
  oname="zipf-voyn-5"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/cos.1 "VMS Cosmo 1 - Prose" \
    "${oname}"

  # Voynichese Miscellanea:
  oname="zipf-voyn-6"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/hea.2 "VMS Herbal A 2" \
      voyn/prs/heb.2 "VMS Herbal B 2" \
      voyn/prs/zod.1 "VMS Zodiac 1 - Prose" \
      voyn/prs/str.1 "VMS Stars 1 - Prose" \
    "${oname}"

  # Voynichese Unknown sections 1,2,4-7:
  oname="zipf-voyn-7"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/unk.1 "VMS Unknown 1 - Prose" \
      voyn/prs/unk.2 "VMS Unknown 2 - Prose" \
      voyn/prs/unk.4 "VMS Unknown 4 - Prose" \
      voyn/prs/unk.5 "VMS Unknown 5 - Prose" \
      voyn/prs/unk.6 "VMS Unknown 6 - Prose" \
      voyn/prs/unk.7 "VMS Unknown 7 - Prose" \
    "${oname}"

  # Voynichese Unknown section 3:
  oname="zipf-voyn-8"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/unk.3 "VMS Unknown 3 - Prose" \
    "${oname}"

  # Voynichese Herbal A and German:
  oname="zipf-voyn-hea-euro-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/hea.1 "VMS Herbal A 1" \
      germ/sim/tot.1 "German - Simplicissimus" \
    "${oname}"

  # Voynichese Herbal A and English:
  oname="zipf-voyn-hea-engl-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/hea.1 "VMS Herbal A 1" \
      engl/cul/tot.1 "English - Culpeper Herbal" \
    "${oname}"

  # Voynichese Herbal B and Tibetan:
  oname="zipf-voyn-heb-tibe-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/heb.1 "VMS Herbal B 1" \
      tibe/pmi/tot.1 "Tibetan - Illusion" \
      tibe/ccv/tot.1 "Tibetan - Comment" \
    "${oname}"

fi

# European-language plots, produced only when `euro` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh; the plot name is
# also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
if [[ ${euro} -ne 0 ]]; then
  # NOTE(review): the banner says "English" although this section also
  # covers other European languages; left unchanged to keep log output stable.
  echo "### English ###" 1>&2

  # English, 17th and 19th centuries:
  oname="zipf-engl-0"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/cul/tot.1 "Culpeper Herbal" \
      engl/wow/tot.1 "Wells War of Worlds" \
    "${oname}"

  # English, 15th, 17th, and 19th centuries:
  oname="zipf-engl-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/twp/tot.1 "Towneley Plays" \
      engl/cul/tot.1 "Culpeper Herbal" \
      engl/wow/tot.1 "Wells War of Worlds" \
    "${oname}"

  # Bibles in Latin, Greek, Russian:
  oname="zipf-euro-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      latn/ptt/tot.1 "Latin Vulgate OT" \
      grek/nwt/tot.1 "Greek Byzantine NT" \
      russ/ptr/tot.1 "Russian Synodal OT" \
    "${oname}"

  # Novels in Spanish and Portuguese:
  oname="zipf-euro-3"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      span/qvi/one.1 "Spanish - Don Quixote" \
      port/csm/tot.1 "Portug. - Dom Casmurro" \
    "${oname}"

  # Novels in European languages - German, Russian, French, Italian, Middle English:
  oname="zipf-euro-4"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      germ/sim/tot.1 "German - Simplicissimus" \
      russ/pic/tot.1 "Russian - Roadside Picnic" \
      fran/tal/tot.1 "French - Terre a la Lune" \
      ital/psp/tot.1 "Italian - Promessi Sposi" \
      engl/twp/tot.1 "M.English - Towneley Plays" \
    "${oname}"

  # Spanish, two novels by same author, 10 years apart:
  oname="zipf-span-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      span/qvi/one.1 "Don Quixote - Part I" \
      span/qvi/two.1 "Don Quixote - Part II" \
    "${oname}"

  # Russian, 20th-century novel and 19th-century Bible:
  oname="zipf-russ-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      russ/pic/tot.1 "Roadside Picnic" \
      russ/ptr/tot.1 "Synodal Pentateuch" \
    "${oname}"
fi

# Semitic-language plots, produced only when `semi` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh; the plot name is
# also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
# Label strings are kept exactly as they were (including spelling and
# spacing), since they are passed verbatim to the plotting script.
if [[ ${semi} -ne 0 ]]; then
  echo "### Semitic languages ###" 1>&2

  # Religious Geez, Hebrew, Arabic:
  oname="zipf-semi-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      geez/gok/tot.1 "Geez Glory of Kings" \
      hebr/tav/tot.1 "Hebrew Torah " \
      arab/quv/tot.1 "Arabic Quran" \
    "${oname}"

  # Arabic Quran with various spellings:
  oname="zipf-semi-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      arab/quv/tot.1 "Arabic Quran - vowels" \
      arab/quf/tot.1 "Arabic Quran - vowels+sukuns" \
      arab/qph/tot.1 "Arabic Quran - phonetic" \
      arab/qcs/tot.1 "Arabic Quran - no vowels" \
      arab/qud/tot.1 "Arabic Quran - devowelled" \
    "${oname}"

  # Arabic Quran with no vowels and devowelled:
  oname="zipf-semi-3"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      arab/qcs/tot.1 "Arabic Quran - no vowels" \
      arab/qud/tot.1 "Arabic Quran - devowelled" \
    "${oname}"

  # Arabic Quran with vowels and devowelled:
  oname="zipf-semi-4"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      arab/quv/tot.1 "Arabic Quran - vowels" \
      arab/qud/tot.1 "Arabic Quran - devowelled" \
    "${oname}"

  # Hebrew Tanakh with vowels and devowelled:
  oname="zipf-semi-5"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      hebr/tav/tot.1 "Hebrew Tanakh - vowels" \
      hebr/tad/tot.1 "Hebrew Tanakh - devoweled" \
    "${oname}"
fi

# Asian-language plots, produced only when `asia` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh; the plot name is
# also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
if [[ ${asia} -ne 0 ]]; then
  echo "### Asian languages ###" 1>&2

  # Tibetan play, Chinese novel, Vietnamese:
  oname="zipf-asia-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      tibe/pmi/tot.1 "Tibetan - Illusion" \
      chin/red/tot.1 "Chinese - Red Mansion" \
      viet/ptt/tot.1 "Vietnamese - Cadman OT" \
    "${oname}"

  # Three samples of Tibetan:
  oname="zipf-tibe-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      tibe/pmi/tot.1 "Tibetan - Illusion" \
      tibe/vim/tot.1 "Tibetan - Vimalakirti" \
      tibe/ccv/tot.1 "Tibetan - Comment" \
    "${oname}"

  # Various samples of Chinese:
  oname="zipf-chin-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      chin/red/tot.1 "Red Mansion" \
      chin/ptt/tot.1 "Union OT - Pentateuch" \
      chin/ptn/tot.1 "New Trans OT - Pentateuch" \
      chin/voa/tot.1 "V. of Amer. (ideograms)" \
      chip/voa/tot.1 "V. of Amer. (pinyin)" \
    "${oname}"

  # Two samples of Western texts translated into Vietnamese:
  oname="zipf-viet-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      viet/ptt/tot.1 "Cadman's OT - Pentateuch" \
      viet/nwt/tot.1 "Catholic NT - Gospels" \
    "${oname}"
fi

# Synthetic and encrypted text plots, produced only when `synt` is nonzero.
# Each call passes (dataset, label) pairs followed by the plot name to
# the external script compare-freq-vs-rank-distribs.sh; the plot name is
# also echoed to stdout.
# ${format}, ${show} and ${oname} are quoted so that user-supplied values
# reach the plotting script as single, unexpanded arguments.
if [[ ${synt} -ne 0 ]]; then
  echo "### Synthetic and encrypted texts ###" 1>&2

  # Native Chinese in plain and in Roman Code:
  oname="zipf-chin-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      chin/red/tot.1 "Red Mansion - (ideograms)" \
      chrc/red/tot.1 "Red Mansion - Roman codes" \
    "${oname}"

  # English encoded in various ways:
  oname="zipf-code-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      engl/wow/tot.1 "Wells WotW - plain" \
      enrc/wow/tot.1 "Wells WotW - Roman code" \
      envg/wow/tot.1 "Wells WotW - Vigenere" \
    "${oname}"

  # Dialects of Gruggish:
  oname="zipf-rugg"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyp/grs/tot.1 "Rugg's text (sfw)" \
      voyp/grm/tot.1 "Rugg's text (hand)" \
    "${oname}"

  # Rugg's text (software simulated) versus Voynichese:
  oname="zipf-rugg-voyn-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyp/grs/tot.1 "Rugg's text (sfw)" \
      voyn/prs/hea.1 "VMS Herbal A" \
      voyn/prs/heb.1 "VMS Herbal B" \
    "${oname}"

  # Rugg's text (hand-produced) versus Voynichese:
  oname="zipf-rugg-voyn-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyp/grm/tot.1 "Rugg's text (hand)" \
      voyn/prs/hea.1 "VMS Herbal A" \
      voyn/prs/heb.1 "VMS Herbal B" \
    "${oname}"

  # Rugg's text (hand), Voynichese Biology, whole text:
  oname="zipf-rugg-voyn-3"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyp/grm/tot.1 "Rugg's text (hand)" \
      voyn/prs/bio.1 "VMS Biology" \
      voyn/prs/tot.1 "VMS whole - prose" \
    "${oname}"

  # Rugg's text (hand), Voynichese Herbal A, and Greek NT:
  oname="zipf-voyn-hea-euro-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyp/grm/tot.1 "Rugg's text (hand)" \
      voyn/prs/hea.1 "VMS Herbal A" \
      grek/nwt/tot.1 "Greek Byzantine NT" \
    "${oname}"

  # Voynichese Herbal B, Rugg's text (sfw):
  oname="zipf-voyn-heb-rugg-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyn/prs/heb.1 "VMS Herbal B" \
      voyp/grs/tot.1 "Rugg's text (sfw)" \
    "${oname}"

  # Rugg's text (sfw), and Tibetan:
  oname="zipf-tibe-rugg-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      voyp/grs/tot.1 "Rugg's text (sfw)" \
      tibe/pmi/tot.1 "Tibetan - Illusion" \
      tibe/ccv/tot.1 "Tibetan - Comment" \
    "${oname}"

  # Vietnamese and Monkey text:
  oname="zipf-monk-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      viet/ptt/tot.1 "Cadman's OT - Pentateuch" \
      viep/mky/tot.1 "Monkey synth" \
    "${oname}"

  # Tibetan and Monkey text:
  oname="zipf-monk-2"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      tibe/pmi/tot.1 "Tibetan - Illusion" \
      tibe/ccv/tot.1 "Tibetan - Comment" \
      viep/mky/tot.1 "Monkey synth" \
    "${oname}"

  # Vietnamese, pseudo-Vietnamese, and English texts (five files):
  # NOTE(review): every other use of the "Rugg's text (sfw)" label in this
  # section reads voyp/grs, while this plot reads viep/grs -- confirm that
  # the viep/grs dataset is the intended one.
  oname="zipf-viep-1"; echo "${oname}"
  compare-freq-vs-rank-distribs.sh -color -format "${format}" -show "${show}" \
      viep/grs/tot.1 "Rugg's text (sfw)" \
      viet/ptt/tot.1 "Cadman's OT - Pentateuch" \
      viep/mky/tot.1 "Monkey synth" \
      envt/wow/tot.1 "Wells WotW - Viet code" \
      engl/wow/tot.1 "Wells WotW - plain" \
    "${oname}"

fi