# Attributes of samples and subsections # Last edited on 2023-05-10 09:57:16 by stolfi # # Fields are # # SUBDIR SOURCE GUDNUM KEY DESCR # # where # # SUBDIR is LANG/BUK/TAG.N (in "dat" directory). # SOURCE is LANG/BUK (in "langbank" directory). # GUDNUM is number of good words to take in this section. # KEY is a short id for plot keys. # DESCR is a documentation string. # # The last section of any sample *must* be "tot.1". #---------------------------------------------------------------------- # Modern English in ISO Latin-1: engl/wow/tot.1 engl/wow 35027 WoW War_of_the_Worlds engl/wnm/tot.1 engl/wow 1003 WoW-N War_of_the_Worlds_-_Names #---------------------------------------------------------------------- # 1600's English in ISO Latin-1: engl/cul/pre.1 - 2700 CpH-P Culpeper's_Herbal_-_Preamble engl/cul/her.1 - 25177 CpH-H Culpeper's_Herbal_-_Herbs engl/cul/rec.1 - 7150 CpH-R Culpeper's_Herbal_-_Recipes engl/cul/tot.1 engl/cul 9999999 CpH Culpeper's_Herbal engl/cpn/tot.1 engl/cul 1003 CpH-N Culpeper's_Herbal_-_Plant_Names #---------------------------------------------------------------------- # Middle English (1460) in ISO Latin-1: engl/twp/tot.1 engl/twp 35027 TwP Towneley_Plays #---------------------------------------------------------------------- # Early Church Latin in ISO Latin-1: latn/ptt/gen.1 - 9118 VOT-1 Vulgate_OT_Genesis latn/ptt/exo.1 - 7254 VOT-2 Vulgate_OT_Exodus latn/ptt/num.1 - 4981 VOT-3 Vulgate_OT_Numeri latn/ptt/lev.1 - 6984 VOT-4 Vulgate_OT_Leviticus latn/ptt/deu.1 - 6690 VOT-5 Vulgate_OT_Deuteronomium latn/ptt/tot.1 latn/ptt 9999999 VOT-P Vulgate_OT_Pentateuch latn/nwt/mat.1 - 9798 VNT-1 Vulgate_NT_Matthew latn/nwt/mrk.1 - 6130 VNT-2 Vulgate_NT_Mark latn/nwt/luk.1 - 10736 VNT-3 Vulgate_NT_Luke latn/nwt/joh.1 - 8363 VNT-4 Vulgate_NT_John latn/nwt/tot.1 latn/nwt 9999999 VNT-G Vulgate_NT_Gospels #---------------------------------------------------------------------- # 1300's Academic Latin in ISO Latin-1: latn/ock/tot.1 latn/ock 35027 Ock 
Ockam's_Dialogus #---------------------------------------------------------------------- # Early Church Greek (Byzantine Koiné) in JSGR: grek/nwt/mat.1 - 9921 BNT-1 Byzantine_NT_Matthew grek/nwt/mrk.1 - 6156 BNT-2 Byzantine_NT_Mark grek/nwt/luk.1 - 10525 BNT-3 Byzantine_NT_Luke grek/nwt/joh.1 - 8425 BNT-4 Byzantine_NT_John grek/nwt/tot.1 grek/nwt 9999999 BNT-G Byzantine_NT_Gospels #---------------------------------------------------------------------- # 1600's Spanish in ISO Latin-1: span/qvi/one.1 span/qvi 35027 DQux-1 Don_Quixote_Part_I span/qvi/two.1 span/qvi 35027 DQux-2 Don_Quixote_Part_II span/qvi/tot.1 span/qvi 9999999 DQux Don_Quixote #---------------------------------------------------------------------- # Modern Italian in ISO Latin-1: ital/psp/tot.1 ital/psp 35027 PrSp Promessi_Sposi #---------------------------------------------------------------------- # Modern French in ISO Latin-1: fran/tal/tot.1 fran/tal 35027 TrLn De_la_Terre_a_la_lune #---------------------------------------------------------------------- # Modern Portuguese in ISO Latin-1: port/csm/tot.1 port/csm 35027 DCsm Dom_Casmurro #---------------------------------------------------------------------- # Modernized German in ISO Latin-1: germ/sim/tot.1 germ/sim 35027 Simp Simplicissimus_Teutsch #---------------------------------------------------------------------- # Russian transliterated into Latin-1: russ/pic/tot.1 russ/pic 35027 RPic Piknik_na_obochine #---------------------------------------------------------------------- # Russian in KOI8-R: russ/ptt/gen.1 - 8910 SOT-1 Synodal_OT_Genesis russ/ptt/exo.1 - 7192 SOT-2 Synodal_OT_Exodus russ/ptt/num.1 - 7057 SOT-3 Synodal_OT_Numeri russ/ptt/lev.1 - 5294 SOT-4 Synodal_OT_Leviticus russ/ptt/deu.1 - 6574 SOT-5 Synodal_OT_Deuteronomy russ/ptt/tot.1 russ/ptt 9999999 SOT-P Synodal_OT_Pentateuch #---------------------------------------------------------------------- # Classical Arabic in JSAR arab/quf/tot.1 arab/quv 35027 Qur-F 
Holy_Quran_-_Vowels_Sukuns arab/quv/tot.1 arab/quv 35027 Qur-V Holy_Quran_-_Vowels arab/qud/tot.1 arab/quv 35027 Qur-D Holy_Quran_-_Devowelled arab/qph/tot.1 arab/qph 35027 Qur-P Holy_Quran_-_Semi-Phonetic arab/qcs/tot.1 arab/qcs 35027 Qur Holy_Quran_-_Consonants #---------------------------------------------------------------------- # Biblical Hebrew in JSHB hebr/tav/tot.1 hebr/tan 35027 HbB-V Hebrew_Bible_-_Pentateuch_-_Vowels hebr/tad/tot.1 hebr/tan 35027 HbB-D Hebrew_Bible_-_Pentateuch_-_Devowelled #---------------------------------------------------------------------- # Ge`ez in SERA geez/gok/tot.1 geez/gok 35027 GoK Glory_of_the_Kings geez/eno/tot.1 geez/eno 35027 Eno First_Book_of_Enoch #---------------------------------------------------------------------- # Vietnamese in VIQR viet/ptt/gen.1 - 8698 COT-1 Cadman_OT_Genesis viet/ptt/exo.1 - 6974 COT-2 Cadman_OT_Exodus viet/ptt/num.1 - 7665 COT-3 Cadman_OT_Numeri viet/ptt/lev.1 - 5200 COT-4 Cadman_OT_Leviticus viet/ptt/deu.1 - 6480 COT-5 Cadman_OT_Deuteronomy viet/ptt/tot.1 viet/ptt 9999999 COT-P Cadman_OT_Pentateuch viet/nwt/mat.1 - 9857 CNT-1 Catholic_NT_Matthew viet/nwt/mrk.1 - 6117 CNT-2 Catholic_NT_Mark viet/nwt/luk.1 - 10636 CNT-3 Catholic_NT_Luke viet/nwt/jhn.1 - 8417 CNT-4 Catholic_NT_John viet/nwt/tot.1 viet/nwt 9999999 CNT-G Catholic_NT_Gospels #---------------------------------------------------------------------- # Modern Bible Mandarin Chinese in ideograms (Guo Biao) chin/ptt/gen.1 - 9056 UOT-1 Union_OT_Genesis chin/ptt/exo.1 - 7082 UOT-2 Union_OT_Exodus chin/ptt/num.1 - 7401 UOT-3 Union_OT_Numeri chin/ptt/lev.1 - 5161 UOT-4 Union_OT_Leviticus chin/ptt/deu.1 - 6327 UOT-5 Union_OT_Deuteronomy chin/ptt/tot.1 chin/ptt 9999999 UOT-P Union_OT_Pentateuch chin/ptn/gen.1 - 8933 NOT-1 NewTrans_OT_Genesis chin/ptn/exo.1 - 7276 NOT-2 NewTrans_OT_Exodus chin/ptn/num.1 - 7210 NOT-3 NewTrans_OT_Numeri chin/ptn/lev.1 - 5199 NOT-4 NewTrans_OT_Leviticus chin/ptn/deu.1 - 6409 NOT-5 NewTrans_OT_Deuteronomy 
chin/ptn/tot.1 chin/ptn 9999999 NOT-P NewTrans_OT_Pentateuch #---------------------------------------------------------------------- # 1700's Mandarin Chinese in ideograms (Guo Biao) chin/red/tot.1 chin/red 35027 Red Dream_of_Red_Mansion #---------------------------------------------------------------------- # Modern News Mandarin Chinese in ideograms (Guo Biao) chin/voa/tot.1 chin/voa 35027 VoA-G Voice_of_America_News_-_Ideograms #---------------------------------------------------------------------- # Modern News Mandarin Chinese in Pinyin chip/voa/tot.1 chip/voa 35027 VoA-P Voice_of_America_News_-_Pinyin #---------------------------------------------------------------------- # 600's Tibetan in ACIP-JS tibe/vim/tot.1 tibe/vim 35027 Vim Vimalakirti_Sutra tibe/ccv/tot.1 tibe/ccv 35027 CVR Comm_on_Comm_on_Valid_Reasoning #---------------------------------------------------------------------- # Modern Tibetan in ACIP-JS tibe/pmi/tot.1 tibe/pmi 35027 PMI Play_of_Mistaken_Illusion #---------------------------------------------------------------------- # Chinese encoded as "Roman-Voynichese" Numerals chrc/red/tot.1 chin/red 35027 Red-X Dream_of_Red_Mansion_-_Roman_Code #---------------------------------------------------------------------- # English words encoded as Roman Numerals enrc/wow/tot.1 engl/wow 35027 WoW-X War_of_the_Worlds_-_Roman_Code #---------------------------------------------------------------------- # English words encoded as Vietnamese syllables # Not enough syllables for that! 
envt/wow/tot.1 engl/wow 35027 WoW-Y War_of_the_Worlds_-_Vietnamese_Code #---------------------------------------------------------------------- # English words in Vigenère cipher envg/wow/tot.1 engl/wow 35027 WoW-Z War_of_the_Worlds_-_Vigenere_Cipher #---------------------------------------------------------------------- # Gordon Rugg's Pseudo-Voynichese in EVA voyp/grs/tot.1 voyp/grs 35027 Rug-S Rugg's_Pseudo-Voynichese_-_Software voyp/grm/tot.1 voyp/grm 35027 Rug-M Rugg's_Pseudo-Voynichese_-_Manual #---------------------------------------------------------------------- # Pseudo-Vietnamese by Rugg's method viep/grs/tot.1 viep/grs 35027 Rug-V Rugg's_Pseudo_Vietnamese_-_Software #---------------------------------------------------------------------- # Pseudo-Vietnamese by first-order Monkey viep/mky/tot.1 viep/mky 35027 Mky-V Monkey_Vietnamese_-_Software