# Last edited on 2026-01-17 22:18:51 by stolfi 092 Generating fractional word frequency lists per section In this note we generate tables of word frequency counts for each section ("hea", "heb", "zod", etc.) and each text type (parags, labels, etc.), but with fractional frequencies that take into account dubious word spaces ",". SETUP ln -s ../.. work ln -s work/ivtff_frac_word_counts.py ln -s work/tabulate_frac_counts.py ln -s work/process_frac_words.py ln -s work/compute_freqs.gawk ln -s work/root_from_word_funcs.gawk ln -s work/ivt_loc_to_type.tbl ln -s work/error_funcs.gawk ln -s work/error_funcs.py ln -s work/process_funcs.py CREATING THE PER-SECTION-AND-TYPE TRANSCRIPTION AND WORD FREQ FILES Do it all: do_note_092.sh 25e1 #0 sec type lines JS RZ words roots #1 --- ------ ------ ------ ------ ------ ------ | bio glyphs 6 6 0 4 2 | cos glyphs 6 6 0 6 2 | hea glyphs 26 26 0 10 3 | unk glyphs 43 43 0 16 5 | bio labels 115 112 3 128 19 | cos labels 291 291 0 348 50 | hea labels 3 3 0 5 4 | pha labels 235 233 2 261 41 | zod labels 299 299 0 324 34 | bio parags 740 294 446 1564 71 | cos parags 174 174 0 713 45 | hea parags 1209 967 242 2993 117 | heb parags 373 265 108 1546 82 | pha parags 223 158 65 1092 73 | str parags 1082 1082 0 3866 168 | unk parags 305 182 123 1449 81 | cos radios 95 95 0 282 36 | cos titles 32 32 0 107 32 | hea titles 17 17 0 52 18 | heb titles 5 5 0 16 6 | pha titles 1 1 0 8 5 | str titles 3 3 0 13 4 | unk titles 25 25 0 48 12 | cos trings 43 43 0 906 86 | zod trings 36 36 0 736 73 COUNTING PUFFS ON HEAD AND BODY LINES Counting head and body lines with and without puffs: count_body_head_puffs.sh #0 body #1 ------------------------------------------------------------------- #2 sec nl nlp %lp nlt %lt nc ncp %cp nct %ct %cg #3 --- ----- ----- ---- ----- ---- ----- ----- ---- ----- ---- ---- | bio 700 90 12.8 679 96.9 29464 144 0.5 2797 9.5 10.0 | cos 129 17 13.2 101 78.2 3758 19 0.5 320 8.5 9.0 | hea 1042 78 7.5 991 95.0 31256 99 0.3 2972 9.5 
9.8 | heb 322 58 18.0 320 99.3 14201 92 0.6 1548 10.9 11.5 | pha 190 15 7.9 188 98.8 9520 35 0.4 792 8.3 8.7 | str 751 44 5.9 727 96.7 37085 59 0.2 3458 9.3 9.5 | unk 272 61 22.4 267 98.1 13353 114 0.9 1297 9.7 10.6 #0 head #1 ------------------------------------------------------------------- #2 sec nl nlp %lp nlt %lt nc ncp %cp nct %ct %cg #3 --- ----- ----- ---- ----- ---- ----- ----- ---- ----- ---- ---- | bio 40 32 79.9 40 99.9 1757 73 4.2 160 9.1 13.2 | cos 45 24 53.3 38 84.3 1357 37 2.7 98 7.2 9.9 | hea 167 129 77.2 153 91.5 6307 289 4.6 391 6.2 10.8 | heb 51 47 92.0 49 96.0 2577 143 5.5 158 6.1 11.7 | pha 33 27 81.7 32 96.8 1850 62 3.3 127 6.9 10.2 | str 331 258 77.9 331 99.9 19671 621 3.2 1816 9.2 12.4 | unk 33 32 96.8 30 90.8 1726 112 6.5 91 5.3 11.7 >>> STOPPED HERE <<< for st = hea.parags heb.parags bio.parags bio.labels cos.trings cos.labels zod.trings zod.labels str.parags ; do sec="${st/.*/}" txty="${st/*./}"