#! /bin/bash -ue
# Last edited on 2025-05-04 22:50:15 by stolfi

# We use the majority version of the interlinear.

# Step 1: collect every raw text line that contains a one-leg gallows
# letter ('f' or 'p') from the per-subdirectory "raw.wdf" files.
# Output: ${raw_oneleg_lines}, one matching line per row, each prefixed
# by grep with "SUBDIR/raw.wdf:".
echo "extracting raw text lines that have one-leg gallows ..." 1>&2
raw_text_dir="dat/voyn/maj"
raw_oneleg_lines=".oneleg_raw.lines"
# Run grep from inside the text directory so the file-name prefix is just
# "SUBDIR/raw.wdf:".  -H forces that prefix even when the glob matches a
# single file (later steps rely on it).  Hits from files whose path starts
# with "tot" are discarded.
( cd "${raw_text_dir}/" && grep -E -H -e '[fp]' */raw.wdf ) \
  | grep -E -v -e '^tot' \
  > "${raw_oneleg_lines}"
# Report how many lines were kept (diagnostics go to stderr).
printf "%d lines with one-leg gallows\n" "$(wc -l < "${raw_oneleg_lines}")" 1>&2
  
# Step 2: tally the extracted lines per section.
# Everything from "/raw.wdf:" onward is cut off each line, leaving only the
# leading path component; identical names are then counted and listed with
# the largest count first.
echo "counting one-leg-gallow lines by section ..." 1>&2
raw_oneleg_lines_per_sec_cts=".oneleg_raw_lines_per_sec.cts"
sed -e 's@/raw.wdf:.*@@g' "${raw_oneleg_lines}" \
  | sort \
  | uniq -c \
  | sort -b -k1nr \
  > "${raw_oneleg_lines_per_sec_cts}"
# Show the per-section table on stderr.
cat "${raw_oneleg_lines_per_sec_cts}" 1>&2

# Step 3: build a frequency table of the distinct lexemes (words) that
# contain a one-leg gallows letter ('f' or 'p').
# Input:  ${raw_oneleg_lines} (lines prefixed with "SUBDIR/raw.wdf:").
# Output: ${raw_oneleg_lexeme_cts}, rows of "COUNT LEXEME", highest count first.
echo "extracting lexemes with one-leg gallows and counting occs ..." 1>&2
raw_oneleg_lexeme_cts=".oneleg_raw.wct"
# Pipeline: drop lines starting with "tot", strip everything up to the last
# ':' (the file-name prefix), put each blank-separated word on its own line,
# keep only words with 'f' or 'p', then count duplicates and rank by count.
grep -E -v -e '^tot' "${raw_oneleg_lines}" \
  | sed -e 's@^.*:@@g' \
  | tr ' ' '\012' \
  | grep -E -e '[fp]' \
  | sort \
  | uniq -c \
  | sort -b -k1nr \
  > "${raw_oneleg_lexeme_cts}"
# Summary and a peek at both ends of the table; all diagnostics on stderr.
printf "%d distinct lexemes with one-leg gallows\n" "$(wc -l < "${raw_oneleg_lexeme_cts}")" 1>&2
head -n 10 "${raw_oneleg_lexeme_cts}" 1>&2
echo "..." 1>&2
tail -n 10 "${raw_oneleg_lexeme_cts}" 1>&2

# Step 4: find where the one-leg-gallows lexemes occur in the interlinear.
# Input:  ${raw_oneleg_lexeme_cts} ("COUNT LEXEME" rows) and
#         ${raw_text_dir}/raw.tlw.
# Output: ${raw_oneleg_lexemes} - sorted lexeme list, one per line, each
#                                 preceded by a single blank;
#         ${raw_oneleg_tokens}  - fields 2 and 3 of every raw.tlw line that
#                                 contains one of those patterns.
echo "getting the interlinear locations of occs of those lexemes ..." 1>&2
raw_oneleg_lexemes=".oneleg.wds"
# Keep only the lexeme column.  The leading blank becomes part of the
# fixed-string pattern handed to grep below.
gawk '//{ printf " %s\n", $2 }' "${raw_oneleg_lexeme_cts}" \
  | sort \
  > "${raw_oneleg_lexemes}"
raw_oneleg_tokens=".oneleg_located.tlw"
# Read raw.tlw by path instead of cd-ing into its directory, so that the
# pattern file (a path relative to the starting directory) stays reachable.
# NOTE(review): patterns match as substrings, so " LEXEME" also selects
# longer words that begin with LEXEME - confirm against the raw.tlw format.
grep -F -f "${raw_oneleg_lexemes}" "${raw_text_dir}/raw.tlw" \
  | gawk '//{ print $2, $3 }' \
  > "${raw_oneleg_tokens}"

exit 0

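# Extract the lines of the interlinear where those lexemes occur:
# NOTE: the lines below are a pasted Emacs command-history log (Emacs Lisp
# forms, not shell).  They are never executed because the script stops at
# the "exit 0" above.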

(query-replace-regexp "^.*:" "" nil (use-region-beginning) (use-region-end) nil (use-region-noncontiguous-p))
" nil (use-region-beginning) (use-region-end) nil (use-region-noncontiguous-p))
(query-replace-regexp "  *" "
(shell-command-on-region 1 169759 "egrep -e '[fpFP]'" '(4) '(4) nil t)
(shell-command-on-region 1 31183 "sort | uniq -c" '(4) '(4) nil t)
(shell-command-on-region 1 19267 "sort -b -k1nr" '(4) '(4) nil t)
(write-file "/home/stolfi2/stolfi/projects/voynich/work/Notes/103/dat/voyn/maj/.oneleg.wct" t)
(kill-buffer ".oneleg.wct")
(shell-command-on-region 1 9523 "sort" '(4) '(4) nil t)
(find-file "/home/stolfi2/stolfi/projects/voynich/work/Notes/103/.." t)
(dired-create-directory "/home/stolfi2/stolfi/projects/voynich/work/Notes/112")
(switch-to-buffer "Notes<stolfi2>" nil 'force-same-window)
(find-file "~/projects/voynich/work/Notes/." t)
(shell-command "cp -av 103/Note-103.txt 112/Note-112.txt" nil nil)
(kill-buffer "Note-103.txt")
(find-file "~/projects/voynich/work/tr-stats" t)
(find-alternate-file "~/projects/voynich/tr-stats" t)
(find-alternate-file "~/projects/voynich/work/Notes/tr-stats" t)
(kill-buffer "tex")
(kill-buffer "dat<tr-stats>")
(kill-buffer "tot.1")
(kill-buffer "maj<tex>")
(kill-buffer "voyn<tex>")
(kill-buffer "tex")
(kill-buffer "tr-stats")
(string-rectangle 677 1097 "# ")
(find-file "~/projects/voynich/work/Notes/112/extract_one_leg_gallows.sh" t)
(kill-buffer "extract_one_leg_gallows.sh")
(find-file "~/projects/voynich/work/Notes/112/." t)
(apropos-command '("history") nil)
(execute-extended-command nil "command-history" "command-h")
(command-history)
(describe-function 'command-history)
(execute-extended-command nil "list-command-history" "list-command-history")
(describe-function 'list-command-history)
(execute-extended-command nil "command-history")
(command-history)
