#! /bin/bash -eu
# Last edited on 2026-03-08 15:52:15 by stolfi


# Tabulate, for each position value {p}, the number of joined records whose
# second field equals {p} and the percentage of those that contain " %"
# (counted as paragraph heads, per the progress message below).
#
# Inputs:  ${ofile}, ${rfile} -- whitespace-separated files keyed on field 1.
# Outputs: one line "p nline npara pct" per {p}, written to stderr.
# Scratch: .aaa, .bbb, .temp, .lins in the current directory (left behind).
echo "counting percentage of parag heads as a function of daiin position ..." 1>&2

# Sort on the join field only, ignoring leading blanks, which is the ordering
# that join(1) expects.  Reading the files directly (instead of through cat)
# makes a missing or unreadable input abort the script under "-e".
sort -k 1b,1 -- "${ofile}" > .aaa
sort -k 1b,1 -- "${rfile}" > .bbb

# Full outer join; fields absent on either side are filled with '???'.
join -1 1 -2 1 -a 1 -a 2 -e '???' -o 0,1.2,2.2,2.3 .aaa .bbb > .temp

# NOTE(review): {count} is an external helper not defined in this file;
# presumably it prints the integers of the given range -- confirm.
for p in $( count 0 20 ); do
  # The '???' filler would evaluate to 0 in the numeric comparison and be
  # miscounted as position 0, so records without a position are excluded.
  gawk -v p="$p" '($2 != "???") && ($2+0 == p+0) { print }' .temp > .lins
  # echo "~~ p = $p ~~" 1>&2
  # cat .lins 1>&2
  nline=$( wc -l < .lins )
  # Keep "grep | wc" rather than "grep -c": grep exits non-zero on zero
  # matches, which would abort the script under "-e".
  npara=$( grep -E ' [%]' .lins | wc -l )
  if (( nline > 0 )); then
    pct=$( echo "100*${npara}/${nline}" | bc -lq )
  else
    pct=0
  fi
  printf "%3d %4d %4d  %6.1f\n" "$p" "$nline" "$npara" "$pct" 1>&2
done

# List the text lines of ${ivt_file} that mention a low-index occurrence,
# with locators and separators cleaned up.
#
# Inputs:  ${occ_file} -- records whose field 1 is a locator and field 2 an index;
#          ${ivt_file} -- records whose field 1 is a locator and field 2 the text.
# Output:  ${sum_file} -- "locator text" lines, sorted on the third field.
# Scratch: .locs in the current directory (left behind).
echo "listing low-index occurrences with dots ..." 1>&2

# Collect field 1 of every record whose field 2 is numerically below 10.
gawk '($2 + 0 < 10) { print $1 }' "${occ_file}" > .locs

# Select lines containing any collected locator as a whole word (fixed-string
# match), then normalize each line and sort the result.
grep -F -w -f .locs -- "${ivt_file}" \
  | gawk \
      ' {
          loc = $1; text = $2;
          # Locator: drop angle brackets and any ";LETTERS" suffix.
          gsub(/[<>]/, "", loc); gsub(/[;][A-Z]+/, "", loc)
          # Text: mark "<%>=" as "% ." and a leading "=" as "- .".
          gsub(/<%>[=]/, "% .", text)
          gsub(/^[=]/, "- .", text)
          # Text: delete commas and turn every "-" into ".".
          gsub(/[,]/, "", text)
          # NOTE(review): this also rewrites the "-" inserted by the leading
          # "=" substitution above, so that marker is emitted as ". ." --
          # confirm that this is intended.
          gsub(/[-]/, ".", text)
          printf "%-12s %s\n", loc, text
        }
      ' \
  | sort -b -k3,3 \
  > "${sum_file}"

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$



  "${temp_psize???}" using 2:(zer(0)) notitle with linespoints pt 1 ps 2.3 lw 2 lc rgb '#777777', \

