#! /bin/bash -ue
# Last edited on 2025-05-04 22:48:18 by stolfi

# Truncates the given {tlwFiles} to the same proportion {frac}, concatenates the results,
# and truncates it after {maxAlpha} "a" tokens.

# The fraction {frac} is chosen so that the total number of "a" tokens in the concatenated pieces
# is at least {maxAlpha}.  However if the total number of "a" tokens in the input files is
# {maxAlpha} or less, no truncation is done, and the files are simply concatenated.

# Writes to stdout the number of lines on stdin that begin with "a"
# (one such line is counted as one "a" token).
count_alpha() {
  # grep exits with status 1 when no line matches, although the "0" it
  # prints is still a valid count; "|| true" keeps the "-e" option given
  # on the "#!" line from aborting the script in that case.
  grep -c -e '^a' || true
}

# Usage: main MAXALPHA TLWFILE...
#
# Writes to stdout the concatenation of the given files, limited to
# MAXALPHA "a" tokens in total:
#
#   * If the files together hold MAXALPHA or fewer "a" tokens, they are
#     concatenated unchanged.
#
#   * Otherwise each file holding n "a" tokens is first cut down to
#     ceil(n*MAXALPHA/total) of them, so that every file contributes the
#     same proportion, and the concatenation is then cut after MAXALPHA
#     "a" tokens.
#
# The actual cutting is delegated to the external filter
# "truncate_tlw_file.gawk", which must be on the PATH (presumably it
# copies its input up to the given number of "a" tokens -- its source
# is not part of this file).
#
# Progress messages go to stderr.  Returns 2 if MAXALPHA is missing or
# is not a non-negative decimal integer.
main() {
  local maxAlpha totAlpha nAlpha nTrunc ff
  local -a tlwFiles

  if (( $# < 1 )); then
    echo "usage: ${0##*/} MAXALPHA TLWFILE..." 1>&2
    return 2
  fi
  maxAlpha="$1"; shift
  tlwFiles=( "$@" )

  if [[ ! "${maxAlpha}" =~ ^[0-9]+$ ]]; then
    echo "${0##*/}: invalid maxAlpha '${maxAlpha}' (must be a non-negative integer)" 1>&2
    return 2
  fi
  # Force base 10, so that a leading zero is not taken as an octal prefix.
  maxAlpha=$(( 10#${maxAlpha} ))

  # Determine the total number of "a" tokens:
  totAlpha=$(cat -- "${tlwFiles[@]}" | count_alpha)

  if (( totAlpha <= maxAlpha )); then
    # No need to truncate:
    echo "    no truncation" 1>&2
    cat -- "${tlwFiles[@]}"
    return
  fi

  # Here totAlpha > maxAlpha >= 0, so the division below is safe.
  for ff in "${tlwFiles[@]}"; do
    # Count "a" tokens in file:
    nAlpha=$(count_alpha < "${ff}")
    # Quota of this file: ceil(nAlpha*maxAlpha/totAlpha), computed with
    # integer arithmetic only.  Rounding up in every file guarantees that
    # the quotas add up to at least maxAlpha.
    nTrunc=$(( (nAlpha * maxAlpha + totAlpha - 1) / totAlpha ))
    echo "      ${ff}: taking ${nTrunc} from ${nAlpha} good tokens" 1>&2
    if (( nTrunc >= nAlpha )); then
      # No truncation for this file:
      cat -- "${ff}"
    else
      truncate_tlw_file.gawk -v maxAlpha="${nTrunc}" < "${ff}"
    fi
  done \
    | truncate_tlw_file.gawk -v maxAlpha="${maxAlpha}"
  # The rounded-up quotas may add up to more than maxAlpha, hence the
  # final cut applied to the whole concatenation above.
}

main "$@"
