#! /bin/bash
# Last edited on 2024-03-30 18:17:15 by stolfi

# Usage: $0 <inprefix> <outprefix>
# Collects all word lists whose names match "<inprefix>*.good",
#   "<inprefix>*.bad", and "<inprefix>*.dunno", producing the sorted,
#   duplicate-free consolidated files <outprefix>.good, <outprefix>.bad,
#   and <outprefix>.dunno, plus their union <outprefix>.bdg.
# Prints "wc" counts for the input lists and for the consolidated files,
#   then runs "dicio_list_dups_in_bad_dunno_good <outprefix>".
# NOTE(review): an earlier version of this header described <checkfiles>
#   arguments given in order of decreasing authority, ".checked-good",
#   ".checked-bad", and ".checked-dunno" inputs, per-file "<f>.good-bugs"
#   conflict reports, and unions named <outprefix>.bad+good and
#   <outprefix>.bad+dunno+good.  None of that is done by the commands in
#   this script; presumably the conflict detection now lives in
#   dicio_list_dups_in_bad_dunno_good -- confirm.

# Require exactly two arguments: the input prefix and the output prefix.
if [[ $# -ne 2 ]]; then
  echo "** usage: $0 <inprefix> <outprefix>" 1>&2
  exit 1
fi

inprefix="$1"
outprefix="$2"

# Make an unmatched glob expand to nothing, so that a category with no
# input files can be detected instead of passing the literal pattern on.
shopt -s nullglob

# Consolidate each category: every file matching "<inprefix>*.<category>"
# is merged into the sorted, duplicate-free file "<outprefix>.<category>".
for f in good bad dunno; do
  echo '==============' "$f" '==============' 1>&2
  # Only the "*" is left unquoted, so prefixes containing blanks are not
  # word-split and the expansion is done exactly once.
  infiles=( "${inprefix}"*."$f" )
  if (( ${#infiles[@]} > 0 )); then
    # Report the size of each input list on stderr.
    wc -- "${infiles[@]}" 1>&2
    echo ' ' 1>&2
    # sort reads the files itself (GNU sort supplies a missing final
    # newline per file, so the last word of one list is not glued to the
    # first word of the next).  "-T ." keeps its temporary files in the
    # current directory.
    sort -T . -- "${infiles[@]}" | uniq > "${outprefix}.$f"
  else
    # No inputs for this category: still produce an (empty) output file,
    # because the steps below expect all three to exist.
    echo "** warning: no files match ${inprefix}*.$f" 1>&2
    echo ' ' 1>&2
    : > "${outprefix}.$f"
  fi
done

# Union of the three consolidated lists, again sorted and duplicate-free.
sort -T . -- "${outprefix}".{good,bad,dunno} | uniq > "${outprefix}.bdg"

echo ' ' 1>&2
# NOTE(review): unlike the per-category counts above, this summary goes to
# stdout; kept that way in case a caller captures it -- confirm intent.
wc -- "${outprefix}".{bad,dunno,good,bdg}

# External helper (not defined in this file); its exit status becomes the
# exit status of this script.
dicio_list_dups_in_bad_dunno_good "${outprefix}"
