#! /bin/bash
# Last edited on 2025-05-25 21:32:51 by stolfi

# Name under which this script was invoked, with any directory part
# removed, and the one-line synopsis printed when the arguments are rejected.
cmd=${0##*/}
printf -v usage '%s [ -sep {STRING} ]  < {INFILE}' "${cmd}"

# Reads a file and splits it into zero or more separate files.
# 
# Assumes that the input was created by "catsep -sep '{STRING}'".
# Namely, each chunk must begin with its file name, on a line
# by itself (optionally prefixed with "#FILE "), then the
# file contents, then the separating {STRING} on a line by itself.
#
# The default separator is a line of 70 "~"s.

# Separator line assumed when "-sep" is not given.
sep="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";

# parse_options {ARG}...
#
# Parses the command line arguments of the script.
#
# Globals:   sep (written), usage (read, for the error message).
# Arguments: the script arguments, normally "$@".
# Outputs:   on error, a diagnostic and the usage line on stderr.
# Returns:   0 if every argument was consumed; otherwise exits the
#            shell with status 1.
#
# The only recognized option is "-sep {STRING}".  The script takes no
# positional arguments, so any other word (an unknown option, a stray
# argument, or "-sep" with no value after it) is rejected.
parse_options() {
  while [[ $# -gt 0 ]]; do
    if [[ ( $# -ge 2 ) && ( "$1" == "-sep" ) ]]; then
      sep="$2"; shift 2;
    else
      echo "bad or incomplete option "'"'"$1"'"' 1>&2
      echo "usage: ${usage}" 1>&2; exit 1;
    fi
  done
}

parse_options "$@"

# Scratch file that receives the standard output of the gawk program
# below.  The program writes only to the chunk files and to stderr, so
# this file should stay empty; it exists purely as a sanity check.
# It is created with mktemp so that its name is not predictable.
tmp="$(mktemp "${TMPDIR:-/tmp}/splitsep-XXXXXX")" \
  || { echo "** cannot create temporary file" 1>&2; exit 1; }

# Split the standard input.  gawk reads stdin directly; its exit status
# is saved so that a data error or an I/O failure is reported to the caller.
gawk \
    -v sep="${sep}" \
    ' # State variables:
      #   f       name of the chunk file being written ("" before the first);
      #   nlines  lines written to {f} so far, or -1 while waiting for the
      #           header line of the next chunk;
      #   nfiles  number of chunks started so far;
      #   seen    set of the file names already used;
      #   abort   exit status requested by {data_error}, or -1 if none.
      BEGIN { 
        abort = -1; nlines = -1; f = ""; split("", seen); nfiles = 0;
      }

      # A separator line ends the current chunk (if any).
      ($0 == sep) { 
        finish(f);  nlines = -1;  next;
      }

      # First line of a chunk: the header line.
      # NOTE(review): a header without the "#FILE " prefix is discarded and
      # the chunk gets a generated name; confirm that this is intended.
      (nlines == -1) { 
        if ($0 ~ /^[#]FILE /) {
          f = $0;
          gsub(/^[#]FILE /, "", f);
          gsub(/^[ \011\240]+/, "", f);
          gsub(/[ \011\240]+$/, "", f);
        } else {
          f = sprintf("SPLITSEP.UNNAMED.%05d", nfiles);
        }
        start(f);
        next;
      }

      # Any other line is content of the current chunk.
      // {
        if (f == "") { data_error("mising file header line") }
        print > f; nlines++;
      }

      END { 
        # After {data_error}, just propagate the requested status.
        if (abort >= 0) { exit abort; } 
        # Close the last chunk if the input lacks a final separator.
        finish(f);
        printf "found %d files.\n", nfiles > "/dev/stderr"
      }

      # Closes chunk file {f} and reports its line count, if one is open.
      function finish(f)
        { if ((f != "") && (nlines >= 0))
            { close(f);
              printf " %d lines.\n", nlines > "/dev/stderr";
            }
        }

      # Validates the chunk name {f} and begins a new chunk with it.
      function start(f)
        { printf "%s ...", f > "/dev/stderr"; 
          if ((f == "") || (f ~ /[ \011\240]/))
            { data_error(("bad name [[" f "]]")); }
          if (f in seen) 
            { data_error("dup file name"); }
          # Create (or truncate) the file right away, so that a chunk
          # with no content lines still produces an empty file.
          printf "" > f;
          nlines = 0;
          nfiles ++;
          seen[f] = 1
        }

      # Reports a fatal problem with the current input line and stops.
      function data_error(msg) 
        { printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr"
          printf "  [[%s]]\n", $0 > "/dev/stderr"
          abort = 1; exit 1;
        }
    ' \
  > "${tmp}"
status=$?

# Anything in the scratch file means the program printed to stdout,
# which it should never do.  Keep the file for inspection in that case.
if [[ -s "${tmp}" ]]; then
  echo "bug - residual not empty" 1>&2
  echo "  residual output kept in ${tmp}" 1>&2
  if [[ ${status} -eq 0 ]]; then status=1; fi
else
  /bin/rm -f "${tmp}"
fi

# Propagate a failure of the splitting step to the caller.
if [[ ${status} -ne 0 ]]; then exit "${status}"; fi