#! /bin/bash
# Last edited on 2025-05-25 21:32:51 by stolfi

cmd="${0##*/}"
usage="${cmd} [ -sep {STRING} ] < {INFILE}"

# Reads standard input and splits it into zero or more separate files.
#
# Assumes that the input was created by "catsep -sep '{STRING}'".
# Namely, each chunk must begin with a header line of the form
# "#FILE {NAME}", then the file contents, then the separating
# {STRING} on a line by itself.
#
# If the first line of a chunk lacks the "#FILE " prefix, that line is
# still consumed as the header and the chunk is written to
# "SPLITSEP.UNNAMED.{NNNNN}", where {NNNNN} is the number of files
# started before it.
#
# Progress messages and errors go to standard error.  The exit status
# is non-zero on a bad command line, on a data error (bad or duplicate
# file name), or if gawk itself fails.
#
# The default separator is a line of 70 "~"s.

# Build the default separator from the documented count rather than
# from a long literal that is easy to mistype.
sep="$(printf '%70s' '' | tr ' ' '~')"

# Every argument must be part of a "-sep {STRING}" pair; anything else
# (including a "-sep" with no value) is a usage error.
while [[ $# -gt 0 ]]; do
  if [[ ( $# -ge 2 ) && ( "$1" == "-sep" ) ]]; then
    sep="$2"; shift 2
  else
    printf 'bad or incomplete option "%s"\n' "$1" 1>&2
    printf 'usage: %s\n' "${usage}" 1>&2
    exit 1
  fi
done

# The gawk program below sends all its output to named files or to
# /dev/stderr, so its standard output should stay empty.  Capture it
# in a private temporary file so that any stray output can be detected.
tmp="$(mktemp "${TMPDIR:-/tmp}/splitsep-XXXXXX")" || {
  echo "${cmd}: cannot create temporary file" 1>&2
  exit 1
}

gawk \
  -v sep="${sep}" \
  '
    BEGIN {
      abort = -1;       # Exit status requested by data_error, or -1 if none.
      nlines = -1;      # Lines written to the current file; -1 = expecting a header.
      f = "";           # Name of the current output file.
      split("", seen);  # Set of the file names started so far.
      nfiles = 0;       # Number of files started so far.
    }

    # A separator line ends the current chunk; the next line is a header.
    ($0 == sep) {
      finish(f);
      nlines = -1;
      next;
    }

    # First line of a chunk: the header that names the output file.
    (nlines == -1) {
      if ($0 ~ /^[#]FILE /) {
        # Strip the prefix, then leading and trailing blanks, tabs,
        # and bytes \240 (NBSP in ISO Latin-1).
        f = $0;
        gsub(/^[#]FILE /, "", f);
        gsub(/^[ \011\240]+/, "", f);
        gsub(/[ \011\240]+$/, "", f);
      } else {
        # NOTE(review): a header line without the "#FILE " prefix is
        # discarded, not used as the file name - confirm against catsep.
        f = sprintf("SPLITSEP.UNNAMED.%05d", nfiles);
      }
      start(f);
      next;
    }

    # Any other line is contents of the current file.
    {
      if (f == "") { data_error("mising file header line") }
      print > f;
      nlines++;
    }

    END {
      if (abort >= 0) { exit abort; }
      # Close the last file if the input did not end with a separator.
      finish(f);
      printf "found %d files.\n", nfiles > "/dev/stderr"
    }

    # Closes the current file {f}, if one is open, and reports its line count.
    function finish(f) {
      if ((f != "") && (nlines >= 0)) {
        close(f);
        printf " %d lines.\n", nlines > "/dev/stderr";
      }
    }

    # Validates the name {f} and begins a new output file with that name.
    function start(f) {
      printf "%s ...", f > "/dev/stderr";
      if ((f == "") || (f ~ /[ \011\240]/)) { data_error(("bad name [[" f "]]")); }
      if (f in seen) { data_error("dup file name"); }
      # Create (or truncate) the file right away, so that a chunk with
      # no content lines still yields an empty file on disk.
      printf "" > f;
      nlines = 0;
      nfiles ++;
      seen[f] = 1
    }

    # Reports {msg} with the offending input line and aborts with status 1.
    function data_error(msg) {
      printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr"
      printf "  [[%s]]\n", $0 > "/dev/stderr"
      abort = 1;
      exit 1;
    }
  ' \
  > "${tmp}"
status=$?

if [[ -s "${tmp}" ]]; then
  # Keep the file for inspection and make sure the failure is visible.
  echo "bug - residual not empty" 1>&2
  echo "  residual output kept in ${tmp}" 1>&2
  if [[ ${status} -eq 0 ]]; then status=1; fi
else
  /bin/rm -f -- "${tmp}"
fi

exit "${status}"