#! /usr/bin/gawk -f
# Last edited on 2008-06-11 00:14:20 by stolfi

BEGIN {
  usage = ( "extract-EID-items -v itemList={FILE} < {INFILE} > {OUTFILE}" );

  # Exit status requested by an error routine; negative means "no error".
  abort = -1;

  # Reads an EID file from {stdin}, writes selected items of it to {stdout}.
  # Assumes that each item starts with a line with the format
  #
  #   > {ITEM_NAME} ...
  #
  # The items to be extracted are specified by the {itemList} file.
  # It should contain one {ITEM_NAME} per line.  In that file, everything
  # from a "#" to the end of the line is ignored, and so are blank lines.

  if (itemList == "") { arg_error("should defeine \"itemList\""); }

  # Parsing state (reset at each item header line):
  takeItem = 0;  # TRUE if the current item is to be extracted.

  # Item table
  split("", ok);  # {ok[id]} is defined iff item {id} is to be extracted.
  split("", ct);  # {ct[id]} is how many times item {id} was seen.

  read_item_list(itemList, ok, ct);

  # Global state:
  nRead = 0;          # Number of items read.
  nWritten = 0;       # Number of items written.
  nLinesRead = 0;     # Number of lines read.
  nLinesWritten = 0;  # Number of lines written.
}

# Stop processing input as soon as any error routine has set {abort}.
(abort >= 0) { exit abort; }

# Item header line: decide whether the item that starts here is wanted.
/^[>]/ {
  nRead++;
  nLinesRead++;
  id = $2;
  if (id in ok) {
    # Each requested item may appear at most once in the input.
    if (ct[id] > 0) { data_error(("repeated item \"" id "\"")); }
    ct[id]++;
    takeItem = 1;
    print;
    nWritten++;
    nLinesWritten++;
  } else {
    takeItem = 0;
  }
  next;
}

# Any other line belongs to the current item; copy it if the item is wanted.
{
  nLinesRead++;
  if (takeItem) {
    if ($0 == "CDS_incomplete") {
      # Lines consisting solely of this marker are dropped, with a warning.
      printf "%s:%s: !! line discarded «%s»\n", FILENAME, FNR, $0 > "/dev/stderr";
    } else {
      print;
      nLinesWritten++;
    }
  }
}

END {
  # {exit} inside BEGIN or a rule still runs END; propagate the error status
  # and skip the statistics in that case.
  if (abort >= 0) { exit abort; }
  printf "items read = %10d written = %10d\n", nRead, nWritten > "/dev/stderr";
  printf "lines read = %10d written = %10d\n", nLinesRead, nLinesWritten > "/dev/stderr";
}

# Reads the list of item ids from file {fname}, one per line.
# For each id found, sets {ok[id] = 1} and {ct[id] = 0}.
# Aborts through {tbl_error} on a malformed or repeated id or on a read
# error, and through {arg_error} if no line at all could be read.
# The parameters after {ct} are local variables.
function read_item_list(fname,ok,ct,    ntbl,nlin,lin,id,status)
{
  ntbl = 0;
  nlin = 0;
  while ((status = (getline lin < fname)) > 0) {
    nlin++;
    # Strip the "#"-comment, then surrounding blanks (and a trailing CR).
    sub(/[ \t]*#.*$/, "", lin);
    sub(/^[ \t]+/, "", lin);
    sub(/[ \t\r]+$/, "", lin);
    if (lin != "") {
      id = lin;
      if (id ~ /[^-A-Za-z0-9_]/) {
        tbl_error(fname, nlin, ("bad item id = \"" id "\""));
      }
      if (id in ok) {
        tbl_error(fname, nlin, ("repeated item id = \"" id "\""));
      }
      ok[id] = 1;
      ct[id] = 0;
      ntbl++;
    }
  }
  # {ERRNO} is only meaningful after a failure, so rely on the value
  # returned by {getline} (negative on error) to detect a failed read.
  if (status < 0) { tbl_error(fname, nlin, ERRNO); }
  close(fname);
  if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", ntbl > "/dev/stderr"
}

# Reports a command line error {msg} plus the usage line, then exits with status 1.
function arg_error(msg)
{
  printf "** %s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports error {msg} at line {n} of table file {f}, then exits with status 1.
function tbl_error(f,n,msg)
{
  printf "%s:%d: %s\n", f, n, msg > "/dev/stderr";
  abort = 1;
  exit 1
}

# Reports error {msg} at the current input line, then exits with status 1.
function data_error(msg)
{
  printf "%s:%d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1
}