#! /usr/bin/gawk -f
# Last edited on 2021-11-26 20:47:06 by stolfi

# Reads each line from stdin and writes out all lines such that a
# specified field has a value listed in a given table.
#
# The {field} parameter defaults to 1 if not specified.
#
# Each line of {TBLFILE} should have a single word, optionally followed
# by a "#"-comment.  The procedure writes to the output any line whose
# field number {field} is equal to one of those words.  Blank lines and
# "#"-comment lines in the table file are ignored.
#
# In any case, input lines that are blank or begin with "#" are written
# to the output without change.  Otherwise, no other meaning is given
# to the "#" character.

BEGIN {
  usage = ( \
    "cat INFILE \\\n" \
    " | extract_by_field.gawk \\\n" \
    " [ -v field=FLDNUM ] \\\n" \
    " -v table=TBLFILE \\\n" \
    " > OUTFILE " \
  );

  # {abort} is negative while no error has been flagged; the error
  # functions set it to the exit status before exiting.
  abort = -1;

  if (table == "") { arg_error("must specify \"-v table=FILE\"\n"); }
  if (field == "") { field = 1; }

  # Reject values that cannot be used as a field number, so that
  # "$(field)" below never sees a negative or non-numeric index.
  if (field !~ /^[0-9]+$/) {
    arg_error(("invalid field number \"" field "\""));
  }
  field = field + 0;

  # {dic} is the set of accepted keys (the indices are the keys).
  split("", dic);
  read_table(table, dic);
}

# Defensive guard: stop at once if an error was flagged during BEGIN.
(abort >= 0) { exit abort; }

# Comment lines are copied through unchanged.
/^[#]/ { print; next; }

# Blank lines (spaces and/or tabs only) are copied through unchanged.
/^[ \011]*$/ { print; next; }

# Data lines: print only those whose selected field is a table key.
/./ {
  if (NF < field) { data_error("not enough input fields\n"); }
  x = $(field);
  if (x in dic) { print; }
  next;
}

# Loads the keys listed in file {fname} into the array {tbl}, setting
# {tbl[key] = 1} for each key.  Blank lines and lines whose first
# non-blank character is "#" are skipped; a key may be followed by a
# "#"-comment.  Aborts through {tbl_error} on a malformed entry, a
# repeated key, or a read error, and through {arg_error} if no line
# at all could be read from the file.
# The names after {tbl} are local variables, not parameters.
function read_table(fname, tbl,    ntbl, nlin, lin, fld, nfld, status) {
  ntbl = 0;
  nlin = 0;
  while ((status = (getline lin < fname)) > 0) {
    nlin++;
    # Skip blank lines and whole-line comments.
    if (lin ~ /^[ \011]*([#]|$)/) { continue; }
    nfld = split(lin, fld, " ");
    # Anything from a second field that starts with "#" is a comment.
    if ((nfld >= 2) && (fld[2] ~ /^[#]/)) { nfld = 1; }
    if (nfld != 1) {
      tbl_error(fname, nlin, ("bad table entry = \"" lin "\""));
    }
    if (fld[1] in tbl) {
      tbl_error(fname, nlin, ("repeated key = \"" lin "\""));
    }
    tbl[fld[1]] = 1;
    ntbl++;
  }
  # A negative status means the read itself failed (e.g. missing file).
  if (status < 0) {
    tbl_error(fname, nlin, ((ERRNO != "") ? ERRNO : "read error"));
  }
  close(fname);
  if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  # printf "loaded %6d key values\n", ntbl > "/dev/stderr"
}

# Reports a command-line error {msg} plus the usage text on stderr,
# then exits with status 1.
function arg_error(msg) {
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1
}

# Reports error {msg} at line {n} of table file {f} on stderr,
# then exits with status 1.
function tbl_error(f, n, msg) {
  printf "%s:%d: %s\n", f, n, msg > "/dev/stderr";
  abort = 1;
  exit 1
}

# Reports error {msg} at the current input file and line on stderr,
# then exits with status 1.
function data_error(msg) {
  printf "%s:%d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1
}