#! /bin/gawk -f
# Last edited on 2007-01-20 11:58:07 by stolfi

# Filters a list of file records read from standard input, keeping for each
# distinct file name only the last record seen for that name.  Because the
# input must be sorted by name and then by modification time, the last
# record of each run of equal names is the newest version of that file.
#
# Input fields (blank-separated): $1 = size, $2 = modification time,
# $3 = parent directory, $4 = file name.  Only $2 and $4 are examined.
#
# Global state:
#   abort     exit status requested by data_error(); negative means no error.
#   nread     number of data records processed so far.
#   nwritten  number of records written to standard output.
#   olin, omodtime, oname
#             the previous data record and its modification time and name.

BEGIN {
  PROG_NAME = "pick-newest-versions";
  PROG_DESC = "reads a list of files, outputs the newest versions only";
  PROG_HELP = ( \
    "${PROG_NAME} < {INFILE} > {OUTFILE}" \
  );
  PROG_INFO = ( \
    "\nNAME" \
    "\n ${PROG_NAME} - ${PROG_DESC}." \
    "\n" \
    "\nSYNOPSIS" \
    "\n ${PROG_HELP[@]}" \
    "\n" \
    "\nDESCRIPTION" \
    "\n Reads from stdin a list of files, one per line, in the format" \
    "\n \"{SIZE} {MODTIME} {DIR} {NAME}\", where {SIZE} is the file's" \
    "\n size in bytes, {MODTIME} is its modification time in the format" \
    "\n \"{YYYY}-{mm}-{dd}-{HH}{MM}{SS}\", {DIR} is the file's parent" \
    "\n directory, and {NAME} is the filename minus the directory." \
    "\n The lines must be sorted by {NAME} and then by {MODTIME}." \
    "\n" \
    "\n The input file can be obtained by piping the output of" \
    "\n {find-all-files-size-date} through " \
    "\n {sed -e 's:/\\([^/]*\\)$:/ \\1:g' | sort -k4,4 -k2,3}." \
    "\n" \
    "\n The program writes to standard output one line for each distinct" \
    "\n {NAME}, namely the input line for that name with the highest" \
    "\n (most recent) {MODTIME}." \
    "\n" \
    "\nOPTIONS" \
    "\n None yet." \
    "\n" \
    "\nSEE ALSO" \
    "\n find-all-files-size-date(1)" \
    "\nAUTHOR" \
    "\n Created 2007-01-20 by Jorge Stolfi, Unicamp" \
  );

  # Should check for leftover arguments...

  abort = -1;
  nread = 0;
  nwritten = 0;
}

# Cleanup: delete tabs, vertical tabs and carriage returns from the record.
# Assigning to $0 makes awk re-split the fields for the rules below.
{
  gsub(/[\t\v\r]/, "", $0);
}

# Ignore comments and blank lines:
/^[ ]*($|[#])/ {
  next;
}

# Data record: flush the pending record when the name changes, and check
# that records with the same name arrive in non-decreasing MODTIME order.
{
  # Appending "" forces string comparisons below.  Without it awk compares
  # numeric-looking fields as numbers, so names such as "1" and "01" would
  # be considered equal and one of the two files would be silently dropped.
  modtime = $2 "";
  dir = $3;
  name = $4 "";

  if (nread > 0) {
    if (name != oname) {
      # The pending record was the last (newest) one for the previous name.
      print olin;
      nwritten++;
    } else if (modtime < omodtime) {
      data_error(("out of order " modtime " " omodtime));
    }
  }

  # The current record becomes the pending candidate for its name.
  olin = $0;
  omodtime = modtime;
  oname = name;
  nread++;
  next;
}

END {
  # Propagate the status set by data_error() without printing anything more.
  if (abort >= 0) { exit (abort); }

  # Flush the candidate for the last name; it must be counted as written
  # too, otherwise the summary below is one short.
  if (nread > 0) {
    print olin;
    nwritten++;
  }
  printf "%d files read, %d files written\n", nread, nwritten > "/dev/stderr";
}

# Reports a problem with the current input record on standard error,
# prefixed with the record number, then terminates with exit status 1.
#   msg  text describing the problem.
function data_error(msg) {
  printf "stdin:%d: **%s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit (abort);
}