#! /usr/bin/gawk -f
# Last edited on 2019-03-15 19:44:10 by stolfilocal

# Reads from "stdin" a log file as produced by "fetch-student-files".
# Outputs a list of the form "{URLDIR} {FILEDATE} {TPNAME}/{RA}/{FILENAME}"
# where {URLDIR} is the URL directory
# in students.ic.unicamp.br where the file was fetched from,
# {FILEDATE} is the file's date as shown by {ls -l}.
# For the file "main.pov", shows the date before the camera/lights cleanup.
# Also lists the file "main-orig.pov" which should be a copy of "main.pov"
# created before the camera cleanup.
#
# {FILEDATE} is written as "YYYY-MM-DD-hhmmss", or "???" when no usable
# "ls -l" line was seen for the file.  The list goes to "stdout", one line
# per fetched file, sorted by file name within each student record.
# Diagnostics (" ** BUG ...", " ** IGNORED ...") go to "stderr".
#
# Global state:
#   header_ra, header_usr   fields 2 and 4 of the current "=== ..." header line.
#   wget_dir                remote directory of the current record ("~{USER}/..."), or "???".
#   local_dir               local directory of the current record, or "???".
#   fetched_mod[fna]        modification date of fetched file {fna} (name sans dir), or "???".

BEGIN {
  # Current year and month/day, used for "ls -l" dates printed without a year.
  this_yr = strftime("%Y");
  this_md = strftime("%m%d");

  # Table from English month abbreviation to two-digit month number.
  n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", month_tag);
  split("", month_num);
  for (i = 1; i <= n; i++) { month_num[month_tag[i]] = sprintf("%02d", i); }

  split("", fetched_mod); # Modification date (ISO format). Indexed by file name sans dir.
  local_dir = "???";
  wget_dir = "???";

  # Make every "for (x in array)" loop run in ascending index order, so
  # that the output of {flush_files} is reproducible.
  PROCINFO["sorted_in"] = "@ind_str_asc";
}

/^[=][=][=][ ][0-9]+[ ][=]/ {
  # Start of a new student: write out the previous record, if any,
  # then remember the header fields for the consistency checks below.
  if (local_dir != "???") { flush_files(); }
  header_ra = $2;
  header_usr = $4;
  print > "/dev/stderr";
  next;
}

/^ln: creating symbolic link.*[\/]/ { next; }

/^fetching http:/ {
  # Header line created by {fetch-student-files.sh} before {wget}:
  url = $2; # URL of remote directory.
  lod = $4; # Local directory.

  # Reduce the URL to "~{USER}/{...}": drop the host prefix and a trailing "/*".
  wget_dir = url; # Remote wget directory (global var).
  sub(/^http:\/\/www[.]students[.]ic[.]unicamp[.]br\//, "", wget_dir);
  sub(/\/[*]$/, "", wget_dir);

  # The user name is whatever lies between the leading "~" and the first "/".
  # If there is no "/" then {k} is 0, {substr} yields "" and the check fails.
  k = index(wget_dir, "/");
  if (header_usr != substr(wget_dir, 2, k - 2)) {
    printf " ** BUG header_usr = %s wget_dir = %s «%s»\n", header_usr, wget_dir, $0 > "/dev/stderr";
  }

  # The last six characters of the local directory must be the header's RA.
  local_dir = lod; # Local directory (global var).
  sub(/\/$/, "", local_dir);
  if (header_ra != substr(local_dir, length(local_dir)-5)) {
    printf " ** BUG header_ra = %s local_dir = %s «%s»\n", header_ra, local_dir, $0 > "/dev/stderr";
  }
  next;
}

# Log lines that carry no information needed here:
/^PATH = / { next; }
/^cmd = / { next; }
/^http:[\/][\/][^ ]+[:] *$/ { next; }
/^[`][a-z]+[0-9]+\/[a-z]+\/wget-log.txt['] ->/ { next; }
/^mkdir: created/ { next; }
/^[-0-9]+ [:0-9]+ ERROR 404:/ { next; }
/^fetching files main.pov main.png/ { next; }
/^[*][*] file.*was not fetched/ { next; }
/^[*][*] file.*already exists, not fetched/ { next; }
/^removed / { next; }
/^removing camera and lights/ { next; }
/^fixing files/ { next; }
/^Fixing / { next; }
/^Done [()]see / { next; }
/^done[.]/ { next; }

/^[-0-9]+ [:0-9]+ URL:[^ ]* \[[0-9]+\/[0-9]+\] -> / {
  # File transfer line from {wget}:
  if (NF != 7) {
    printf " ** BUG NF = %s «%s»\n", NF, $0 > "/dev/stderr";
  }
  url = $3; # URL of remote file (with "URL:").
  fot = $6; # Filename where file was stored (with quotes).
  sub(/^["]/, "", fot);
  sub(/["]$/, "", fot);
  if (local_dir != substr(fot, 1, length(local_dir))) {
    printf " ** BUG fot = %s local_dir = %s «%s»\n", fot, local_dir, $0 > "/dev/stderr";
  }

  fwg = url; # Remote filename (with "~{USER}")
  gsub(/URL:http:[^ ~]+[~]/, "~", fwg);
  if (wget_dir != substr(fwg, 1, length(wget_dir))) {
    printf " ** BUG fwg = %s wget_dir = %s «%s»\n", fwg, wget_dir, $0 > "/dev/stderr";
  }

  # The name after the remote directory must equal the name after the local one.
  fna = substr(fwg, 2 + length(wget_dir)); # File name sans dir.
  if (fna != substr(fot, 2 + length(local_dir))) {
    printf " ** BUG fwg = %s fot = %s «%s»\n", fwg, fot, $0 > "/dev/stderr";
  }
  if (fna in fetched_mod) {
    printf " ** BUG repeated file %s «%s»\n", fna, $0 > "/dev/stderr";
  }
  fetched_mod[fna] = "???"; # for now; replaced when the "ls -l" line is seen.
  next;
}

/[-][-rwx]+[.]?[ \t][0-9]+[ \t][a-z0-9]+[ \t][a-z0-9]+[ \t]/ {
  # Output line of "ls -l":
  ma = $6; dy = $7; hm = $8; # Month (alpha), day, time as in "ls -l".
  flo = $9; # Local filename (with directory).

  if (! (ma in month_num)) {
    # Unknown month name: do not record a made-up date.
    printf " ** BUG ma = %s «%s»\n", ma, $0 > "/dev/stderr";
    next;
  }
  mn = month_num[ma]; # Numeric month.

  # Field 8 is either a time of day (year omitted) or a year (time omitted).
  if (hm ~ /^[0-9][0-9][:][0-9][0-9]$/) {
    yr = recent_year(mn, dy); gsub(/[:]/, "", hm); hm = (hm "00");
  } else if (hm ~ /^[0-9][0-9][:][0-9][0-9][:][0-9][0-9]$/) {
    yr = recent_year(mn, dy); gsub(/[:]/, "", hm);
  } else if (hm ~ /^[12][0-9][0-9][0-9]$/) {
    yr = hm; hm = "000000";
  } else {
    # Unparseable: leave the file's date as it is rather than reuse a stale year.
    printf " ** BUG hm = %s «%s»\n", hm, $0 > "/dev/stderr";
    next;
  }
  mod = sprintf("%s-%02d-%02d-%s", yr, mn, dy, hm);

  if (local_dir != substr(flo, 1, length(local_dir))) {
    printf " ** BUG flo = %s local_dir = %s «%s»\n", flo, local_dir, $0 > "/dev/stderr";
  }

  # A saved "-orig" copy is accounted under the name of the fetched file.
  fna = flo; # File name sans directory and sans "-orig".
  gsub(/^.*[\/]/, "", fna);
  gsub(/-orig/, "", fna);

  if (! (fna in fetched_mod)) {
    printf " ** BUG surprise file fna = %s «%s»\n", fna, $0 > "/dev/stderr";
  } else if ((fetched_mod[fna] != "???") && (fetched_mod[fna] != mod)) {
    # A different date was already recorded for this file: keep the first one.
    printf " ** BUG dup ls for file fna = %s mod = %s «%s»\n", fna, fetched_mod[fna], $0 > "/dev/stderr";
  } else {
    fetched_mod[fna] = mod;
  }
  next;
}

/^copying [a-z]+[0-9]+\/[0-9]+\/main[.](png|pov) ->/ {
  # Copying original file to saved ("-orig") file:
  fno = $2; # Original file name.
  fnn = $4; # Saved file name.
  # The saved name must have "-orig" appended to their name:
  k = index(fnn, "-orig");
  if (k == 0) {
    printf " ** BUG bad save name fno = %s fnn = %s «%s»\n", fno, fnn, $0 > "/dev/stderr";
  }
  fnx = fnn; # Saved name minus "-orig".
  gsub(/[-]orig[.]/, ".", fnx);
  if (fnx != fno) {
    printf " ** BUG save name mismatch fno = %s fnn = %s «%s»\n", fno, fnn, $0 > "/dev/stderr";
  }
  next;
}

{
  # Any line not consumed by a rule above:
  printf " ** IGNORED «%s»\n",$0 > "/dev/stderr";
}

END {
  if (local_dir != "???") { flush_files(); }
  printf "Done.\n" > "/dev/stderr";
}

function recent_year(mn, dy) {
  # Returns the year to assume for an "ls -l" date that was printed with a
  # time of day instead of a year.  Normally that is the current year; but
  # if month {mn} and day {dy} lie later in the calendar than today, the
  # date is taken to belong to the previous year.
  # NOTE(review): this assumes "ls" prints a time of day only for dates
  # that are not in the future -- confirm for the "ls" that wrote the log.
  if ((sprintf("%02d%02d", mn, dy) + 0) > (this_md + 0)) { return this_yr - 1; }
  return this_yr;
}

function flush_files(   f) {
  # Print and clear the files in {fetched_mod} at end of a student record.
  # Uses and clears {wget_dir} and {local_dir}.
  for (f in fetched_mod) {
    printf "%-30s %s %s/%s\n", wget_dir, fetched_mod[f], local_dir, f;
  }
  split("", fetched_mod);
  wget_dir = "???";
  local_dir = "???";
}