#! /usr/bin/gawk -f
# Last edited on 2010-12-28 20:06:10 by stolfilocal
# THIS FILE IS IN ISO-Latin-1 (yes, really!)

# Reads from standard input a complete ".dat" file generated by
# {extract-votes.gawk}, and splits it into groups of consecutive lines
# that have the same zone-locality pair.  Writes a separate ".dat" file
# for each group.
#
# IMPORTANT: the lines must be sorted by zone number and locality code.
#
# Parameters (define with "-v"):
#   {stcd}    Two-letter (uppercase) code of state.
#   {ofnum}   Numeric code of political office (1 to 8).
#   {eround}  Election round ("t1" or "t2").
#   {outdir}  Name of top-level output directory.
#
# The input ".dat" file should have one line for each and every
# urn (ballot box), with the results of a specific election
# round in a specific state and for a specific office.
#
# The output files are called
# "{outdir}/{stcd}/{zon}.{loc}/{eround}-{ofnum}.dat".
#
# Locality names are read from "dados/{stcd}/localidades.tbl", relative
# to the current directory.
#
# Error protocol: the error functions set the global {abort} to a
# non-negative exit status and call {exit}; the {END} rule then
# re-issues {exit abort} so that the status survives.

BEGIN \
  {
    abort = -1;  # Negative means "no error so far".

    # Maximum number of real candidates (including BLANK and NULL):
    max_cands = 100;

    # Validate the command line parameters.  The {stcd} pattern is
    # anchored so that strings that merely CONTAIN two capitals are rejected.
    if (stcd !~ /^[A-Z][A-Z]$/)
      { arg_error("[sdfz] missing or invalid {stcd} = \"" stcd "\""); }
    if (! is_num(ofnum,1,8))
      { arg_error("[sdfz] missing or invalid {ofnum} = \"" ofnum "\""); }
    ofnum += 0;  # Normalize to a number (drops leading zeros).
    if (eround !~ /^(t1|t2)$/)
      { arg_error("[sdfz] missing or invalid {eround} = \"" eround "\""); }
    if (outdir == "")
      { arg_error("[sdfz] must define {outdir}"); }

    # Get table with locality names:
    split("", name_loc);  # Indexed by zero-padded {loc}.
    tblname = ( "dados/" stcd "/localidades.tbl" );
    read_table(tblname,0,name_loc);

    # Data for current zone number and locality code:
    oloc = "";          # Locality of current urn group, or "" if none yet.
    ozon = "";          # Zone of current urn group, or "" if none yet.
    ofile = "";         # Current output file, or "" if none yet.
    nvotes_zonloc = 0;  # Total votes cast in zone-locality.
    nurns_zonloc = 0;   # Number of records written to the current file.

    # Global counters:
    nurns_read = 0;     # Number of input records seen.
    nvotes_read = 0;    # Total number of cast votes in input file.
    nfiles_written = 0; # Number of output files (zonlocs) written.
    nurns_written = 0;  # Total number of output records written.

    # Name prefix for temporary data files (not used anywhere in this file):
    tmpname = ( "/tmp/" PROCINFO["pid"] );

    # Number of fields expected in ".dat" file (set from first record):
    num_fields = -1;

    printf "splitting %s %s %s to %s ...\n", stcd, ofnum, eround, outdir > "/dev/stderr";
  }

# Bail out on the first record if an error was already flagged:
(abort >= 0) { exit abort; }

# Main rule: validates each urn record and appends it to the output
# file of its zone-locality group, switching files when the pair changes.
  {
    nurns_read++;

    # We should have at least 2 cands besides BLANK and NULL:
    if ((NF < 9 + 4) || (NF > 9 + max_cands))
      { data_error(("wrong number of fields " NF)); }

    # Check constancy of the number of fields:
    if ((FNR > 1) && (NF != num_fields))
      { data_error(("wrong number of fields " NF " expected " num_fields)); }
    num_fields = NF;

    # Grab general fields ({sec}, {lov}, {car} are read but not used here):
    est = $1;
    zon = $2;
    sec = $3;
    lov = $4;
    loc = $5;
    car = $6;
    ele = $7;  # Number of registered voters.
    aus = $8;  # Number of absentees.
    pre = $9;  # Number of votes cast ( { == ele-aus} ).

    # Check consistency of state code field:
    if (est != stcd)
      { data_error(("invalid {est} field = " est " should be " stcd)); }

    # Check formatting of zone (3 digits) and locality (5 digits):
    if (zon !~ /^[0-9][0-9][0-9]$/)
      { data_error(("invalid {zon} field = \"" zon "\"")); }
    if (loc !~ /^[0-9][0-9][0-9][0-9][0-9]$/)
      { data_error(("invalid {loc} field = \"" loc "\"")); }

    # Check consistency of vote totals:
    if (! is_num(ele,0,999999999))
      { data_error(("invalid {ele} = \"" ele "\"")); }
    if (! is_num(aus,0,ele))
      { data_error(("invalid {aus} = \"" aus "\"")); }
    if (! is_num(pre,0,ele))
      { data_error(("invalid {pre} = \"" pre "\"")); }
    if (pre + aus != ele)
      { data_error(("wrong vote counts ele = " ele " aus = " aus " pre = " pre)); }

    nvotes_read += pre;

    # Close current data file if zone or locality changed:
    if ((loc != oloc) || (zon != ozon))
      { if (ofile != "")
          { # NOTE(review): this test treats the LOCALITY as the major sort
            # key and the zone as the minor one, whereas the header comment
            # says "sorted by zone number and locality code".  Left as is;
            # confirm against the actual ordering of the input data.
            if ((loc < oloc) || ((loc == oloc) && (zon < ozon)))
              { data_error(("input file is not properly sorted " ozon "." oloc " : " zon "." loc)); }
            close_zonloc_file();
          }
        open_zonloc_file(zon, loc)
      }

    # Accumulate current record:
    print > ofile;
    nurns_written++;
    nurns_zonloc++;
    nvotes_zonloc += pre;
    next;
  }

END \
  {
    # Propagate the exit status set by the error functions:
    if (abort >= 0) { exit abort; }

    # Dump current zone data if any:
    if (ofile != "") { close_zonloc_file(); }

    printf "\n" > "/dev/stderr";
    printf "%8d urns read\n", nurns_read > "/dev/stderr";
    printf "%8d urns written\n", nurns_written > "/dev/stderr";
    printf "%8d zone-locality files created\n", nfiles_written > "/dev/stderr";
    printf "%8d total votes in input file\n", nvotes_read > "/dev/stderr";
  }

function shell_quote(str,   res)
  {
    # Returns {str} wrapped in single quotes, with embedded single quotes
    # escaped, so that the shell run by {system} sees it as one literal word.
    res = str;
    gsub(/'/, "'\\''", res);
    return ("'" res "'");
  }

function open_zonloc_file(zon,loc,   zldir,locname,st)
  {
    # Starts a new output group for zone {zon} and locality {loc}:
    # resets the per-group counters, creates the output directory, sets
    # the globals {ofile,ozon,oloc}, and writes the file header comments.

    # Clears all data for the current group:
    nurns_zonloc = 0;   # Number of urns in group.
    nvotes_zonloc = 0;  # Total cast votes in group.

    zldir = ( outdir "/" stcd "/" zon "." loc );  # Output directory for zonloc.

    # Make sure that output directory exists.  The path is quoted so that
    # an {outdir} with blanks or shell metacharacters is taken literally,
    # exactly as gawk itself will take it when opening {ofile}.
    # printf "creating %s ...\n", zldir > "/dev/stderr";
    st = system("mkdir -p " shell_quote(zldir));
    if (st != 0)
      { prog_error(("failed to create directory \"" zldir "\"")); }

    ofile = ( zldir "/" eround "-" ofnum ".dat" );
    oloc = loc;
    ozon = zon;

    printf "# Criado por split-dat-file-by-zon-loc.gawk\n" > ofile;
    printf "# Codificação ISO-Latin-1\n" > ofile;
    printf "# zona %s\n", zon > ofile;

    # Get the locality name:
    if (! (loc in name_loc))
      { data_error(("locality without name \"" loc "\"")); }
    locname = name_loc[loc];
    # Strip a leading run of digits and any "-" or "_" that follow it:
    gsub(/^[0-9]+[-_]*/, "", locname);
    printf "# localidade %s = %s\n", loc, locname > ofile;
    printf "\n" > ofile;
  }

function close_zonloc_file()
  {
    # Appends the urn and vote totals of the current group to {ofile},
    # closes it, reports the totals to stderr, and counts the file.
    printf "# %8d urnas\n", nurns_zonloc > ofile;
    printf "# %8d votos\n", nvotes_zonloc > ofile;
    close(ofile);
    printf "%8d urns %8d votes %s\n", nurns_zonloc, nvotes_zonloc, ofile > "/dev/stderr";
    nfiles_written++;
  }

function read_table(fname,inv,tbl,   ntbl,nlin,lin,fld,nfld,tmp,st)
  {
    # Reads from file {fname} a table of key-value pairs, one per line,
    # and stores them as {tbl[key] = value}.  Blank lines and lines whose
    # first non-blank character is "#" are skipped; a third field that
    # starts with "#" begins a trailing comment.  If {inv} is true, the
    # two columns are swapped before storing.  Fails on malformed lines,
    # repeated keys, read errors, and on an empty or missing file.
    ntbl = 0;
    nlin = 0;
    while ((st = (getline lin < fname)) > 0)
      { nlin++;
        if (! match(lin, /^[ \011]*([#]|$)/))
          { nfld = split(lin, fld, " ");
            if ((nfld >= 3) && (fld[3] ~ /^[#]/)) { nfld = 2; }
            if (nfld != 2)
              { tbl_error(fname, nlin, ("bad table entry = \"" lin "\"")); }
            # If {inv} is true, swap the two columns:
            if (inv) { tmp = fld[1]; fld[1] = fld[2]; fld[2] = tmp; }
            if (fld[1] in tbl)
              { tbl_error(fname, nlin, ("repeated key = \"" lin "\"")); }
            tbl[fld[1]] = fld[2];
            ntbl++;
          }
      }
    # {getline} returns -1 on an open or read error.  Testing its result,
    # rather than the initial value of {ERRNO} (which is not the same in
    # all gawk versions), avoids a spurious error after a clean read.
    if (st < 0) { tbl_error(fname, nlin, ERRNO); }
    close(fname);
    if (nlin == 0)
      { arg_error(("file \"" fname "\" empty or missing")); }
    # printf "loaded %6d map pairs\n", ntbl > "/dev/stderr"
  }

function is_num(x,lo,hi)
  {
    # Returns 1 if {x} is an optionally signed decimal integer whose value
    # lies in the range {lo..hi} (inclusive), and 0 otherwise.
    if (x !~ /^[-]?[0-9]+$/) { return 0; }
    if (x+0 < lo) { return 0; }
    if (x+0 > hi) { return 0; }
    return 1;
  }

function data_error(msg)
  {
    # Reports a fatal error in the current input record and aborts.
    printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
    printf "  %s\n", $0 > "/dev/stderr";
    abort = 1;
    exit(abort);
  }

function data_warning(msg)
  {
    # Reports a non-fatal problem in the current input record.
    printf "%s:%d: !! %s\n", FILENAME, FNR, msg > "/dev/stderr";
    printf "  %s\n", $0 > "/dev/stderr";
  }

function tbl_error(fname,nlin,msg)
  {
    # Reports a fatal error at line {nlin} of table file {fname} and aborts.
    printf "%s:%d: ** %s\n", fname, nlin, msg > "/dev/stderr";
    abort = 1;
    exit(abort);
  }

function arg_error(msg)
  {
    # Reports a fatal error in the command line parameters and aborts.
    printf "** %s\n", msg > "/dev/stderr";
    abort = 1;
    exit(abort);
  }

function prog_error(msg)
  {
    # Reports a fatal internal or environment error and aborts.
    printf "** PROGRAM ERROR - %s\n", msg > "/dev/stderr";
    abort = 1;
    exit(abort);
  }