#! /usr/bin/gawk -f # Last edited on 1999-07-21 05:20:08 by stolfi # Reads a (symmetric) weight matrix M[i,j] and a list of permutations. # Finds the permutation(s) that minimize the sum over all j>i of # "M[p[i],p[j]]*(j-i-1)^power", for a given "power" (default 0). BEGIN { abort = -1; if (matrixFile == "") { error("must define \"matrixFile\""); } if (power == "") { power = 0; } read_matrix(matrixFile); precompute_power_table(nf); split("", pbest); nb = 0; sbest = 999999999; # "pbest[0..nb-1]" are the best permutations, and "sbest" is their weight. } (abort >= 0) { exit abort; } /^ *$/ { next; } /^[#]/ { next; } /./ { if (NF != 1) { error("bad NF"); } ps = $1; if (length(ps) != nf) { error("wrong length"); } s = compute_score(ps); if (s <= sbest) { if (s < sbest) { nb = 0; sbest = s; } pbest[nb] = ps; nb++; } } END { if (abort >= 0) { exit abort; } printf "score = %d\n", sbest; for (k=0; k<nb; k++) { printf "%s\n", pbest[k]; } } function read_matrix(file, lin,fld,nfld,i,j) { # defines the number of elements "nf", the element # codes "f[0..nf-1]", and the matrix "M" # (indexed with element codes). nf = 0; split("", f); split("", M); i = 0; while (getline lin < file) { if (match(lin, /^[#]/)) { continue; } if (match(lin, /^[- ]*$/)) { continue; } nfld = split(lin, fld); if (nf == 0) { # Assume it is a header line; fetch folio codes for (j=0; j<nfld; j++) { f[j] = fld[j+1]; } nf = nfld; } else { # Read line "i" of the matrix if (nfld != nf+1) { error((file ": bad num fields in line")); } if (fld[1] != f[i]) { error((file ": rows out of order?")); } for (j=0; j<nf; j++) { v = fld[j+2]; if ((v == ".")||(v == "-")) { v = 0; } if (v == "*") { v = 9999; } M[f[i],f[j]] = v; } i++; } } if (ERRNO != "0") { error((file ": " ERRNO)); } # Check if symmstric for(i=0; i<nf; i++) { for(j=i+1; j<nf; j++) { if (M[f[i],f[j]] != M[f[j],f[i]]) { error((file ": matrix is not symmetric")); } } } printf "nf = %d\n", nf > "/dev/stderr"; printf "f = " > "/dev/stderr"; for(j=0; j<nf; j++) { printf "%s", f[j] > "/dev/stderr"; } printf "\n" > "/dev/stderr"; } function precompute_power_table(nf, d,p) { # Precomputes a table of "pw[d] = d^power", with "d = 0..nf", # for use by compute_score below. By convention 0^0 here is 0 not 1. split("", pw); pw[0] = 0; for (d=1; d<= nf; d++) { if (power == 0) { p = 1; } else if (power == 1) { p = d; } else if (power == 2) { p = d*d; } else { p = exp(log(d)*power); } pw[d] = p; } } function compute_score(ps, i,j,sum,fi,fj) { # The score of a permuted matrix "MP" is the sum of # "MP[i,j]*(i-j-1)^power" for all elements outside the main # diagonal. Note that elements in the two adjacent sub-diagonals # have zero weight. sum = 0; for(i=0; i<nf; i++) { fi = substr(ps,i+1,1); for(j=i+2; j<nf; j++) { fj = substr(ps,j+1,1); # Note: adjacent elements are skipped, so d>0 here. if(! ((fi,fj) in M)) { error("bad code in perm"); } sum += M[fi,fj]*pw[j-i-1]; } } return(sum); } function error(msg) { printf "line %d: %s\n", NR, msg > "/dev/stderr"; abort = 1; exit 1; }