#! /usr/bin/gawk -f
# Last edited on 2026-02-16 18:13:28 by stolfi

# Used by {plot_pos_file.sh}.
#
# Reads a file with one line per position in the format "{LOC} {PLEN}
# {WPOS} {WTYP} {WCOL}" where {LOC} is a locus ID string, {PLEN} and
# {WPOS} are floats, {WTYP} is a point type, and {WCOL} is a string
# that is copied to the output unchanged.  Blank lines and lines whose
# first non-blank character is "#" are ignored.
#
# Client must define (with "-v") the variable {yplot} which is the Y
# coord of the lowest data subset.
#
# Outputs another file with fields "{LOC} {XPOS} {YPOS} {WTYP} {WCOL}" where
# {XPOS} is {WPOS} shifted by some amount {SH}, {YPOS} is a
# sequential number starting with {yplot} and incremented by 1 for each
# group of consecutive lines with the same {LOC}, and {WTYP} is the input
# {WTYP}.
#
# But also appends lines "{LOC} {XMIN} {YPOS} -1 0x777777" and
# "{LOC} {XMAX} {YPOS} -2 0x777777" where {XMIN,XMAX} are {0,PLEN}
# shifted by the same amount {SH} as the {WPOS}, followed by a blank
# line that terminates the group.
#
# The shift amount {SH} is minus the average of the {WPOS} which have {WTYP=1},
# over all consecutive lines with the same {LOC}.  If the group has no
# line with {WTYP=1}, the shift is zero.
#
# A one-line summary of each group is also written to "/dev/stderr".
#
# The functions {data_error} and {prog_error} are not defined in this
# part of the file; they are assumed to be provided elsewhere and
# presumably abort the run (TODO confirm).

BEGIN {
  yplot += 0;           # Force {yplot} to be numeric (0 if the client did not set it).
  cur_loc = "";         # Locus of the group being accumulated; "" means none yet.
  cur_plen = -1;        # {PLEN} of the current group.
  split("", all_wpos);  # {WPOS} of each line of the current group, indexed from 0.
  split("", all_wtyp);  # {WTYP} of each line of the current group.
  split("", all_wcol);  # {WCOL} of each line of the current group.
  npt = 0;              # Number of lines accumulated for the current group.
}

# Skip blank lines and comment lines.
/^[ ]*([#]|$)/ { next; }

# Every other line is a data line.
{
  if (NF != 5) { data_error("wrong NF"); }

  # Concatenating "" forces {loc} to be a plain string.  Otherwise a
  # numeric-looking field keeps gawk's "strnum" attribute and two
  # distinct loci like "7" and "007" would compare equal below.
  loc = $1 "";
  plen = $2 + 0;
  wpos = $3 + 0;
  wtyp = $4 + 0;
  wcol = $5;

  if (loc != cur_loc) {
    # A new group begins: flush the previous one (if any) and reset.
    finish_loc();
    cur_loc = loc;
    cur_plen = plen;
    npt = 0;
  }
  if (plen != cur_plen) { data_error("inconsistent plen"); }

  all_wpos[npt] = wpos;
  all_wtyp[npt] = wtyp;
  all_wcol[npt] = wcol;
  npt += 1;
  next;
}

END {
  # Flush the last group.
  finish_loc();
}

function finish_loc(   i,tot,ntt,avg) {
  # Writes to standard output the lines accumulated for locus {cur_loc}
  # in {all_wpos,all_wtyp,all_wcol}[0..npt-1], with positions shifted so
  # that the average of those with {WTYP=1} becomes zero; then the two
  # end-marker lines (types -1 and -2) and a blank line.  Increments
  # {yplot} afterwards.  Does nothing if no group was started yet.
  #
  # The extra parameters {i,tot,ntt,avg} are local variables.

  if (cur_loc == "") { return; }
  if (npt < 1) { prog_error("no wpos?"); }

  # Sum and count the reference positions (those with {WTYP=1}):
  tot = 0;
  ntt = 0;
  for (i = 0; i < npt; i++) {
    if (all_wtyp[i] == 1) {
      tot += all_wpos[i];
      ntt += 1;
    }
  }

  # With no reference positions the data is left unshifted:
  avg = (ntt > 0 ? tot/ntt : 0);
  printf ":: tot = %12.8f ntt = %d avg = %12.8f\n", tot, ntt, avg > "/dev/stderr";

  # Shifted data points:
  for (i = 0; i < npt; i++) {
    printf "%-12s %8.4f %5.1f %2d %s\n",
      cur_loc, all_wpos[i] - avg, yplot, all_wtyp[i], all_wcol[i];
  }

  # Shifted endpoints {0} and {PLEN} of the locus, with fixed color:
  printf "%-12s %8.4f %5.1f -1 0x777777\n", cur_loc, 0 - avg, yplot;
  printf "%-12s %8.4f %5.1f -2 0x777777\n", cur_loc, cur_plen - avg, yplot;

  # Blank line ends this group in the output:
  printf "\n";
  yplot += 1;
}