#! /bin/bash -e
# Last edited on 2026-01-12 16:59:12 by stolfi

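# Reads a file from {stdin}.  On every line that begins with an old-style locator (locus ID)
# "<fXXX.YYY.ZZZ>", replaces that locator by the new-style one "<fXXX.WWW>"
# as determined by the old-to-new locator table.  On those lines, a
# one-character suffix ";C" just before the ">" is carried over to the new
# locator, and the rest of the line is stripped of leading and trailing
# blanks and re-aligned after the locator (which is padded to 18 columns).
# Other lines are not modified.  Writes the result to {stdout}.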

# Old-to-new locator table; the name is handed unchanged to read_table().
table_file="loci-evmt16e6-ivtff.tbl"

# NOTE: the gawk program below is one single-quoted shell string, so it must
# not contain a literal single-quote character anywhere (comments included);
# inside awk strings write the octal escape \047 instead.
gawk \
  -i error_funcs.gawk \
  -i read_table.gawk \
  -v table_file="${table_file}" \
  ' # Load the old-to-new locator table into {tbl}, keyed by old locator.
    # NOTE(review): the meaning of the two 0 arguments is defined in
    # read_table.gawk, which is not visible here -- confirm there.
    BEGIN{ split("", tbl); read_table(table_file, 0, tbl, 0); }

    # Blank lines and "#" comment lines are copied as-is.
    /^[ ]*([#]|$)/ { print; next; }

    # Locators with no dotted field ("<fXXX>") are copied as-is.
    /^[<]f[0-9]+[rv][0-9]?[>]/ { print; next; }

    # Locators with a single dotted field ("<fXXX.WWW>", already new-style)
    # are copied as-is.
    /^[<]f[0-9]+[rv][0-9]?[.][^.<>]*[>]/ { print; next; }

    # Locators with two dotted fields ("<fXXX.YYY.ZZZ>", old-style) are
    # translated through the table.
    (match($0, /^[<]f[0-9]+[rv][0-9]?[.][^.<>]*[.][^.<>]*[>]/)) {
      # Sanity check only: the regex is anchored, so a match starts at 1.
      if (RSTART != 1) { prog_error("RSTART?"); }
      loc = substr($0, 1, RLENGTH);

      # A one-character suffix ";C" right before the ">" is not part of the
      # table key: split it off here and re-attach it to the new locator.
      if (substr(loc, RLENGTH-2, 1) == ";") {
        trc = substr(loc, RLENGTH-2, 2);
        loc = (substr(loc, 1, RLENGTH-3) ">");
      } else {
        trc = "";
      }

      # Rest of the line, without leading and trailing blanks.
      txt = substr($0, RLENGTH + 1);
      sub(/^[ ]+/, "", txt); sub(/[ ]+$/, "", txt);

      # The quotes around the locator are spelled \047: a literal single
      # quote would end the shell string and turn {loc} into plain text,
      # so the message would not show the offending locator.
      # NOTE(review): presumably data_error() aborts the run; if it
      # returns, {new} below is empty -- confirm in error_funcs.gawk.
      if (! (loc in tbl)) { data_error(("locus ID \047" loc "\047 not in table")); }
      new = tbl[loc];

      # Re-insert the suffix just before the closing ">" of the new locator.
      if (trc != "") { new = (substr(new, 1, length(new)-1) trc ">"); }
      printf "%-18s %s\n", new, txt;
      next;
    }

    # Anything else is copied as-is.
    { print; next; }
  '
  
