#! /usr/bin/gawk -f

# Recoding an interlinear file from the FSG alphabet to 
# my Lossy Ad-hoc Semi-Analytic Fault-Tolerant encoding

BEGIN {
  # Stamp the output so a reader can tell which recoder produced it.
  print("# Output of fsg2hop - Stolfi's Semi-Analytic Fault-Tolerant alphabet");
}

# Lines that carry no text to recode are copied through verbatim:
#   - empty or all-blank lines,
#   - "#" comment lines (optionally preceded by blanks),
#   - lines starting with a "<...>" tag that contains neither "." nor ";".
/^ *$/ || /^ *#/ || /^<[^>.;]*>/ {
  print $0
  next
}

# fsg_to_hop(s): recode the text S from the FSG alphabet to the HOP
# encoding and return the result.
#
# The recoding is done in two passes of global substitutions:
#   1. FSG (upper-case letters and digits) -> JSA (lower-case strokes);
#   2. JSA -> HOP.
# Within each pass the ORDER of the substitutions matters: longer FSG
# groups (e.g. "IIIK") must be rewritten before their single-letter
# components (e.g. "I", "K"), and later HOP rules rely on the output of
# earlier ones (e.g. "is" -> "r" feeds "ir" -> "w").  Do not reorder.
function fsg_to_hop(s) {
  # We discard "%" and "!" since the conversion
  # will destroy synchronism anyway.
  gsub(/[%!]/, "", s);

  # First, the conversion from FSG to JSA (Stolfi's super-analytic).
  # Multi-character groups first, longest first:
  gsub(/IIIK/, "iiiij",  s);
  gsub(/IIIL/, "iiiiu",  s);
  gsub(/IIIR/, "iiiis",  s);
  gsub(/IIIE/, "iiiix",  s);
  gsub(/IIE/,  "iiix",   s);
  gsub(/IIR/,  "iiis",   s);
  gsub(/IIK/,  "iiij",   s);
  gsub(/HZ/,   "cqjc",   s);
  gsub(/PZ/,   "cqgc",   s);
  gsub(/DZ/,   "cljc",   s);
  gsub(/FZ/,   "clgc",   s);
  gsub(/IE/,   "iix",    s);
  gsub(/IR/,   "iis",    s);
  gsub(/IK/,   "iij",    s);
  # Then the single digits and letters:
  gsub(/2/,    "cs",     s);
  gsub(/4/,    "q",      s);
  gsub(/6/,    "cj",     s);
  gsub(/7/,    "ig",     s);
  gsub(/8/,    "cg",     s);
  gsub(/A/,    "ci",     s);
  gsub(/C/,    "c",      s);
  gsub(/D/,    "lj",     s);
  gsub(/E/,    "ix",     s);
  gsub(/F/,    "lg",     s);
  gsub(/G/,    "cy",     s);
  gsub(/H/,    "qj",     s);
  gsub(/I/,    "i",      s);
  gsub(/K/,    "ij",     s);
  gsub(/L/,    "iu",     s);
  gsub(/M/,    "iiiu",   s);
  gsub(/N/,    "iiu",    s);
  gsub(/O/,    "o",      s);
  gsub(/P/,    "qg",     s);
  gsub(/R/,    "is",     s);
  gsub(/S/,    "csc",    s);
  gsub(/T/,    "cc",     s);
  # "V" and "Y" have no JSA equivalent here; they become "?".
  gsub(/V/,    "?",      s);
  gsub(/Y/,    "?",      s);

  # Now, the conversion from JSA to HOP:
  gsub(/[ql]j/, "H",     s);
  gsub(/[ql]g/, "P",     s);
  gsub(/cs/,    "z",     s);
  gsub(/ij/,    "k",     s);
  gsub(/ix/,    "e",     s);
  gsub(/is/,    "r",     s);
  gsub(/iiu/,   "n",     s);
  # "y" is folded into "i" BEFORE the "ci" rule, so "cy" also ends up as "a".
  gsub(/y/,     "i",     s);
  gsub(/ci/,    "a",     s);
  gsub(/cg/,    "8",     s);
  # "ir" here is an "i" followed by the "r" produced from "is" above.
  gsub(/ir/,    "w",     s);
  # NOTE(review): "i*" also matches zero "i"s, so every "n" (with or
  # without leading "i"s) becomes "m" and no "n" survives in the output.
  # Presumably part of the intended lossiness - confirm.
  gsub(/i*n/,   "m",     s);

  return s
}

# text_column(line): return the 1-based column at which the text of a
# locator line starts.  The layout is fixed-width with the text at
# column 20, but if the closing ">" of the locator lies at column 20 or
# beyond, the text is taken to start right after that ">" so that the
# locator itself is never cut in half and recoded.
# LINE must contain a ">" (guaranteed by the pattern of the rule below).
function text_column(line,    after) {
  after = index(line, ">") + 1
  return (after > 20 ? after : 20)
}

# Text lines: "<...>" locator containing a "." and ending in ";X> ",
# where X is a single upper-case letter.  The locator prefix is copied
# unchanged and the text after it is recoded from FSG to HOP.
/^<[^>]*\.[^>]*;[A-Z]> / {
  txtcol = text_column($0)
  curtxt = fsg_to_hop(substr($0, txtcol))

  print (substr($0, 1, txtcol - 1) curtxt);
  next
}