#! /usr/bin/gawk -f
# Last edited on 2012-02-08 05:34:22 by stolfilocal
 

# Reads a file in ".wds" format and prints its text as plain text lines,
# with italic spans wrapped in "\emph{...}".

BEGIN {
  # See "wds-format.txt" for a description of the input file format.

  # Exit status requested by an error routine; stays negative while
  # no error has been flagged:
  abort = -1;

  # Usage text, printed by {arg_error}:
  usage = ARGV[0] " \\\n" "  < some.wds > some.txt";

  # Output state:
  olin = "";  # Text accumulated so far for the current output line.
  ital = 0;   # Nonzero while inside an italic span.

  # Input statistics, reported on stderr by the END rule:
  nlines = 0; # Input lines read.
  nwdin = 0;  # Alpha words and symbols read.
  nptin = 0;  # Punctuation items read.
}

# Safety net: once an error status has been recorded, stop reading input.
abort >= 0 { exit abort }

{
  # Applied to every input line, before any of the rules below.
  nlines += 1;
  # Turn TAB, form feed, carriage return and code \240
  # (no-break space in ISO Latin-1) into ordinary blanks:
  gsub(/[\011\014\015\240]/, " ", $0);
  # Strip blanks at the end of the line:
  sub(/[ ]+$/, "", $0);
}

$0 ~ /^[\#][ ]*$/ {
  # A "#" alone on the line is an empty comment: skip it.
  next
}

$0 ~ /^[\$][ ]/ {
  # A "$ " line starts a new section.  It is treated like a paragraph
  # break: the first call flushes the pending text, the second one
  # (with {olin} now empty) prints an empty line.
  output_olin(); output_olin();
  next
}

$0 ~ /^[@\$\#][ ]/ {
  # Lines tagged "@", "$" or "#" followed by a blank carry internal
  # info or comments and produce no output.  ("$ " lines have already
  # been consumed by the section-start rule.)
  next
}

/^[asp][ ]/ {
  # Data line: a type letter -- "a" (alpha), "s" (symbol) or "p"
  # (punctuation) -- then a blank, then the item itself.  Appends the
  # item to the output line {olin}, deciding whether a blank goes
  # before it, and starts a new output line when the current one
  # would otherwise reach 72 characters.

  # Grab the type and word:
  type = substr($0, 1, 1);
  word = substr($0, 3);
  # Get the last character {prev} on current line ("" if the line is empty):
  prev = (olin == "" ? "" : substr(olin, length(olin), 1));
  # Decide the spaces {befo} to add before {word}
  # (an end-of-paragraph mark just flushes the output and skips the rest):
  if (type == "p")
    { # Punctuation:
      nptin++;
      if (word == "÷")
        { # End of paragraph: flush pending text, print an empty line,
          # and leave italic mode.
          output_olin(); 
          output_olin();
          ital = 0;
          next;
        }
      else 
        { if (word == "_")
            { # Italic toggle: opens or closes an "\emph{...}" group.
              if (ital)
                { befo = ""; word = "}"; ital = 0; }
              else
                { befo = " "; word = "\\emph{"; ital = 1; }
            }
          else if (word == "(")
            { befo = (prev == "(" ? "" : " "); }
          else if (word == "-")
            { befo = (prev == "-" ? "" : " "); }
          else if (word == "`")
            { befo = ((prev == "(") || (prev == "`") ? "" : " "); }
          else  
            { # Any other punctuation attaches to the preceding text:
              befo = "";
            }
        }          
    }
  else
    { # Alpha or symbol: no blank right after an opening "(", "`", "-" or "{".
      nwdin++;
      befo = ((prev == "(") || (prev == "`") || (prev == "-") || (prev == "{") ? "" : " ");
    }
  # No blanks needed at begin-of-line: 
  if (prev == "") { befo = ""; }
    
  # Recode some symbols ("_" -> "/", "~" -> "-", "^" -> ".").
  # A lone "_" punctuation item was already turned into a brace above,
  # so only underscores inside other items are affected here:
  gsub(/[_]/, "/", word);
  gsub(/[~]/, "-", word);
  gsub(/\^/, ".", word);
  
  # Does it fit in the current line?  An item that takes no blank
  # before it is always appended, so that it is never split from the
  # text it attaches to.
  if ((befo == "") || (length(olin) + length(befo) + length(word) < 72))
    { # Append {word} with its space before:
      olin = (olin befo word);
    }
  else
    { # Flush the line and append {word} without space:
      output_olin(); 
      olin = word;
    }
  next;
}

{
  # No earlier rule accepted this line, so its format is not recognized:
  data_error("bad line format");
  next
}

END {
  # Runs at end of input and also after any {exit}; in the latter
  # case just propagate the recorded error status.
  if (abort >= 0) exit abort;

  # Emit whatever text is still pending:
  if (length(olin) > 0) output_olin();

  # Report the input statistics on stderr:
  printf("%8d lines read\n", nlines) > "/dev/stderr";
  printf("%8d words/symbols read\n", nwdin) > "/dev/stderr";
  printf("%8d punctuation read\n", nptin) > "/dev/stderr";
}

function output_olin(  )
{
  # Writes the pending output line {olin} (possibly empty) to standard
  # output, followed by the output record separator, then clears it.
  printf "%s%s", olin, ORS;
  olin = "";
}

function arg_error(msg)
{
  # Prints {msg} and the usage text on stderr, records the error
  # status in {abort}, and terminates with exit status 1.
  printf("%s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  abort = 1;
  exit abort;
}

function data_warning(msg)
{
  # Prints {msg} on stderr, tagged with the current input line number
  # {FNR}.  Processing continues.
  printf("line %d: %s\n", FNR, msg) > "/dev/stderr";
}

function data_error(msg)
{
  # Prints {msg} on stderr, tagged with the current input line number
  # {FNR}, followed by the offending line itself; then records the
  # error status in {abort} and terminates with exit status 1.
  printf("line %d: %s\n", FNR, msg) > "/dev/stderr";
  printf("  %s\n", $0) > "/dev/stderr";
  abort = 1;
  exit abort;
}

function tbl_error(f,n,msg)
{
  # Prints {msg} on stderr, tagged with file name {f} and line number
  # {n}; then records the error status in {abort} and terminates with
  # exit status 1.
  printf("file %s, line %d: %s\n", f, n, msg) > "/dev/stderr";
  abort = 1;
  exit abort;
}