#! /usr/bin/gawk -f 
# Last edited on 2004-02-01 01:43:53 by stolfi

BEGIN {
  # Error flag: initialised to -1 here and set to 1 by data_error().
  # NOTE(review): no rule in the visible source reads this variable --
  # confirm whether anything else depends on it.
  abort = -1;
  # Provides a Pinyin translation of GB punctuation in @chinword{}{}
  # directives.
  #
  # Note: we must not use characters beyond '\200' in the Pinyin
  # text, because they may get confused with GB codes.
  #
  # The punct[] table maps the punctuation string found in the first
  # field of a directive to the ASCII text written to its second field.
  #
  # NOTE(review): several keys below (the A1A1 row and the A3xx rows)
  # appear as plain ASCII characters even though their comments name
  # ideographic/fullwidth forms -- confirm that the encoding of this
  # file has not been normalised along the way.
  #
  split("", punct);  # Make sure punct starts out as an empty array.
  #      GB              GBhx Ucod Name
  #      --              ---- ---- -------------------------------------
  punct[" "] = " ";   # A1A1 3000 IDEOGRAPHIC_SPACE
  punct["、"] = ",";   # A1A2 3001 IDEOGRAPHIC_COMMA
  punct["。"] = ".";   # A1A3 3002 IDEOGRAPHIC_FULL_STOP
  punct["“"] = "``";  # A1B0 201C LEFT_DOUBLE_QUOTATION_MARK
  punct["”"] = "''";  # A1B1 201D RIGHT_DOUBLE_QUOTATION_MARK
  punct["《"] = "<<";  # A1B6 300A LEFT_DOUBLE_ANGLE_BRACKET
  punct["》"] = ">>";  # A1B7 300B RIGHT_DOUBLE_ANGLE_BRACKET
  punct["「"] = "`";   # A1B8 300C LEFT_CORNER_BRACKET
  punct["」"] = "'";   # A1B9 300D RIGHT_CORNER_BRACKET
  punct["『"] = "((";  # A1BA 300E LEFT_WHITE_CORNER_BRACKET
  punct["』"] = "))";  # A1BB 300F RIGHT_WHITE_CORNER_BRACKET
  punct["▲"] = "*";   # A1F8 25B2 BLACK_UP-POINTING_TRIANGLE
  punct["〓"] = "*";   # A1FE 3013 GETA_MARK
  punct[","] = ",";   # A3AC FF0C FULLWIDTH_COMMA
  punct["-"] = "-";   # A3AD FF0D FULLWIDTH_HYPHEN-MINUS
  punct[":"] = ":";   # A3BA FF1A FULLWIDTH_COLON
  punct[";"] = ";";   # A3BB FF1B FULLWIDTH_SEMICOLON
  punct["?"] = "?";   # A3BF FF1F FULLWIDTH_QUESTION_MARK
}

# Rewrites "@chinword{GB}{PY}" directives whose GB field is punctuation.
#
# The GB field (first brace group) and the PY field (second brace group)
# are extracted with greedy patterns.  Then:
#   * if PY is a placeholder made of two or more "?", GB must be a key
#     of punct[] (otherwise data_error() aborts the run) and PY is
#     replaced by punct[GB];
#   * otherwise, if GB is a key of punct[] but PY differs from
#     punct[GB], a warning is issued and PY is replaced by punct[GB];
#   * otherwise PY is kept as given.
# The directive is then re-emitted as "@chinword{GB}{PY}"; any text that
# followed the second brace group on the input line is not reproduced.
#
# Braces are written as bracket expressions ([{] and [}]) so that they
# are always taken literally and never parsed as interval delimiters.
/^[@]chinword[{]/ {
  # A line that starts like a directive but lacks the two brace groups
  # would make gensub() return the whole line for both fields, and a
  # corrupted directive would be written out.  Reject it up front.
  if ($0 !~ /^[@]chinword[{].*[}][{].*[}]/)
    { data_error(("malformed @chinword directive = \"" $0 "\"")); }
  gb = gensub(/^[@]chinword[{](.*)[}][{].*[}].*$/, "\\1", "g", $0);
  py = gensub(/^[@]chinword[{].*[}][{](.*)[}].*$/, "\\1", "g", $0);
  if (py ~ /^[?][?]+$/) 
    { # Placeholder Pinyin: the GB field must be known punctuation.
      if (! (gb in punct))
        { data_error(("invalid punctuation code = \"" gb "\"")); }
      py = punct[gb];
    }
  else if ((gb in punct) && (py != punct[gb]))
    { # Known punctuation with a different Pinyin: the table wins.
      data_warning(("discrepant punctuation gb = \"" gb "\" py = \"" py "\""));
      py = punct[gb];
    }
  printf "@chinword{%s}{%s}\n", gb, py;
  next;
}

# Every record that reaches this rule is copied to the output unchanged.
{ print $0; next; }

# Reports a fatal problem with the current input line and stops the run.
#   msg: text of the complaint; it is written to standard error after
#        the per-file record number (FNR) and a "**" marker.
# Sets the global abort flag to 1 and terminates with exit status 1.
function data_error(msg)
{
  abort = 1;
  printf("%d: ** %s\n", FNR, msg) > "/dev/stderr";
  exit 1;
}

# Reports a non-fatal problem with the current input line.
#   msg: text of the complaint; it is written to standard error after
#        the per-file record number (FNR) and a "!!" marker.
# Processing continues after the message is written.
function data_warning(msg)
{
  printf("%d: !! %s\n", FNR, msg) > "/dev/stderr";
}