#! /usr/bin/gawk -f
# Last edited on 2004-02-02 05:02:46 by stolfi

# Checks the nesting of paired quotation marks and brackets inside the
# "@chinword{GB}{...}" entries of the input, and reports how many other
# tokens lie between consecutive marks.
#
# Input lines:
#   blank, or starting with "#"   ignored.
#   "@fix ..."                    ignored.
#   "@chinword{GB}{...}..."       the GB field is split into tokens by
#                                 gbspread() and each token is scanned.
#   any other "@..." line         ends the current section: the pending
#                                 report line is completed and the
#                                 nesting state is reset.
#   anything else                 fatal "bad format" error.
#
# Output: one line for each section that contains at least one mark:
#   "<FNR of first mark>: <count> <mark> <count> <mark> ... <count> "
# where each <count> is the number of non-mark tokens seen since the
# previous mark, and <mark> is the ASCII/Latin stand-in from {punct}.
# Notes of the form "[** ...]" are inserted where a nesting error is found.
#
# NOTE(review): the hex codes in the table comments (A1B0, ...) and the
# byte ranges used by gbspread() suggest that the table keys are meant
# to be raw two-byte GB codes and that the script should run in a
# byte-oriented locale (e.g. LC_ALL=C) -- confirm the file encoding.

BEGIN {
  # Exit status requested by data_error(); negative means "no error".
  abort = -1;

  # Printable stand-in for each recognized mark.
  split("", punct);
  punct["¡°"] = "``"; # A1B0 201C LEFT_DOUBLE_QUOTATION_MARK
  punct["¡±"] = "''"; # A1B1 201D RIGHT_DOUBLE_QUOTATION_MARK
  punct["¡¶"] = "«";  # A1B6 300A LEFT_DOUBLE_ANGLE_BRACKET
  punct["¡·"] = "»";  # A1B7 300B RIGHT_DOUBLE_ANGLE_BRACKET
  punct["¡¸"] = "[";  # A1B8 300C LEFT_CORNER_BRACKET
  punct["¡¹"] = "]";  # A1B9 300D RIGHT_CORNER_BRACKET
  punct["¡º"] = "{";  # A1BA 300E LEFT_WHITE_CORNER_BRACKET
  punct["¡»"] = "}";  # A1BB 300F RIGHT_WHITE_CORNER_BRACKET

  # closer_of[OPENING MARK] = matching closing mark.
  split("", closer_of);
  closer_of["¡°"] = "¡±"; # {LEFT,RIGHT}_DOUBLE_QUOTATION_MARK
  closer_of["¡¶"] = "¡·"; # {LEFT,RIGHT}_DOUBLE_ANGLE_BRACKET
  closer_of["¡¸"] = "¡¹"; # {LEFT,RIGHT}_CORNER_BRACKET
  closer_of["¡º"] = "¡»"; # {LEFT,RIGHT}_WHITE_CORNER_BRACKET

  # opener_of[CLOSING MARK] = matching opening mark (inverse table).
  split("", opener_of);
  for (gbi in closer_of) { opener_of[closer_of[gbi]] = gbi; }

  split("", stack); # stack[0..level-1] = currently open marks.
  level = 0;        # Current nesting depth.
  dirty = 0;        # 1 while a report line has been started but not ended.
  wct = 0;          # Non-mark tokens seen since the last mark.
}

# Blank lines and "#" comments.
/^[ ]*($|[\#])/ { next; }

/^[@]fix / { next; }

/^[@]chinword[\{]/ {
  # Isolate the first braced field and split it into tokens.
  gb = gbspread(gensub(/^[@]chinword{(.*)}{.*}.*$/, "\\1", "g", $0));
  ngb = split(gb, gbf);
  for (i = 1; i <= ngb; i++) { printf "%s", scan_token(gbf[i]); }
  next;
}

# Any other "@" directive closes the current section.
/^[@]/ {
  printf "%s", finish_section();
  next;
}

# Everything that no rule above consumed is malformed.
{ data_error("bad format"); }

END {
  # After a fatal data_error() just propagate its status; do not
  # append nesting notes to a report that is already invalid.
  if (abort >= 0) { exit abort; }
  # Complete the report line of the last section, if one is pending.
  printf "%s", finish_section();
}

function line_prefix()
{
  # Returns the "%7d:" line-number prefix (using the current FNR) if no
  # report line is pending, marking the line as started; else returns "".
  if (dirty) { return ""; }
  dirty = 1;
  return sprintf("%7d:", FNR);
}

function scan_token(gbi,   out)
{
  # Processes one token {gbi} of a "@chinword" entry and returns the
  # text to append to the report (possibly empty).  Updates the global
  # state {wct}, {level}, {stack}, {dirty}.
  if (gbi in closer_of)
    { # Opening mark: report the count so far and push the mark.
      out = line_prefix();
      out = out sprintf(" %d %s", wct, punct[gbi]);
      wct = 0;
      stack[level] = gbi; level++;
      return out;
    }
  if (gbi in opener_of)
    { # Closing mark: report the count so far and pop its partner.
      # The prefix is emitted here too, so that a stray closer at the
      # start of a section still gets a line number and a final newline.
      out = line_prefix();
      out = out sprintf(" %d %s", wct, punct[gbi]);
      wct = 0;
      if (level == 0)
        { out = out " [** extra closer]"; }
      else
        { level--;
          if (stack[level] != opener_of[gbi])
            { # No newline here: the note stays inside the report line,
              # like the other "[** ...]" notes.
              out = out " [** mismatch]";
            }
        }
      return out;
    }
  # Ordinary token: just count it.
  wct++;
  return "";
}

function finish_section(   out)
{
  # Returns the text that completes the pending report line (a note if
  # marks are still open, then the trailing count and newline), or ""
  # if nothing is pending.  Resets {dirty}, {level} and {wct}.
  out = "";
  if (level > 0) { out = out sprintf(" [** end at level %d]", level); }
  if (dirty) { out = out sprintf(" %d \n", wct); }
  dirty = 0; level = 0; wct = 0;
  return out;
}

function gbspread(s)
{
  # Returns {s} with a blank inserted on each side of every pair of
  # bytes in the range \241-\376, runs of blanks collapsed to one, and
  # leading/trailing blanks removed.
  s = gensub(/([\241-\376][\241-\376])/, " \\1 ", "g", s);
  gsub(/[ ][ ]+/, " ", s);
  gsub(/^[ ]+/, "", s);
  gsub(/[ ]+$/, "", s);
  return s;
}

function data_error(msg)
{
  # Reports a fatal error on the current input line to stderr and
  # terminates with status 1 (the END rule sees {abort} and stops).
  printf "%d: ** %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_warning(msg)
{
  # Reports a non-fatal problem on the current input line to stderr.
  printf "%d: !! %s\n", FNR, msg > "/dev/stderr";
}