#! /usr/bin/gawk -f
# Last edited on 2004-02-04 03:27:48 by stolfi

# Tallies the two-character codes that appear between « and » in the
# input.  At end of input it prints one line per distinct code: the
# occurrence count, the code, and the symbolic name looked up in the
# name[] table (an empty string when the code has no entry there).
#
# Lines that are blank or whose first non-space character is "#" are
# skipped.  Lines containing neither « nor » are ignored.  Tokens that
# are not of the form «XX» are reported on /dev/stderr but still counted.
# Output lines come out in the traversal order of "for (k in array)".

BEGIN {
  # Force ct to be an (empty) array: ct[code] = number of occurrences.
  split("", ct);

  # name[code] = symbolic name of the two-character code.
  # NOTE(review): the hex keys look like the low byte of the Unicode
  # Hebrew block (U+05xx) -- confirm against the data producer.
  name["91"] = "HEBREW_ACCENT_ETNAHTA";
  name["92"] = "HEBREW_ACCENT_SEGOL";
  name["93"] = "HEBREW_ACCENT_SHALSHELET";
  name["94"] = "HEBREW_ACCENT_ZAQEF_QATAN";
  name["95"] = "HEBREW_ACCENT_ZAQEF_GADOL";
  name["96"] = "HEBREW_ACCENT_TIPEHA";
  name["97"] = "HEBREW_ACCENT_REVIA";
  name["98"] = "HEBREW_ACCENT_ZARQA";
  name["99"] = "HEBREW_ACCENT_PASHTA";
  name["9A"] = "HEBREW_ACCENT_YETIV";
  name["9B"] = "HEBREW_ACCENT_TEVIR";
  name["9C"] = "HEBREW_ACCENT_GERESH";
  name["9D"] = "HEBREW_ACCENT_GERESH_MUQDAM";
  name["9E"] = "HEBREW_ACCENT_GERSHAYIM";
  name["9F"] = "HEBREW_ACCENT_QARNEY_PARA";
  name["A0"] = "HEBREW_ACCENT_TELISHA_GEDOLA";
  name["A1"] = "HEBREW_ACCENT_PAZER";
  name["A3"] = "HEBREW_ACCENT_MUNAH";
  name["A4"] = "HEBREW_ACCENT_MAHAPAKH";
  name["A5"] = "HEBREW_ACCENT_MERKHA";
  name["A6"] = "HEBREW_ACCENT_MERKHA_KEFULA";
  name["A7"] = "HEBREW_ACCENT_DARGA";
  name["A8"] = "HEBREW_ACCENT_QADMA";
  name["A9"] = "HEBREW_ACCENT_TELISHA_QETANA";
  name["AA"] = "HEBREW_ACCENT_YERAH_BEN_YOMO";
  name["AB"] = "HEBREW_ACCENT_OLE";
  name["AC"] = "HEBREW_ACCENT_ILUY";
  name["AD"] = "HEBREW_ACCENT_DEHI";
  name["AE"] = "HEBREW_ACCENT_ZINOR";
  name["AF"] = "HEBREW_MARK_MASORA_CIRCLE";
  name["B0"] = "HEBREW_POINT_SHEVA";
  name["B1"] = "HEBREW_POINT_HATAF_SEGOL";
  name["B2"] = "HEBREW_POINT_HATAF_PATAH";
  name["B3"] = "HEBREW_POINT_HATAF_QAMATS";
  name["B4"] = "HEBREW_POINT_HIRIQ";
  name["B5"] = "HEBREW_POINT_TSERE";
  name["B6"] = "HEBREW_POINT_SEGOL";
  name["B7"] = "HEBREW_POINT_PATAH";
  name["B8"] = "HEBREW_POINT_QAMATS";
  name["B9"] = "HEBREW_POINT_HOLAM";
  name["BB"] = "HEBREW_POINT_QUBUTS";
  name["BC"] = "HEBREW_POINT_DAGESH_OR_MAPIQ";
  name["BD"] = "HEBREW_POINT_METEG";
  name["BE"] = "HEBREW_PUNCTUATION_MAQAF";
  name["BF"] = "HEBREW_POINT_RAFE";
  name["C0"] = "HEBREW_PUNCTUATION_PASEQ";
  name["C1"] = "HEBREW_POINT_SHIN_DOT";
  name["C2"] = "HEBREW_POINT_SIN_DOT";
  name["C3"] = "HEBREW_PUNCTUATION_SOF_PASUQ";
  name["C4"] = "HEBREW_MARK_UPPER_DOT";
  name["D0"] = "HEBREW_LETTER_ALEF";
  name["D1"] = "HEBREW_LETTER_BET";
  name["D2"] = "HEBREW_LETTER_GIMEL";
  name["D3"] = "HEBREW_LETTER_DALET";
  name["D4"] = "HEBREW_LETTER_HE";
  name["D5"] = "HEBREW_LETTER_VAV";
  name["D6"] = "HEBREW_LETTER_ZAYIN";
  name["D7"] = "HEBREW_LETTER_HET";
  name["D8"] = "HEBREW_LETTER_TET";
  name["D9"] = "HEBREW_LETTER_YOD";
  name["DA"] = "HEBREW_LETTER_FINAL_KAF";
  name["DB"] = "HEBREW_LETTER_KAF";
  name["DC"] = "HEBREW_LETTER_LAMED";
  name["DD"] = "HEBREW_LETTER_FINAL_MEM";
  name["DE"] = "HEBREW_LETTER_MEM";
  name["DF"] = "HEBREW_LETTER_FINAL_NUN";
  name["E0"] = "HEBREW_LETTER_NUN";
  name["E1"] = "HEBREW_LETTER_SAMEKH";
  name["E2"] = "HEBREW_LETTER_AYIN";
  name["E3"] = "HEBREW_LETTER_FINAL_PE";
  name["E4"] = "HEBREW_LETTER_PE";
  name["E5"] = "HEBREW_LETTER_FINAL_TSADI";
  name["E6"] = "HEBREW_LETTER_TSADI";
  name["E7"] = "HEBREW_LETTER_QOF";
  name["E8"] = "HEBREW_LETTER_RESH";
  name["E9"] = "HEBREW_LETTER_SHIN";
  name["EA"] = "HEBREW_LETTER_TAV";
  name["F0"] = "HEBREW_LIGATURE_YIDDISH_DOUBLE_VAV";
  name["F1"] = "HEBREW_LIGATURE_YIDDISH_VAV_YOD";
  name["F2"] = "HEBREW_LIGATURE_YIDDISH_DOUBLE_YOD";
  name["F3"] = "HEBREW_PUNCTUATION_GERESH";
  name["F4"] = "HEBREW_PUNCTUATION_GERSHAYIM";
  name["__"] = "SPACE";
  name["**"] = "INVALID_CHARACTER";
}

# Reduces `text` to its guillemet-delimited tokens and counts each one
# in the global array ct.  The names after the gap in the parameter
# list are local variables.
function tally_codes(text,    tokens, ntokens, k, tok)
{
  # Drop any text in front of a leading «, and any text after a final ».
  gsub(/^[^«»]*[«]/, "«", text);
  gsub(/[»][^«»]*$/, "»", text);
  # Replace whatever sits between a » and the next « by a single blank,
  # so that the tokens become whitespace-separated fields.
  gsub(/[»][^«»]*[«]/, "» «", text);

  ntokens = split(text, tokens);
  for (k = 1; k <= ntokens; k++) {
    tok = tokens[k];
    if (tok !~ /^[«][_\/*0-9A-F][_\/*0-9A-F][»]$/) {
      printf "bad code = \"%s\"\n", tok > "/dev/stderr";
    }
    # Malformed tokens are only reported, not rejected: the key is
    # always the 2nd and 3rd characters of the token.
    ct[substr(tok, 2, 2)]++;
  }
}

# Skip blank lines and "#" comment lines.
/^ *([\#]|$)/ { next; }

# Count the codes on every line that contains a guillemet.
/[«»]/ {
  tally_codes($0);
  next;
}

# Report: count, code, and symbolic name for every code seen.
END {
  for (key in ct) {
    printf " %7d %s %s\n", ct[key], key, name[key];
  }
}