#! /usr/bin/gawk -f
# Last edited on 2004-01-27 07:36:55 by stolfi

# Tallies two-character codes written as «XX» in the input and prints,
# for each distinct code, its count, the code itself, and its symbolic name.
#
# Input handling:
#   - Lines that are blank (or only spaces), or whose first non-space
#     character is "#", are skipped.
#   - Lines containing "«" or "»" are scanned for «XX» tokens.
#   - All other lines match no rule and are ignored.
#
# Output (stdout): one line per distinct code, " %7d %s %s" = count, code,
# name.  The iteration order of "for (code in ct)" is unspecified, so pipe
# the output through sort(1) if a stable order is needed.
#
# Diagnostics (stderr): "bad code = ..." for every token that is not exactly
# « + two characters from [_/*0-9A-F] + ».  Such tokens are still tallied
# (under their 2nd and 3rd characters), exactly as the checks below show.
#
# NOTE(review): the table keys look like the low byte of code points in the
# Unicode Arabic block (U+06xx) -- confirm against the data being processed.

BEGIN{
  # Start with an empty tally table.
  split("", ct);

  # Symbolic name for each two-character code.
  name["0C"] = "ARABIC_COMMA";
  name["1B"] = "ARABIC_SEMICOLON";
  name["1F"] = "ARABIC_QUESTION_MARK";
  name["21"] = "ARABIC_LETTER_HAMZA";
  name["22"] = "ARABIC_LETTER_ALEF_WITH_MADDA_ABOVE";
  name["23"] = "ARABIC_LETTER_ALEF_WITH_HAMZA_ABOVE";
  name["24"] = "ARABIC_LETTER_WAW_WITH_HAMZA_ABOVE";
  name["25"] = "ARABIC_LETTER_ALEF_WITH_HAMZA_BELOW";
  name["26"] = "ARABIC_LETTER_YEH_WITH_HAMZA_ABOVE";
  name["27"] = "ARABIC_LETTER_ALEF";
  name["28"] = "ARABIC_LETTER_BEH";
  name["29"] = "ARABIC_LETTER_TEH_MARBUTA";
  name["2A"] = "ARABIC_LETTER_TEH";
  name["2B"] = "ARABIC_LETTER_THEH";
  name["2C"] = "ARABIC_LETTER_JEEM";
  name["2D"] = "ARABIC_LETTER_HAH";
  name["2E"] = "ARABIC_LETTER_KHAH";
  name["2F"] = "ARABIC_LETTER_DAL";
  name["30"] = "ARABIC_LETTER_THAL";
  name["31"] = "ARABIC_LETTER_REH";
  name["32"] = "ARABIC_LETTER_ZAIN";
  name["33"] = "ARABIC_LETTER_SEEN";
  name["34"] = "ARABIC_LETTER_SHEEN";
  name["35"] = "ARABIC_LETTER_SAD";
  name["36"] = "ARABIC_LETTER_DAD";
  name["37"] = "ARABIC_LETTER_TAH";
  name["38"] = "ARABIC_LETTER_ZAH";
  name["39"] = "ARABIC_LETTER_AIN";
  name["3A"] = "ARABIC_LETTER_GHAIN";
  name["40"] = "ARABIC_TATWEEL";
  name["41"] = "ARABIC_LETTER_FEH";
  name["42"] = "ARABIC_LETTER_QAF";
  name["43"] = "ARABIC_LETTER_KAF";
  name["44"] = "ARABIC_LETTER_LAM";
  name["45"] = "ARABIC_LETTER_MEEM";
  name["46"] = "ARABIC_LETTER_NOON";
  name["47"] = "ARABIC_LETTER_HEH";
  name["48"] = "ARABIC_LETTER_WAW";
  name["49"] = "ARABIC_LETTER_ALEF_MAKSURA";
  name["4A"] = "ARABIC_LETTER_YEH";
  name["4B"] = "ARABIC_FATHATAN";
  name["4C"] = "ARABIC_DAMMATAN";
  name["4D"] = "ARABIC_KASRATAN";
  name["4E"] = "ARABIC_FATHA";
  name["4F"] = "ARABIC_DAMMA";
  name["50"] = "ARABIC_KASRA";
  name["51"] = "ARABIC_SHADDA";
  name["52"] = "ARABIC_SUKUN";
  name["53"] = "ARABIC_MADDAH_ABOVE";
  name["54"] = "ARABIC_HAMZA_ABOVE";
  name["55"] = "ARABIC_HAMZA_BELOW";
  name["60"] = "ARABIC-INDIC_DIGIT_ZERO";
  name["61"] = "ARABIC-INDIC_DIGIT_ONE";
  name["62"] = "ARABIC-INDIC_DIGIT_TWO";
  name["63"] = "ARABIC-INDIC_DIGIT_THREE";
  name["64"] = "ARABIC-INDIC_DIGIT_FOUR";
  name["65"] = "ARABIC-INDIC_DIGIT_FIVE";
  name["66"] = "ARABIC-INDIC_DIGIT_SIX";
  name["67"] = "ARABIC-INDIC_DIGIT_SEVEN";
  name["68"] = "ARABIC-INDIC_DIGIT_EIGHT";
  name["69"] = "ARABIC-INDIC_DIGIT_NINE";
  name["6A"] = "ARABIC_PERCENT_SIGN";
  name["6B"] = "ARABIC_DECIMAL_SEPARATOR";
  name["6C"] = "ARABIC_THOUSANDS_SEPARATOR";
  name["6D"] = "ARABIC_FIVE_POINTED_STAR";
  name["6E"] = "ARABIC_LETTER_DOTLESS_BEH";
  name["6F"] = "ARABIC_LETTER_DOTLESS_QAF";
  name["70"] = "ARABIC_LETTER_SUPERSCRIPT_ALEF";
  name["71"] = "ARABIC_LETTER_ALEF_WASLA";
  name["72"] = "ARABIC_LETTER_ALEF_WITH_WAVY_HAMZA_ABOVE";
  name["73"] = "ARABIC_LETTER_ALEF_WITH_WAVY_HAMZA_BELOW";
  name["74"] = "ARABIC_LETTER_HIGH_HAMZA";
  name["75"] = "ARABIC_LETTER_HIGH_HAMZA_ALEF";
  name["76"] = "ARABIC_LETTER_HIGH_HAMZA_WAW";
  name["77"] = "ARABIC_LETTER_U_WITH_HAMZA_ABOVE";
  name["78"] = "ARABIC_LETTER_HIGH_HAMZA_YEH";
  name["79"] = "ARABIC_LETTER_TTEH";
  name["7A"] = "ARABIC_LETTER_TTEHEH";
  name["7B"] = "ARABIC_LETTER_BEEH";
  name["7C"] = "ARABIC_LETTER_TEH_WITH_RING";
  name["7D"] = "ARABIC_LETTER_TEH_WITH_THREE_DOTS_ABOVE_DOWNWARDS";
  name["7E"] = "ARABIC_LETTER_PEH";
  name["7F"] = "ARABIC_LETTER_TEHEH";
  name["80"] = "ARABIC_LETTER_BEHEH";
  name["81"] = "ARABIC_LETTER_HAH_WITH_HAMZA_ABOVE";
  name["82"] = "ARABIC_LETTER_HAH_WITH_TWO_DOTS_VERTICAL_ABOVE";
  name["83"] = "ARABIC_LETTER_NYEH";
  name["84"] = "ARABIC_LETTER_DYEH";
  name["85"] = "ARABIC_LETTER_HAH_WITH_THREE_DOTS_ABOVE";
  name["86"] = "ARABIC_LETTER_TCHEH";
  name["87"] = "ARABIC_LETTER_TCHEHEH";
  name["88"] = "ARABIC_LETTER_DDAL";
  name["89"] = "ARABIC_LETTER_DAL_WITH_RING";
  name["8A"] = "ARABIC_LETTER_DAL_WITH_DOT_BELOW";
  name["8B"] = "ARABIC_LETTER_DAL_WITH_DOT_BELOW_AND_SMALL_TAH";
  name["8C"] = "ARABIC_LETTER_DAHAL";
  name["8D"] = "ARABIC_LETTER_DDAHAL";
  name["8E"] = "ARABIC_LETTER_DUL";
  name["8F"] = "ARABIC_LETTER_DAL_WITH_THREE_DOTS_ABOVE_DOWNWARDS";
  name["90"] = "ARABIC_LETTER_DAL_WITH_FOUR_DOTS_ABOVE";
  name["91"] = "ARABIC_LETTER_RREH";
  name["92"] = "ARABIC_LETTER_REH_WITH_SMALL_V";
  name["93"] = "ARABIC_LETTER_REH_WITH_RING";
  name["94"] = "ARABIC_LETTER_REH_WITH_DOT_BELOW";
  name["95"] = "ARABIC_LETTER_REH_WITH_SMALL_V_BELOW";
  name["96"] = "ARABIC_LETTER_REH_WITH_DOT_BELOW_AND_DOT_ABOVE";
  name["97"] = "ARABIC_LETTER_REH_WITH_TWO_DOTS_ABOVE";
  name["98"] = "ARABIC_LETTER_JEH";
  name["99"] = "ARABIC_LETTER_REH_WITH_FOUR_DOTS_ABOVE";
  name["9A"] = "ARABIC_LETTER_SEEN_WITH_DOT_BELOW_AND_DOT_ABOVE";
  name["9B"] = "ARABIC_LETTER_SEEN_WITH_THREE_DOTS_BELOW";
  name["9C"] = "ARABIC_LETTER_SEEN_WITH_THREE_DOTS_BELOW_AND_THREE_DOTS_ABOVE";
  name["9D"] = "ARABIC_LETTER_SAD_WITH_TWO_DOTS_BELOW";
  name["9E"] = "ARABIC_LETTER_SAD_WITH_THREE_DOTS_ABOVE";
  name["9F"] = "ARABIC_LETTER_TAH_WITH_THREE_DOTS_ABOVE";
  name["A0"] = "ARABIC_LETTER_AIN_WITH_THREE_DOTS_ABOVE";
  name["A1"] = "ARABIC_LETTER_DOTLESS_FEH";
  name["A2"] = "ARABIC_LETTER_FEH_WITH_DOT_MOVED_BELOW";
  name["A3"] = "ARABIC_LETTER_FEH_WITH_DOT_BELOW";
  name["A4"] = "ARABIC_LETTER_VEH";
  name["A5"] = "ARABIC_LETTER_FEH_WITH_THREE_DOTS_BELOW";
  name["A6"] = "ARABIC_LETTER_PEHEH";
  name["A7"] = "ARABIC_LETTER_QAF_WITH_DOT_ABOVE";
  name["A8"] = "ARABIC_LETTER_QAF_WITH_THREE_DOTS_ABOVE";
  name["A9"] = "ARABIC_LETTER_KEHEH";
  name["AA"] = "ARABIC_LETTER_SWASH_KAF";
  name["AB"] = "ARABIC_LETTER_KAF_WITH_RING";
  name["AC"] = "ARABIC_LETTER_KAF_WITH_DOT_ABOVE";
  name["AD"] = "ARABIC_LETTER_NG";
  name["AE"] = "ARABIC_LETTER_KAF_WITH_THREE_DOTS_BELOW";
  name["AF"] = "ARABIC_LETTER_GAF";
  name["B0"] = "ARABIC_LETTER_GAF_WITH_RING";
  name["B1"] = "ARABIC_LETTER_NGOEH";
  name["B2"] = "ARABIC_LETTER_GAF_WITH_TWO_DOTS_BELOW";
  name["B3"] = "ARABIC_LETTER_GUEH";
  name["B4"] = "ARABIC_LETTER_GAF_WITH_THREE_DOTS_ABOVE";
  name["B5"] = "ARABIC_LETTER_LAM_WITH_SMALL_V";
  name["B6"] = "ARABIC_LETTER_LAM_WITH_DOT_ABOVE";
  name["B7"] = "ARABIC_LETTER_LAM_WITH_THREE_DOTS_ABOVE";
  name["B8"] = "ARABIC_LETTER_LAM_WITH_THREE_DOTS_BELOW";
  name["B9"] = "ARABIC_LETTER_NOON_WITH_DOT_BELOW";
  name["BA"] = "ARABIC_LETTER_NOON_GHUNNA";
  name["BB"] = "ARABIC_LETTER_RNOON";
  name["BC"] = "ARABIC_LETTER_NOON_WITH_RING";
  name["BD"] = "ARABIC_LETTER_NOON_WITH_THREE_DOTS_ABOVE";
  name["BE"] = "ARABIC_LETTER_HEH_DOACHASHMEE";
  name["BF"] = "ARABIC_LETTER_TCHEH_WITH_DOT_ABOVE";
  name["C0"] = "ARABIC_LETTER_HEH_WITH_YEH_ABOVE";
  name["C1"] = "ARABIC_LETTER_HEH_GOAL";
  name["C2"] = "ARABIC_LETTER_HEH_GOAL_WITH_HAMZA_ABOVE";
  name["C3"] = "ARABIC_LETTER_TEH_MARBUTA_GOAL";
  name["C4"] = "ARABIC_LETTER_WAW_WITH_RING";
  name["C5"] = "ARABIC_LETTER_KIRGHIZ_OE";
  name["C6"] = "ARABIC_LETTER_OE";
  name["C7"] = "ARABIC_LETTER_U";
  name["C8"] = "ARABIC_LETTER_YU";
  name["C9"] = "ARABIC_LETTER_KIRGHIZ_YU";
  name["CA"] = "ARABIC_LETTER_WAW_WITH_TWO_DOTS_ABOVE";
  name["CB"] = "ARABIC_LETTER_VE";
  name["CC"] = "ARABIC_LETTER_FARSI_YEH";
  name["CD"] = "ARABIC_LETTER_YEH_WITH_TAIL";
  name["CE"] = "ARABIC_LETTER_YEH_WITH_SMALL_V";
  name["CF"] = "ARABIC_LETTER_WAW_WITH_DOT_ABOVE";
  name["D0"] = "ARABIC_LETTER_E";
  name["D1"] = "ARABIC_LETTER_YEH_WITH_THREE_DOTS_BELOW";
  name["D2"] = "ARABIC_LETTER_YEH_BARREE";
  name["D3"] = "ARABIC_LETTER_YEH_BARREE_WITH_HAMZA_ABOVE";
  name["D4"] = "ARABIC_FULL_STOP";
  name["D5"] = "ARABIC_LETTER_AE";
  name["D6"] = "ARABIC_SMALL_HIGH_LIGATURE_SAD_WITH_LAM_WITH_ALEF_MAKSURA";
  name["D7"] = "ARABIC_SMALL_HIGH_LIGATURE_QAF_WITH_LAM_WITH_ALEF_MAKSURA";
  name["D8"] = "ARABIC_SMALL_HIGH_MEEM_INITIAL_FORM";
  name["D9"] = "ARABIC_SMALL_HIGH_LAM_ALEF";
  name["DA"] = "ARABIC_SMALL_HIGH_JEEM";
  name["DB"] = "ARABIC_SMALL_HIGH_THREE_DOTS";
  name["DC"] = "ARABIC_SMALL_HIGH_SEEN";
  name["DD"] = "ARABIC_END_OF_AYAH";
  name["DE"] = "ARABIC_START_OF_RUB_EL_HIZB";
  name["DF"] = "ARABIC_SMALL_HIGH_ROUNDED_ZERO";
  name["E0"] = "ARABIC_SMALL_HIGH_UPRIGHT_RECTANGULAR_ZERO";
  name["E1"] = "ARABIC_SMALL_HIGH_DOTLESS_HEAD_OF_KHAH";
  name["E2"] = "ARABIC_SMALL_HIGH_MEEM_ISOLATED_FORM";
  name["E3"] = "ARABIC_SMALL_LOW_SEEN";
  name["E4"] = "ARABIC_SMALL_HIGH_MADDA";
  name["E5"] = "ARABIC_SMALL_WAW";
  name["E6"] = "ARABIC_SMALL_YEH";
  name["E7"] = "ARABIC_SMALL_HIGH_YEH";
  name["E8"] = "ARABIC_SMALL_HIGH_NOON";
  name["E9"] = "ARABIC_PLACE_OF_SAJDAH";
  name["EA"] = "ARABIC_EMPTY_CENTRE_LOW_STOP";
  name["EB"] = "ARABIC_EMPTY_CENTRE_HIGH_STOP";
  name["EC"] = "ARABIC_ROUNDED_HIGH_STOP_WITH_FILLED_CENTRE";
  name["ED"] = "ARABIC_SMALL_LOW_MEEM";
  name["F0"] = "EXTENDED_ARABIC-INDIC_DIGIT_ZERO";
  name["F1"] = "EXTENDED_ARABIC-INDIC_DIGIT_ONE";
  name["F2"] = "EXTENDED_ARABIC-INDIC_DIGIT_TWO";
  name["F3"] = "EXTENDED_ARABIC-INDIC_DIGIT_THREE";
  name["F4"] = "EXTENDED_ARABIC-INDIC_DIGIT_FOUR";
  name["F5"] = "EXTENDED_ARABIC-INDIC_DIGIT_FIVE";
  name["F6"] = "EXTENDED_ARABIC-INDIC_DIGIT_SIX";
  # F7 is deliberately given a non-standard name; the original author's
  # remark records the true one.  The remark must stay on its own line:
  # an awk comment runs to end of line and would hide any code after it.
  name["F7"] = "ARABIC_LIGATURE_ALAYHE_ISOLATED_FORM"; # true: "EXTENDED_ARABIC-INDIC_DIGIT_SEVEN";
  name["F8"] = "EXTENDED_ARABIC-INDIC_DIGIT_EIGHT";
  name["F9"] = "EXTENDED_ARABIC-INDIC_DIGIT_NINE";
  name["FA"] = "ARABIC_LETTER_SHEEN_WITH_DOT_BELOW";
  name["FB"] = "ARABIC_LETTER_DAD_WITH_DOT_BELOW";
  name["FC"] = "ARABIC_LETTER_GHAIN_WITH_DOT_BELOW";
  name["FD"] = "ARABIC_SIGN_SINDHI_AMPERSAND";
  name["FE"] = "ARABIC_SIGN_SINDHI_POSTPOSITION_MEN";

  # Pseudo-codes that are not hexadecimal.
  name["__"] = "SPACE";
  name["**"] = "INVALID_CHARACTER";
}

# Skip blank lines and "#" comment lines (leading spaces allowed).
/^ *([\#]|$)/ { next; }

# Lines that contain at least one guillemet: extract and tally the codes.
/[«»]/ {
  lin = $0;
  # Drop everything before the first "«" ...
  gsub(/^[^«»]*[«]/, "«", lin);
  # ... everything after the last "»" ...
  gsub(/[»][^«»]*$/, "»", lin);
  # ... and turn whatever lies between one token's "»" and the next token's
  # "«" into a single blank, so that each token becomes its own field.
  gsub(/[»][^«»]*[«]/, "» «", lin);
  # Assigning to $0 makes awk re-split the record into $1..$NF.
  $0 = lin;
  for (i = 1; i <= NF; i++) {
    code = $(i);
    # Report tokens that are not « + two of [_/*0-9A-F] + ».
    if (! (code ~ /^[«][_\/*0-9A-F][_\/*0-9A-F][»]$/)) {
      printf "bad code = \"%s\"\n", code > "/dev/stderr";
    }
    # Keep the two characters after the opening "«" and count them.
    # This runs for malformed tokens too; they are reported, not dropped.
    code = substr(code, 2,2);
    ct[code]++;
  }
  next;
}

# Final report: count, code, and name.  The name is empty for codes that
# have no entry in the table.
END{
  for (code in ct) {
    printf " %7d %s %s\n", ct[code], code, name[code];
  }
}