# Last edited on 2004-02-25 18:35:45 by stolfi
# 
# ACIP-JS Encoding for transcription of the Tibetan script
# including Tibetanized Sanskrit.
# 
# This is a slight modification of the encoding used by the ACIP (Asian
# Classics Input Project) http://www.asianclassics.org/
#
# The table says that "W" is a letter but "V" is not, yet
# in two files the opposite seems to be true.
#
# 
# Consonants (TIBETAN_LETTER_xxx)
# 
#    ACIP  | Unicode   UnicodeName     | Appearance+Obs
#    ----  | --------- --------------- | --------------
#    A     | 0F68      A               | Base for dependent vowels.
#    B     | 0F56      BA              | 
#    BH    | 0F57      BHA             | = 0F56+0FB7
#    C     | 0F45      CA              | 
#    CH    | 0F46      CHA             | 
#    D     | 0F51      DA              | 
#    DH    | 0F52      DHA             | = 0F51+0FB7
#    DZ    | 0F5B      DZA             | 
#    DZH   | 0F5C      DZHA            | = 0F5B+0FB7
#    G     | 0F42      GA              | 
#    GH    | 0F43      GHA             | = 0F42+0FB7
#    H     | 0F67      HA              |   
#    J     | 0F47      JA              | 
#    K     | 0F40      KA              |        
#    KH    | 0F41      KHA             | 
#    Ksh   | 0F69      KSSA            | = 0F40+0FB5
#    L     | 0F63      LA              | 
#    M     | 0F58      MA              | 
#    N     | 0F53      NA              | 
#    NG    | 0F44      NGA             | 
#    NY    | 0F49      NYA             | 
#    P     | 0F54      PA              | 
#    PH    | 0F55      PHA             | 
#    R     | 0F62      RA              | 
#    S     | 0F66      SA              | 
#    SH    | 0F64      SHA             | 
#    T     | 0F4F      TA              | 
#    TH    | 0F50      THA             | 
#    TS    | 0F5A      TSHA            | 
#    TZ    | 0F59      TSA             | 
#    W     | 0F5D      WA              | "V" in some files?
#    Y     | 0F61      YA              | 
#    Z     | 0F5F      ZA              | 
#    ZH    | 0F5E      ZHA             | 
#    `     | 0F60      -A              | 
#    d     | 0F4C      DDA             | 
#    dH    | 0F4D      DDHA            | = 0F4C+0FB7
#    n     | 0F4E      NNA             | 
#    sh    | 0F65      SSA             | 
#    t     | 0F4A      TTA             | 
#    th    | 0F4B      TTHA            | 
#    
# 
# Vowels after consonants (TIBETAN_VOWEL_SIGN_xxx)
# 
#    ACIP  | Unicode   UnicodeName     | Appearance+Obs
#    ----  | --------- --------------- | --------------
#    A     | ----      --              | No mark.
#    'A    | 0F71      AA              | "2" below. 
#    I     | 0F72      I               | Left-hook above. 
#    'I    | 0F73      II              | Left-hook above and "2" below = 0F71+0F72.     
#    U     | 0F74      U               | Right-pipe below. 
#    'U    | 0F75      UU              | Right-pipe below and "2" below = 0F71+0F74.    
#
#    E     | 0F7A      E               | Spout above. 
#    EE    | 0F7B      EE              | Spout-doubled above. 
#    O     | 0F7C      O               | Wings above. 
#    OO    | 0F7D      OO              | Wings-doubled above. 
#
#    Ri    | 0F76      VOCALIC_R       | Spout below and right-hook above = 0FB2+0F80. 
#    R'i   | 0F77      VOCALIC_RR      | Spout below and right-hook above and "2" below ~ 0FB2+0F81. 
#
#    Li    | 0F78      VOCALIC_L       | "21" below and right-hook above.  = 0FB3+0F80
#    L'i   | 0F79      VOCALIC_LL      | "21" below and right-hook above and "2" below = 0FB3+0F81. 
# 
# 
# Vowels in initial position: (TIBETAN_LETTER_xxx + TIBETAN_VOWEL_SIGN_yyy)
# 
#    ACIP  | Unicode   UnicodeName     | Appearance+Obs
#    ----  | --------- --------------- | --------------
#    AA    | 0F68      A               | Like "6V".
#    A'A   | 0F68+0F71 A+AA            | AA with "2" below.
#    AI    | 0F68+0F72 A+I             | AA with left-hook above.
#    A'I   | 0F68+0F73 A+II            | AA with left-hook above and "2" below.
#    AU    | 0F68+0F74 A+U             | AA with right-pipe below.
#    A'U   | 0F68+0F75 A+UU            | AA with right-pipe below and "2" below.
#          |                   
#    AE    | 0F68+0F7A A+E             | AA with spout above.
#    AEE   | 0F68+0F7B A+EE            | AA with spout-doubled above.
#    AO    | 0F68+0F7C A+O             | AA with wings above.
#    AOO   | 0F68+0F7D A+OO            | AA with wings-doubled above.
#          |                    
#    Ri    | 0F62+0F76 RA+VOCALIC_R    | RA  with spout base and right-hook above.
#    R'i   | 0F62+0F77 RA+VOCALIC_RR   | RA  with spout base and "2" below.
#          |                  
#    Li    | 0F63+0F78 LA+VOCALIC_L    | LA with right-hook above.
#    L'i   | 0F63+0F79 LA+VOCALIC_LL   | LA with "2" below.
# 
# 
# Postfix vowel modifiers (TIBETAN_SIGN_xxx)
# 
#    ACIP  | Unicode   UnicodeName     | Appearance+Obs
#    ----  | --------- --------------- | --------------
#    m     | 0F7E      RJES_SU_NGA_RO  | App: Circle above letter.
#          |                           | Eqv: Sanskrit anusvara.
#
#    :     | 0F7F      RNAM_BCAD       | App: Colon with open circles at left.
#          |                           | Eqv: Sanskrit visarga.
#    
# 
# Punctuation (TIBETAN_MARK_xxx)
# 
#    ACIP  | Unics UnicodeName            | Appearance+Obs
#    ----  | ----- ---------------        | --------------
#    \     | 0F84  HALANTA                | Aka: srog med.
#          |                              | App: backslash below.
#          |                              | Sem: Separates syllables that 
#          |                              |   are actually consonant clusters.
#          |                              | Eqv: Devanagari virama.
#
#   SP     | 0F0B   INTERSYLLABIC_TSHEG   | App: A dot aligned with the letter clothesline.
#          | 0F0C   DELIMITER_TSHEG_BSTAR | Sem: Separates syllables (not just words).
#          |                              |   The Unicode names are misleading.
#          |                              |   0F0B is morpheme delim (breaking)
#          |                              |   0F0C is syllable delim (non-breaking)
#
#   &      | 0F85   PALUTA                | App: Curly "3" with tail, similar to NYA.
#          |                              | Sem: Sanskrit apostrophe.
#          |                              | Eqv: Sanskrit avagraha.
#
#   ,      | 0F0D   SHAD                  | App: a nail-like vertical stroke.
#          |                              | Sem: Used in pairs to delimit phrases.
#          |                              |   Marks end of a section of text
#
#   =      | 0F0E   NYIS_SHAD             | App: double shad.
#          |                              | Sem: Used after double- or triple-scroll
#          |                              |   at the beginning of the text, or as a
#          |                              |   full stop. Marks the end of a whole topic.
#
#   `      | 0F08   SBRUL_SHAD            | App: nail with wings and tilde on top.
#          |                              | Sem: Decorative version of the shad, sometimes
#          |                              |   used at the beginning or end of a text
#          |                              |   Separates sections of meanings equivalent to
#          |                              |   topics and sub-topics.
#
#   ;      | 0F11   RIN_CHEN_SPUNGS_SHAD  | App: nail-like stroke with three dots above
#          |                              | Sem: Decorative version of the shad, sometimes
#          |                              |   used at the beginning or end of a text.
#          |                              |   Shad which follows a tsheg-bar
#          |                              |   that starts a new line.
#
#   ^      |                              | App: like Greek lowercase upsilon, prefixed to syll.
#
#   °      | 0F37   NGAS_BZUNG_SGOR_RTAGS | App: small circle under preceding syllable
#          |                              | Sem: Emphasis; used as underlining.
#
#   ×      |                              | App: small "x" under preceding syllable
#
#   %      | 0F35   NGAS_BZUNG_NYI_ZLA    | App: small circle-on-bowl under preceding syllable 
#          |                              | Sem: Honorific; emphasis; used like underlining
#
#   /      | 0F3C   ANG_KHANG_GYON (opn)  | App: diagonal brace (rise = open, fall = close)
#          | 0F3D   ANG_KHANG_GYAS (cls)  | Sem: open or close Tibetan brace 
# 
# 
# Decorative signs (TIBETAN_MARK_xxx):
# 
#   ~~     | 0F04+0F05 INITIAL_YIG_MGO_MDUN_MA+         | App: Double-scroll.
#          |           CLOSING_YIG_MGO_SGAB_MA          | Sem: Decorative sign used
#          |                                            |   at the beginning of texts.
#
#   ~~~    | 0F04+0F05² INITIAL_YIG_MGO_MDUN_MA+        | App: Triple-scroll.
#          |            2 × CLOSING_YIG_MGO_SGAB_MA(?)  | Sem: Decorative sign used at 
#          |                                            |   the beginning of texts.
#                                                                                                    
# 
# Formatting codes:
# 
#   X-Y         X and Y are separate letters, side by side.
#   X+Y         Sanskrit stack of letter X over letter Y.
#   ÷           End of line in the original book.
#   *           Unreadable/missing/untranscribed character.
#   @op{..}{..} Control line ("@" must be in column 1).
#   {...}       Text between braces is an editorial note.
#   #           Rest of line is a comment.
# 
# The following changes to the ACIP code were done by J. Stolfi 
# for compatibility with existing scripts:
#  
#    "~~~"   was "#"
#    "~~"    was "*"
#    "="     was ",,"
#    "×"     was "x"
#    "°"     was "o"
#    "{...}" was "[...]"
#    "÷"     was a blank line.