/* Last edited on 2024-12-25 10:15:26 by stolfi */

/*
  Reads a GB-encoded Chinese text from standard input and writes it to
  standard output with every ideogram replaced by a decimal number
  enclosed in guillemets «».

  Assumes the ideograms are encoded in the GB 2312-80 EUC-compatible
  format, where each ideogram consists of a `row' byte in the range
  0xA1 to 0xF7 followed by a `column' byte in the range 0xA1 to 0xFE.
  Assumes also that any byte that is not part of a GB code is in the
  range 0x00 to 0xA0; such bytes are copied through unchanged.

  The number printed for row byte {r} and column byte {s} is
  NCOLS*(r - 0xA1) + (s - 0xA1) + 1, so the first ideogram is 1.

  On malformed input the offending bytes are echoed to the output, a
  message is printed to standard error, and the program exits with
  status 1.
*/

/* NOTE(review): the original file had three #include directives whose
   header names were lost; only the two below are needed by this code.
   Confirm the third against the upstream source. */
#include <stdio.h>
#include <stdlib.h>

#define GB_FIRST    0xA1  /* Lowest valid row or column byte. */
#define GB_ROW_LAST 0xF7  /* Highest valid row byte. */
#define GB_COL_LAST 0xFE  /* Highest valid column byte. */

#define NCOLS (GB_COL_LAST - GB_FIRST + 1)
#define NROWS (GB_ROW_LAST - GB_FIRST + 1)

void doputchar(int c);
  /* Writes byte {c} to standard output; aborts via {error} on failure. */

void error(const char *msg);
  /* Prints {msg} to standard error, closes standard output, and
     exits with status 1.  Does not return. */

int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    for (;;) {
        int r = getchar();
        if (r == EOF) {
            break;
        }

        if (r < GB_FIRST) {
            /* Not a GB row byte: copy through unchanged. */
            doputchar(r);
        } else if (r > GB_ROW_LAST) {
            doputchar(r);
            error("row byte over 0xF7");
        } else {
            /* {r} is a valid row byte; a column byte must follow. */
            int s = getchar();
            if (s == EOF) {
                doputchar(r);
                error("unexpected EOF in GB code");
            } else if (s < GB_FIRST) {
                doputchar(r);
                doputchar(s);
                error("incomplete GB code");
            } else if (s > GB_COL_LAST) {
                /* A column byte past 0xFE would produce the same number
                   as the first column of the next row, so reject it. */
                doputchar(r);
                doputchar(s);
                error("column byte over 0xFE");
            } else {
                int rs = NCOLS * (r - GB_FIRST) + (s - GB_FIRST) + 1;
                if (printf("«%d»", rs) < 0) {
                    error("write error");
                }
            }
        }
    }

    /* Closing flushes any buffered output, so a failure here means data
       was lost.  {error} is not used because it would close the stream
       a second time. */
    if (fclose(stdout) != 0) {
        fprintf(stderr, "%s\n", "write error");
        exit(1);
    }
    exit(0);
}

void doputchar(int c)
{
    /* On success {putchar} returns the byte written; callers pass
       values in 0..255, so any other result signals a failure. */
    int res = putchar(c);
    if (res != c) {
        error("write error");
    }
}

void error(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    fclose(stdout);
    exit(1);
}