/* Capitalizes all words in a file. */
/* Last edited on 2024-12-25 10:16:27 by stolfi */

/* See the copyright notice at the end of this file. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

typedef int bool_t;
#define TRUE 1
#define FALSE 0

#define usage \
  "capitize [-k] [LETTERSET] < INFILE > OUTFILE"

/* PROTOTYPES */

int main(int argc, char **argv);
  /* Copies $stdin$ to $stdout$, mapping the first byte of every word
    through the word-initial table and every other byte of the word
    through the non-initial table.  A word is a maximal run of
    word-forming bytes.  Bytes that are not word-forming are copied
    unchanged.  Returns 0 on success; stops with status 1 on a bad
    command line or an I/O error. */

void parse_args(int argc, char **argv, bool_t *keepuc, char **letters);
  /* Parses the command line options. If "-k" was given, sets
    $keepuc$ to TRUE.  If a custom letter set was specified, returns
    it in $letters$, else sets $letters$ to NULL.  Every leading
    argument that starts with '-' is taken to be an option. */

void define_case_tables(char *fst, char *rst, bool_t keepuc);
  /* Defines character mapping tables for word-initial and
    non-word-initial characters, respectively.  Both tables must have
    256 entries.  Word-initial lowercase letters are mapped to
    uppercase.  Non-initial uppercase letters are mapped to lowercase
    if $keepuc = FALSE$, and left unchanged if $keepuc = TRUE$. */

void default_letterset(bool_t *al);
  /* Defines the word-forming chars as being the ISO Latin-1 letters
    and digits.  The table $al$ must have 256 entries. */

void define_letterset(bool_t *al, char *set);
  /* Defines the word-forming chars as being those that occur in $set$.
    The table $al$ must have 256 entries.  A sequence "X-Y" in $set$
    denotes every byte from X to Y inclusive (nothing if X > Y); a '-'
    that is the last character of $set$ stands for itself.  Characters
    may be written with the escapes accepted by $get_next_letter$. */

char get_next_letter(char **string);
  /* Parses the first character of $**string$, handling backslashed
    escapes.  Increments $*string$ to point after the parsed character
    or escape sequence.  The string must not be empty.  Recognized
    escapes are "\ooo" (one to three octal digits, at most 377),
    "\n", "\t", "\r", "\v", "\f", "\e" (ASCII ESC) and "\\".
    NOTE: the result is a plain $char$; convert it to $unsigned char$
    before using it as a table index. */

void error (char *msg);
  /* Prints string $*msg$ to $stderr$ and stops. */

void arg_error (char *msg);
  /* Prints string $*msg$ to $stderr$, then usage advice, and stops. */

/* IMPLEMENTATIONS */

int main(int argc, char **argv)
{
  bool_t al[256];         /* TRUE for word-forming bytes. */
  char fst[256];          /* Mapping table for first char in word. */
  char rst[256];          /* Mapping table for remaining chars in word. */
  bool_t inword = FALSE;  /* TRUE between 1st letter and end of word. */
  bool_t keepuc = FALSE;  /* Argument - TRUE keeps non-initial upper case unchanged. */
  char *letters = NULL;   /* Argument - user-given letter set (NULL if none). */
  bool_t write_failed;
  int c;

  parse_args(argc, argv, &keepuc, &letters);
  if (letters == NULL) {
    default_letterset(al);
  } else {
    define_letterset(al, letters);
  }
  define_case_tables(fst, rst, keepuc);

  while ((c = getchar()) != EOF) {
    assert((c >= 0) && (c <= 255));
    if (al[c]) {
      /* Letter.  The tables hold plain chars, which may be negative;
        go through unsigned char so that the byte value is explicit. */
      putchar((unsigned char)(inword ? rst[c] : fst[c]));
      inword = TRUE;
    } else {
      /* Non-letter: ends the current word, copied verbatim. */
      inword = FALSE;
      putchar(c);
    }
  }

  /* EOF may also mean a read failure; do not pass it off as success. */
  if (ferror(stdin)) { error("error reading standard input"); }

  /* A failed write may only surface when the buffer is flushed, so
    check both the stream error flag and the result of closing. */
  write_failed = (ferror(stdout) != 0);
  if (fclose(stdout) != 0) { write_failed = TRUE; }
  if (write_failed) { error("error writing standard output"); }
  return 0;
}

void parse_args(int argc, char **argv, bool_t *keepuc, char **letters)
{
  int i = 1;

  /* Default arguments: */
  (*keepuc) = FALSE;
  (*letters) = NULL;

  /* Scan options: */
  while ((i < argc) && (argv[i][0] == '-')) {
    if (strcmp(argv[i], "-k") == 0) {
      (*keepuc) = TRUE;
    } else {
      arg_error("unrecognized option");
    }
    i++;
  }

  /* Optional letter set: */
  if (i < argc) {
    (*letters) = argv[i];
    i++;
  }

  if (i != argc) { arg_error("too many arguments"); }
}

void default_letterset(bool_t *al)
{
  int c;

  for (c = 0; c <= 255; c++) { al[c] = FALSE; }
  for (c = '0'; c <= '9'; c++) { al[c] = TRUE; }

  /* In ASCII and ISO Latin-1 each lowercase letter is 32 above its
    uppercase form.  Codes 215 (multiplication sign) and 247 (division
    sign) lie inside the accented letter ranges but are not letters. */
  for (c = 'A'; c <= 'Z'; c++) { al[c] = al[c + 32] = TRUE; }
  for (c = 192; c <= 214; c++) { al[c] = al[c + 32] = TRUE; }
  for (c = 216; c <= 222; c++) { al[c] = al[c + 32] = TRUE; }
  al[223] = TRUE; /* es-zet */
  al[255] = TRUE; /* y-umlaut */
}

void define_letterset(bool_t *al, char *letters)
{
  int c, f;

  for (c = 0; c <= 255; c++) { al[c] = FALSE; }

  while ((*letters) != '\0') {
    /* Plain char may be signed: force the value into 0..255 before
      it is used as an index into $al$. */
    c = (unsigned char)get_next_letter(&letters);
    if (((*letters) != '-') || ((*(letters + 1)) == '\0')) {
      /* Single character (a trailing '-' is handled on the next pass). */
      al[c] = TRUE;
    } else {
      /* Range "c-f": skip the '-' and parse the upper end. */
      letters++;
      f = (unsigned char)get_next_letter(&letters);
      for (; c <= f; c++) { al[c] = TRUE; }
    }
  }
}

char get_next_letter(char **letters)
{
  unsigned char *p = (unsigned char *)(*letters);
  unsigned char c = 0; /* Stays defined on the error paths too. */

  if ((*p) != '\\') {
    /* Ordinary character. */
    c = (*p);
    p++;
  } else {
    p++;
    switch (*p) {
      case '\0':
        arg_error("incomplete escape char");
        break;

      case '0': case '1': case '2': case '3':
      case '4': case '5': case '6': case '7':
        {
          /* One to three octal digits; '8' and '9' are not octal. */
          unsigned int val = 0;
          int ndigits = 0;
          while ((ndigits < 3) && ((*p) >= '0') && ((*p) <= '7')) {
            val = (val << 3) + (unsigned int)((*p) - '0');
            p++;
            ndigits++;
          }
          if (val > 255) { arg_error("octal escape char above \\377"); }
          c = (unsigned char)val;
        }
        break;

      case 'n':  c = '\n';   p++; break;
      case 't':  c = '\t';   p++; break;
      case 'r':  c = '\r';   p++; break;
      case 'v':  c = '\v';   p++; break;
      case 'f':  c = '\f';   p++; break;
      case 'e':  c = '\033'; p++; break; /* ASCII ESC; '\e' is not standard C. */
      case '\\': c = '\\';   p++; break;

      default:
        arg_error("bad escape code");
        break;
    }
  }

  (*letters) = (char *)p;
  return (char)c;
}

static void map_case_range(char *fst, char *rst, int lo, int hi, bool_t keepuc)
  /* For each uppercase code $c$ in $lo..hi$, whose lowercase form is
    $c+32$: makes $fst$ map the lowercase form to $c$, and makes $rst$
    map $c$ to its lowercase form unless $keepuc$ is TRUE. */
{
  int c;
  for (c = lo; c <= hi; c++) {
    fst[c + 32] = (char)c;
    rst[c] = (char)(keepuc ? c : c + 32);
  }
}

void define_case_tables(char *fst, char *rst, bool_t keepuc)
{
  int c;

  /* Default: every byte maps to itself. */
  for (c = 0; c <= 255; c++) {
    fst[c] = (char)c;
    rst[c] = (char)c;
  }

  /* ASCII letters, then the two ISO Latin-1 accented ranges.  Es-zet
    (223) and y-umlaut (255) have no uppercase form in Latin-1 and so
    keep the identity mapping. */
  map_case_range(fst, rst, 'A', 'Z', keepuc);
  map_case_range(fst, rst, 192, 214, keepuc);
  map_case_range(fst, rst, 216, 222, keepuc);
}

void arg_error (char *msg)
{
  fprintf (stderr, "*** %s\n", msg);
  fprintf (stderr, "usage: %s\n", usage);
  exit(1);
}

void error (char *msg)
{
  fprintf (stderr, "*** %s\n", msg);
  exit(1);
}

/*
** created by J. Stolfi
**
** Copyright (C) 1989, 1991 by Jorge Stolfi.
**
** Permission to use, copy, modify, and distribute this software and its
** documentation for any purpose and without fee is hereby granted, provided
** that the above copyright notice appear in all copies and that both that
** copyright notice and this permission notice appear in supporting
** documentation.  This software is provided "as is" without express or
** implied warranty.
**
*/