/* See grr_grammar.h */ /* Last edited on 2024-12-21 11:49:46 by stolfi */ #include #include #include #include #include #include #include #include #include #include char *grr_copy_name(char *sOld) { if (sOld == NULL) { return NULL; } else { /* Copy the string itself: */ char *sNew = (char *)notnull(malloc((strlen(sOld)+1)*sizeof(char)), "no mem"); strcpy(sNew, sOld); return sNew; } } void grr_print_rule(FILE *wr, Grammar_t *G, char *pre, GRule_t *r, char *suf) { grr_print_symbol(wr, G, pre, r->H, " -> "); if (r->L != NOSYMB) { grr_print_symbol(wr, G, "", r->L, " "); } grr_print_symbol(wr, G, "", r->R, suf); } void grr_print_symbol(FILE *wr, Grammar_t *G, char *pre, GSymbId_t S, char *suf) { fprintf(wr, "%s%s%s", pre, (S == NOSYMB ? "ø" : G->sname[S]), suf); } void grr_build_lists(Grammar_t *G) { GSymbId_t S; for (S = 0; S <= G->NS; S++) { G->firstH[S] = NORULE; G->firstL[S] = NORULE; G->firstR[S] = NORULE; } GRuleId_t ir; for (ir = 1; ir <= G->NR; ir++) { GRule_t *rulei = &(G->rule[ir]); GSymbId_t H = rulei->H; GSymbId_t L = rulei->L; GSymbId_t R = rulei->R; /* Prepend to the {firstH/nextH} lists: */ assert(H != NOSYMB); rulei->nextH = G->firstH[H]; G->firstH[H] = ir; /* Prepend to the {firstL/nextL} lists: */ if (L != NOSYMB) { rulei->nextL = G->firstL[L]; G->firstL[L] = ir; } /* Prepend to the {firstR/nextR} lists: */ if (R != NOSYMB) { rulei->nextR = G->firstR[R]; G->firstR[R] = ir; } } } #define grr_MAX_SYMB_LENGTH 300 Grammar_t *grr_read_Grammar(FILE *rd) { Grammar_t *G; /* Initialize its tables: */ string_vec_t sname = string_vec_new(1000); GRule_vec_t rule = GRule_vec_new(1000); int NS = 0; int NR = 0; int NL = 1; /* Line counter */ auto GSymbId_t grr_read_symbol(FILE *rd); /* Reads a symbol name, adds it to the {sname} table, assigns and returns its ID. If there is no symbol, returns {NOSYMB}. */ auto void grr_skip_spaces_and_comment_lines(FILE *rd); /* Skips characters until a character that is not space or newline, nor part of a "#"-comment (including end-of-file). */ GSymbId_t grr_read_symbol(FILE *rd) { /* Parse symbol name, put in {buf[0..n-1]}: */ char buf[grr_MAX_SYMB_LENGTH + 1]; int n = 0; int c = fgetc(rd); if (c == EOF) { return NOSYMB; } else if (! grr_is_symbol_char(c)) { ungetc(c, rd); return NOSYMB; } else { /* Read as many symbol characters as possible: */ do { if (n >= grr_MAX_SYMB_LENGTH) { grr_file_error("stdin", NL, "symbol too long"); } buf[n] = c; n++; c = fgetc(rd); } while(grr_is_symbol_char(c)); if (c != EOF) { ungetc(c, rd); } buf[n] = '\000'; } /* Look up in symbol table: */ GSymbId_t S; for (S = 1; S <= NS; S++) { char *Sn = sname.e[S]; if (strcmp(buf,Sn) == 0) { return S; } } /* Append to symbol table: */ NS++; string_vec_expand(&sname, NS); sname.e[NS] = txtcat(buf, ""); /* Make a copy of name. */ return NS; } void grr_skip_spaces_and_comment_lines(FILE *rd) { while(1) { bool_t ok = fget_test_comment_or_eol(rd, '#', NULL); if (! ok) { return; } } } /* Read rule data: */ GSymbId_t S, T; GSymbId_t H, L, R; while(1) { grr_skip_spaces_and_comment_lines(rd); /* Try to read LHS of rule: */ H = grr_read_symbol(rd); if (H == NOSYMB) { break; } /* Parse rest of rule: */ fget_skip_spaces(rd); fget_match(rd, "->"); fget_skip_spaces(rd); S = grr_read_symbol(rd); if (S == NOSYMB) { /* Valid rule */ L = NOSYMB; R = NOSYMB; } else { fget_skip_spaces(rd); T = grr_read_symbol(rd); if (T == NOSYMB) { /* Unary rule: */ L = NOSYMB; R = S;/* {cfp_parser}'s convention */ } else { /* Binary rule: */ L = S; R = T; } } /* Store rule in grammar: */ NR++; GRule_vec_expand(&rule,NR); rule.e[NR] = (GRule_t){H,L,R, NORULE,NORULE,NORULE}; /* Skip comments and end-of-line: */ grr_skip_spaces_or_comment(rd); /* Require newline: */ fget_eol(rd); NL++; } if (!feof(rd)) { grr_file_error("stdin", NL, "extraneous stuff after grammar"); } G = grr_new_Grammar(NS,NR); GRuleId_t ir; for (ir = 1; ir <= NR; ir++) { G->rule[ir] = rule.e[ir]; } for (S = 1; S <= NS; S++) { G->sname[S] = sname.e[S]; } free(sname.e); free(rule.e); grr_build_lists(G); return G; } Grammar_t *grr_new_Grammar(int NS, int NR) { Grammar_t *G = (Grammar_t *)notnull(malloc(sizeof(Grammar_t)), "no mem"); G->NS = NS; G->NR = NR; G->rule = (GRule_t *)notnull(malloc((NR+1)*sizeof(GRule_t)), "no mem"); G->sname = (char **)notnull(malloc((NS+1)*sizeof(char *)), "no mem"); G->firstH = (GRuleId_t *)notnull(malloc((NS+1)*sizeof(GRuleId_t)), "no mem"); G->firstL = (GRuleId_t *)notnull(malloc((NS+1)*sizeof(GRuleId_t)), "no mem"); G->firstR = (GRuleId_t *)notnull(malloc((NS+1)*sizeof(GRuleId_t)), "no mem"); /* Initialize the rule table, just in case: */ GRuleId_t ir; for (ir = 0; ir <= NR; ir++) { G->rule[ir] = (GRule_t){NOSYMB,NOSYMB,NOSYMB, NORULE,NORULE,NORULE}; } /* Initialize the symbol-indexed arrays: */ GSymbId_t S; for (S = 0; S <= NS; S++) { G->firstH[S] = NORULE; G->firstL[S] = NORULE; G->firstR[S] = NORULE; G->sname[S] = NULL; } return G; } void grr_free_grammar(Grammar_t *G) { GSymbId_t S; int NS = G->NS; for (S = 0; S <= NS; S++) { char *s = G->sname[S]; if (s != NULL) { free(s); } } free(G->sname); free(G->rule); free(G->firstH); free(G->firstL); free(G->firstR); free(G); } #define GrammarVersion "2004-11-01" void grr_write_Grammar(FILE *wr, Grammar_t *G) { filefmt_write_header(wr, "grammar", GrammarVersion); fprintf(wr, "symbols = %d\n", G->NS); fprintf(wr, "rules = %d\n", G->NR); GSymbId_t S; for (S = 1; S <= G->NS; S++) { fprintf(wr, "%d: %s\n", S, G->sname[S]); } GRuleId_t ir; for (ir = 1; ir <= G->NR; ir++) { GRule_t *rulei = &(G->rule[ir]); GSymbId_t H = rulei->H, L = rulei->L, R = rulei->R; fprintf(wr, "%d: %d %d %d", ir, H, L, R); char *Hx = (H == NOSYMB ? "¤" : G->sname[H]); char *Lx = (L == NOSYMB ? "¤" : G->sname[L]); char *Rx = (R == NOSYMB ? "¤" : G->sname[R]); fprintf(wr, " # %s -> %s %s\n", Hx, Lx, Rx); } filefmt_write_footer(wr, "grammar"); fflush(wr); } bool_t grr_is_symbol_char(int c) { return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')) || (c == '$') || (c == '*') || (c == '(') || (c == ')') || (c == '{') || (c == '}') || (c == '_') || (c == '+') || (c == ':') || (c == ','); } void grr_file_error(char *where, int loc, char *msg) { fprintf(stderr, "data file error:"); if (where != NULL) { fprintf(stderr, " %s %d:", where, loc); } fprintf(stderr, " ** %s\n", msg); exit(1); } vec_typeimpl(GRule_vec_t,GRule_vec,GRule_t); bool_t grr_debug = FALSE; void grr_debug_rule(FILE *wr, Grammar_t *G, char *pre, GRule_t *r, char *suf) { if (grr_debug) { grr_print_rule(wr, G, pre, r, suf); } } void grr_debug_symbol(FILE *wr, Grammar_t *G, char *pre, GSymbId_t S, char *suf) { if (grr_debug) { grr_print_symbol(wr, G, pre, S, suf); } }