/* See grr_nondead.h */ /* Last edited on 2024-12-21 11:49:37 by stolfi */ #include #include #include #include #include #include #include #include Grammar_t *grr_remove_invalid_symbols(Grammar_t *G) { /* {valid[S]} will be true iff {S} can produce some terminal string: */ bool_t valid[G->NS+1]; /* {vsymb[0..M-1]} are the symbols that can produce terminal strings: */ int M = 0; GSymbId_t vsymb[G->NS]; auto void grr_mark_valid(GSymbId_t S); /* If {S} is not marked valid yet, marks it and appends it to the queue. */ void grr_mark_valid(GSymbId_t S) { if (!(valid[S])) { valid[S] = TRUE; vsymb[M] = S; M++; grr_debug_symbol(stderr, G, "marked ", S, " as valid\n"); } } GSymbId_t S; GRuleId_t ir; /* Initialize {valid} with FALSE: */ for (S = 0; S <= G->NS; S++) { valid[S] = FALSE; } auto bool_t grr_is_term(GSymbId_t S); /* Returns TRUE iff {S} is {NOSYMB} or a terminal symbol of {G}. */ bool_t grr_is_term(GSymbId_t S) { if (S == NOSYMB) { return TRUE; } assert(G->sname[S] != NULL); char c = G->sname[S][0]; return ((c >= 'a') && (c <= 'z')); } /* Mark terminal symbols: */ for (S = 1; S <= G->NS; S++) { if (grr_is_term(S)) { grr_mark_valid(S); } } /* Mark symbols which have empty RHS: */ for (ir = 1; ir <= G->NR; ir++) { GRule_t *rulei = &(G->rule[ir]); if ((rulei->L == NOSYMB) && (rulei->R == NOSYMB)) { grr_mark_valid(rulei->H); } } /* Compute closure: */ int K = 0; /* Symbols {vsymb[0..K-1]} have been closed: */ while (K < M) { /* Close symbol {vsymb[K]}: */ S = vsymb[K]; /* Look for all uses of {S} as left symbol of RHS: */ ir = G->firstL[S]; while (ir != NORULE) { GRule_t *rulei = &(G->rule[ir]); grr_debug_rule(stderr, G, "looking at rule ", rulei, "\n"); assert(rulei->L == S); GSymbId_t R = rulei->R; if ((R == NOSYMB) || valid[R]) { grr_mark_valid(rulei->H); } ir = rulei->nextL; } /* Look for all uses of {S} as right symbol of RHS: */ ir = G->firstR[S]; while (ir != NORULE) { GRule_t *rulei = &(G->rule[ir]); grr_debug_rule(stderr, G, "looking at rule ", rulei, "\n"); assert(rulei->R == S); GSymbId_t L = rulei->L; if ((L == NOSYMB) || valid[L]) { grr_mark_valid(rulei->H); } ir = rulei->nextR; } /* Mark {vsymb[K]} as closed: */ K++; } /* Get valid subset {ruleNew} of {G->rule}: */ GRule_vec_t ruleNew = GRule_vec_new(G->NR + 1); /* Initial guess. */ int NRNew = 0; ruleNew.e[0] = (GRule_t){NOSYMB,NOSYMB,NOSYMB, NORULE,NORULE,NORULE}; for (ir = 1; ir <= G->NR; ir++) { GRule_t *rulei = &(G->rule[ir]); GSymbId_t H = rulei->H; GSymbId_t L = rulei->L; GSymbId_t R = rulei->R; assert(H != NOSYMB); /* A rule is invalid iff the RHS contains a non-null invalid symbol: */ if (((L == NOSYMB) || (valid[L])) && ((R == NOSYMB) || (valid[R]))) { /* Valid rule - preserve: */ grr_debug_rule(stderr, G, "kept rule ", rulei, "\n"); assert(valid[H]); NRNew++; GRule_vec_expand(&ruleNew, NRNew); ruleNew.e[NRNew] = (GRule_t){H,L,R, NORULE,NORULE,NORULE}; } else { /* Invalid rule: */ grr_debug_rule(stderr, G, "deleted rule ", rulei, "\n"); } } /* Create new grammar: */ int NSNew = M; Grammar_t *GNew = grr_new_Grammar(NSNew,NRNew); /* Output numerically-coded input grammar for debugging. */ /* grr_write_Grammar(stderr, G); */ /* Copy valid symbols, and build old->new symbol table: */ /* {dic[S]} is the new symbol id for the valid symbol whose old id was {S}. */ GSymbId_t *dic = (GSymbId_t *)notnull(malloc((G->NS+1)*sizeof(GSymbId_t)), "no mem"); GSymbId_t SNew = 0, SOld; dic[NOSYMB] = NOSYMB; for (SOld = 1; SOld <= G->NS; SOld++) { if (valid[SOld]) { SNew++; dic[SOld] = SNew; GNew->sname[SNew] = grr_copy_name(G->sname[SOld]); } else { dic[SOld] = NOSYMB; } } /* Copy valid rules from {ruleNew} to {GNew->rule}, mapping the symbol ids: */ GNew->rule[NORULE] = (GRule_t){NOSYMB,NOSYMB,NOSYMB, NORULE,NORULE,NORULE}; for (ir = 1; ir <= NRNew; ir++) { GRule_t *rulei = &(ruleNew.e[ir]); GSymbId_t HOld = rulei->H, HNew = dic[HOld]; GSymbId_t LOld = rulei->L, LNew = dic[LOld]; GSymbId_t ROld = rulei->R, RNew = dic[ROld]; assert(HNew != NOSYMB); assert((LOld == NOSYMB) == (LNew == NOSYMB)); assert((ROld == NOSYMB) == (RNew == NOSYMB)); GNew->rule[ir] = (GRule_t){HNew,LNew,RNew, NORULE,NORULE,NORULE}; } /* Rebuild inverted lists: */ grr_build_lists(GNew); return GNew; }