/* See grr_nonempty.h */ /* Last edited on 2024-12-21 11:48:40 by stolfi */ #include #include #include #include #include #include #include Grammar_t *grr_remove_empty_rules(Grammar_t *G) { /* {empty[S]} will be true iff {S} can produce the empty string: */ bool_t empty[G->NS+1]; /* {esymb[0..M-1]} are the empty-producing symbols: */ int M = 0; GSymbId_t esymb[G->NS]; auto void grr_mark_empty(GSymbId_t S); /* If {S} is not marked empty yet, marks it and appends it to the queue. */ void grr_mark_empty(GSymbId_t S) { if (!(empty[S])) { empty[S] = TRUE; esymb[M] = S; M++; } } GSymbId_t S; GRuleId_t ir; /* Initialize {empty} with FALSE: */ for (S = 0; S <= G->NS; S++) { empty[S] = FALSE; } /* Mark symbols that directly produce the empty string: */ for (ir = 1; ir <= G->NR; ir++) { GRule_t *rulei = &(G->rule[ir]); if ((rulei->L == NOSYMB) && (rulei->R == NOSYMB)) { grr_mark_empty(rulei->H); } } /* Compute closure: */ int K = 0; /* Symbols {esymb[0..K-1]} have been closed: */ while (K < M) { /* Close symbol {esymb[K]}: */ S = esymb[K]; /* Look for all uses of {S} as left symbol of RHS: */ ir = G->firstL[S]; while (ir != NORULE) { GRule_t *rulei = &(G->rule[ir]); assert(rulei->L == S); GSymbId_t R = rulei->R; if ((R == NOSYMB) || empty[R]) { grr_mark_empty(rulei->H); } ir = rulei->nextL; } /* Look for all uses of {S} as right symbol of RHS: */ ir = G->firstR[S]; while (ir != NORULE) { GRule_t *rulei = &(G->rule[ir]); assert(rulei->R == S); GSymbId_t L = rulei->L; if ((L == NOSYMB) || empty[L]) { grr_mark_empty(rulei->H); } ir = rulei->nextR; } /* Mark {esymb[K]} as closed: */ K++; } /* The new rule set is {ruleNew[1..NRNew]}: */ int NRNew = 0; GRule_vec_t ruleNew = GRule_vec_new(G->NR + 1); /* Initial guess. */ auto void grr_add_rule(GSymbId_t H, GSymbId_t L, GSymbId_t R); /* Appends to the new rule set a rule {H -> L R}. Should suppress duplicate rules, but does not do it yet. */ void grr_add_rule(GSymbId_t H, GSymbId_t L, GSymbId_t R) { NRNew++; GRule_vec_expand(&ruleNew, NRNew); ruleNew.e[NRNew] = (GRule_t){H,L,R, NORULE,NORULE,NORULE}; grr_debug_rule(stderr, G, " added rule ", &(ruleNew.e[NRNew]), "\n"); } /* Expand any rules whose RHS has symbols that may generate "": */ ruleNew.e[0] = (GRule_t){NOSYMB,NOSYMB,NOSYMB, NORULE,NORULE,NORULE}; for (ir = 1; ir <= G->NR; ir++) { GRule_t *rulei = &(G->rule[ir]); grr_debug_rule(stderr, G, "expanding rule ", rulei, "\n"); GSymbId_t H = rulei->H; GSymbId_t L = rulei->L; GSymbId_t R = rulei->R; assert(H != NOSYMB); if ((L != NOSYMB) && (R != NOSYMB)) { /* Binary rule - preserve and add expansions: */ grr_add_rule(H,L,R); /* Add unary alternatives if any RHS symbols can be empty: */ /* Note that these new rules may duplicate existing ones. */ /* We should check for that and suppress the duplicates. */ if (empty[L]) { grr_add_rule(H, NOSYMB, R); } if (empty[R]) { grr_add_rule(H, NOSYMB, L); } } else if (L != NOSYMB) { /* Anomalous unary rule (just in case) - preserve with correct form: */ grr_add_rule(H,NOSYMB,L); } else if (R != NOSYMB) { /* Unary rule - preserve: */ grr_add_rule(H,NOSYMB,R); } } /* Create the new grammar: */ int NSNew = G->NS; Grammar_t *GNew = grr_new_Grammar(NSNew,NRNew); /* Copy the {sname} array: */ for (S = 0; S <= G->NS; S++) { GNew->sname[S] = grr_copy_name(G->sname[S]); } /* Copy the new rules: */ for (ir = 0; ir <= NRNew; ir++) { GNew->rule[ir] = ruleNew.e[ir]; } /* Rebuild inverted lists: */ grr_build_lists(GNew); return GNew; }