#! /bin/bash
# Last edited on 2010-04-12 17:00:39 by stolfilocal

# Sorts a "sed" script by decreasing length of the LHS.
# This increases the chances that the longest matches will be matched first.

gawk \
      ' /^[ ]*([\#]|$)/ { next; }
        /^[s]/ { 
          ch = substr($0,2,1);
          esc = 0; n = length($0);
          for (i = 3; i <= n; i++) 
            { if (! esc)
                { c2 = substr($0,i,1);
                  if (c2 == ch)
                    { lhs = substr($0,3,i-3); break; }
                  else if (c2 == "\\")
                    { esc = 1; }
                }
              else
                { esc = 0; }
            }
          # Remove any "*" groups.  Not perfect:
          lhs = gensub(/\[[^]\\]\][*]/, "", "g", lhs);
          lhs = gensub(/[^]][*]/, "", "g", lhs);
          # Replace "[...]" by a single char:
          lhs = gensub(/\[[^]\\]\]/, ".", "g", lhs);
          # Remove escapes:
          lhs = gensub(/\\(.)/, "\\1", "g", lhs);
          printf "%05d %s\n", length(lhs), $0;
          next;
        }
        // { printf "!! ERROR %s\n", $0; }
      ' \
  | sort -k1,1nr \
  | gawk \
      ' /^[0-9][0-9][0-9][0-9][0-9] /{ print substr($0,7); next; }
        // { print; next; }
      '
  
      
          
