#! /usr/bin/gawk -f 
# Last edited on 2000-09-21 03:02:05 by stolfi

# Splits every input line into runs of identical characters and prints
# one run per output line.
#
# A run normally ends wherever two adjacent characters differ, and at
# every line boundary.
#
# When "sticky_qmarks" is true, two adjacent different characters end a
# run only if neither of them is "?"; line boundaries always end a run.
#
# Text from "#" to the end of an input line is discarded before the
# line is processed.  Lines that are then shorter than "min_line_length"
# are dropped; that limit defaults to 1 and is raised to 1 if a smaller
# number is given.
#
# After the runs of each line, a string with one "." for each remaining
# (non-comment) character of that line is printed as well.

BEGIN {
  # Negative means "no error recorded"; the error helpers set it to 1.
  abort = -1;

  usage = ( ARGV[0] " \\\n" \
      "  -v sticky_qmarks=BOOL \\\n" \
      "  [ -v min_line_length=NUM ] \\\n" \
      "  < INFILE > OUTFILE" \
  );

  # sticky_qmarks is mandatory and must be supplied with "-v".
  if (sticky_qmarks == "") {
    arg_error("must define sticky_qmarks");
  }

  # min_line_length is optional; a missing or too-small value becomes 1.
  if ((min_line_length == "") || (min_line_length < 1)) {
    min_line_length = 1;
  }
}

# Guard rule: if an error status has been recorded in "abort", stop
# before processing the current record and use it as the exit status.
(abort >= 0) {
  exit abort;
}

# Comment removal: on lines containing "#", delete everything from the
# first "#" through the end of the line.
index($0, "#") {
  sub(/#.*$/, "", $0);
}

# Drop lines that (after comment removal) are shorter than the
# configured minimum length.
(min_line_length > length($0)) {
  next;
}

# Main rule: prints each run of the current line on its own output
# line, followed by a line holding one "." per character of the input
# line.
#
# The line is scanned by index and each run is emitted with a single
# substr() call, so the work is linear in the line length (repeatedly
# chopping the first character off the line would copy the remaining
# text at every step).
// {
  lin = $0;
  lin_len = length(lin);
  # run_start is the index of the first character of the current run.
  # The initial last_c never affects the output: at the first character
  # there is no pending run to flush, whatever "diff" turns out to be.
  run_start = 1; last_c = "#"; dots = "";
  for (pos = 1; pos <= lin_len; pos++)
    { c = substr(lin, pos, 1);
      if (sticky_qmarks)
        { # A "?" on either side of a transition keeps the run going.
          diff = ((c != last_c) && (c != "?") && (last_c != "?"));
        }
      else
        { diff = (c != last_c); }
      # Flush the pending run, if any, when a boundary is found.
      if (diff && (pos > run_start))
        { print substr(lin, run_start, pos - run_start);
          run_start = pos;
        }
      dots = (dots ".");
      last_c = c;
    }
  # The end of the line always terminates the last run.
  if (lin_len >= run_start) { print substr(lin, run_start); }
  print dots;
  next;
}

# Reports a command-line argument problem: writes "msg" and the usage
# text (global "usage") to /dev/stderr, records status 1 in the global
# "abort", and exits with that status.
function arg_error(msg)
{
  printf("%s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  abort = 1;
  exit abort;
}


# Reports a problem with the input data: writes "msg" to /dev/stderr,
# prefixed with the current file name (FILENAME) and line number (FNR),
# records status 1 in the global "abort", and exits with that status.
function data_error(msg)
{
  printf("file %s, line %s: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  abort = 1;
  exit abort;
}