#! /usr/bin/gawk -f
# Last edited on 2016-05-10 11:55:17 by stolfilocal

BEGIN {
  # Removes embedded quoted text from bitcointalk messages.
  # Assumes that the messages have been processed by {do-split-posts.sh};
  # in particular, that every <div> and </div> starts a new line.

  # Counters of top-level quote blocks opened and closed so far
  # (reported to stderr at the end of the run):
  nopen = nclose = 0;

  # Nesting depth of <div..>...</div> elements counted from the outermost
  # enclosing "quoteheader" or "quote" div; zero means "not inside a quote".
  quotelevel = 0;
}

/^ *<div class="quote/ {
  # Opening of a <div> whose class begins with "quote" ("quoteheader" or
  # "quote"). The line itself is never printed. Only an outermost quote
  # div (entered from level 0) is counted and leaves a marker comment
  # with the input line number; nested ones just deepen the level.
  if (quotelevel++ == 0) {
    nopen++;
    printf "<!-- line %d -->\n", FNR;
  }
  next;
}

/^ *<div[ >]/ {
  # Opening of any other <div>, with or without attributes. (Quote divs
  # never get here: the preceding rule handles them and does {next}.)
  # The tag name must be followed by a blank or ">" so that a bare
  # "<div>" is counted too; otherwise its matching "</div>" would
  # decrement the level without a corresponding increment and end the
  # quote one level too early.
  if (quotelevel > 0) {
    # Nested inside a quote: track the depth and drop the line.
    quotelevel++;
    next;
  }
  # Outside any quote: copy the line through unchanged.
  print;
  next;
}

/^ *<[/]div/ {
  # Closing </div>. Leading blanks are accepted, as in the rules for
  # opening <div> tags, so that an indented </div> inside a quote still
  # decrements the nesting level instead of being silently discarded.
  if (quotelevel <= 0) {
    # Not inside a quote: copy the line through unchanged.
    print;
    next;
  }
  # Inside a quote: the line is dropped and the depth decreases.
  quotelevel--;
  if (quotelevel == 0) {
    # The outermost quote div just closed: emit the placeholder that
    # stands for the removed text, plus a marker with the line number.
    printf "@s{quotedtext}\n";
    printf "<!-- line %d -->\n", FNR;
    nclose++;
  }
  next;
}

quotelevel > 0 {
  # Any line that is not a <div>/</div> tag line and lies inside a quote
  # is part of the quoted text: discard it.
  next
}

{
  # Every remaining line is outside any quote: copy it to the output.
  print $0;
  next;
}

END {
  # {exit} inside a rule or function still runs this END block. If one
  # of the error helpers already reported a fatal error (they set
  # {abort} before exiting), skip the summary and keep a failure status.
  if (abort) { exit 1; }

  # Summary of top-level quote blocks opened and closed, to stderr.
  printf "%d open quote %d close quote\n", nopen, nclose > "/dev/stderr";

  # A positive level here means some quote div never got its </div>.
  if (quotelevel > 0) { printf "** not closed!\n" > "/dev/stderr"; exit(1); }
}

# Reports a command-line argument error: writes {msg} and a usage line
# to stderr, sets the global {abort} flag, and exits with status 1.
# NOTE(review): the global {usage} is not set anywhere in this file as
# seen; presumably the caller is expected to define it - confirm.
function arg_error(msg) {
  printf("%s\n", msg) > "/dev/stderr";
  printf("usage: %s\n", usage) > "/dev/stderr";
  abort = 1;
  exit(1);
}

# Reports an internal program error: writes {msg} to stderr with a
# "**PROG ERROR: " prefix, sets the global {abort} flag, and exits
# with status 1.
function prog_error(msg) {
  printf("**PROG ERROR: %s\n", msg) > "/dev/stderr";
  abort = 1;
  exit(1);
}

# Reports an error in the input data: writes {msg} to stderr prefixed
# with the current file name and line number ("FILE:LINE: "), sets the
# global {abort} flag, and exits with status 1.
function data_error(msg) {
  printf("%s:%d: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  abort = 1;
  exit(1);
}