#! /bin/bash
# Last edited on 2016-05-08 23:47:27 by stolfilocal

# Second cleanup pass: remove quoted texts.

#######################################
# Filter: replace every quoted passage of an HTML stream by a pair of
# placeholder lines.
#
# The input is first split (by sed) so that every "<div" and "</div"
# starts a new line.  The awk program then tracks <div> nesting:
#   * a line starting with <div class="quote... opens a quote (or nests
#     deeper if a quote is already open);
#   * every other opening <div> nests one level deeper while inside a quote;
#   * every </div> un-nests one level while inside a quote.
# Everything between the outermost opening and closing tags of a quote is
# dropped and replaced by the two lines
#   <@@quote><!-- N -->
#   </@@quote><!-- M -->
# where N and M are the line numbers (in the split stream) of the tags.
# Text that follows the closing tag on the same line is NOT part of the
# quote and is kept.  Lines outside quotes are copied unchanged.
#
# Inputs:   stdin  - HTML text
# Outputs:  stdout - filtered text
#           stderr - "<n> open quote <m> close quote" statistics, plus
#                    "** not closed!" if the input ends inside a quote
# Returns:  exit status of the awk stage: 1 if a quote was left open
# Requires: a sed that understands "\n" in the replacement text (GNU sed);
#           gawk is preferred, plain awk is used when gawk is missing.
#######################################
remove_quoted_text() {
  local awk_bin=gawk
  command -v gawk >/dev/null 2>&1 || awk_bin=awk

  sed \
    -e 's:<div:\n<div:g' \
    -e 's:</div:\n</div:g' \
  | "$awk_bin" '
      BEGIN {
        quotelevel = 0;  # <div> nesting depth inside a quote; 0 = outside
        nopen = 0;       # outermost quotes opened so far
        nclose = 0;      # outermost quotes closed so far
      }

      # Opening tag of a div whose class attribute starts with "quote".
      /^<div class="quote/ {
        if (quotelevel == 0) {
          nopen++;
          printf "<@@quote><!-- %d -->\n", FNR;
        }
        quotelevel++;
        next;
      }

      # Any other opening div.  A bare "<div>" (no attributes) must be
      # counted too, otherwise its "</div>" would close the quote early.
      (/^<div[ \t>]/ || $0 == "<div") {
        if (quotelevel > 0) { quotelevel++; next; }
        print;
        next;
      }

      # Closing div.
      /^<\/div/ {
        if (quotelevel == 0) { print; next; }
        quotelevel--;
        if (quotelevel == 0) {
          printf "</@@quote><!-- %d -->\n", FNR;
          nclose++;
          # Whatever follows the closing tag on this line is outside the
          # quote: keep it unless it is only white space.
          rest = $0;
          if (sub(/^<\/div[^>]*>/, "", rest) && rest ~ /[^ \t\r]/) {
            print rest;
          }
        }
        next;
      }

      # Ordinary line inside a quote: drop it.
      (quotelevel > 0) { next; }

      # Ordinary line outside any quote: copy it.
      { print; }

      END {
        printf "%d open quote %d close quote\n", nopen, nclose > "/dev/stderr";
        if (quotelevel > 0) { printf "** not closed!\n" > "/dev/stderr"; exit(1); }
      }
    '
}

remove_quoted_text
