#! /bin/bash
# Last edited on 2016-05-08 23:40:29 by stolfilocal

# Third cleanup pass: remove all but the post texts.

# extract_posts: filter that keeps only the post texts of an HTML stream.
#
# Reads HTML from standard input and writes to standard output only the
# lines that lie inside an outermost <div class="post..."> element,
# including the opening and closing <div> lines themselves.  Div tags are
# recognized only when they start at the beginning of a line.
#
# Each extracted post is bracketed by two marker lines:
#   <@@post><!-- N -->    before the opening div (N = input line number)
#   </@@post><!-- N -->   after the closing div that ends the post
#
# Writes a summary "<nopen> open post <nclose> close post" to stderr.
# Returns the exit status of gawk: 1 if the input ended while a post div
# was still open, 0 otherwise.
extract_posts() {
  gawk '
    BEGIN {
      postlevel = 0;  # Nesting depth of <div> inside the current post (0 = outside).
      nopen = 0;      # Number of posts opened.
      nclose = 0;     # Number of posts closed.
    }

    # A post div.  Only an outermost one starts a new post; a post div
    # nested inside another post is just one more nesting level.
    /^<div class="post/ {
      if (postlevel == 0) {
        nopen++;
        printf "<@@post><!-- %d -->\n", FNR;
      }
      postlevel++;
      print;
      next;
    }

    # Any other div: tracked and copied inside a post, dropped outside.
    /^<div / {
      if (postlevel > 0) {
        postlevel++;
        print;
      }
      next;
    }

    # A closing div: copied inside a post, dropped outside.  The end marker
    # is written only when this tag closes the post itself, so that every
    # "<@@post>" marker is matched by exactly one "</@@post>" marker.
    /^<[/]div/ {
      if (postlevel > 0) {
        postlevel--;
        print;
        if (postlevel == 0) {
          nclose++;
          printf "</@@post><!-- %d -->\n", FNR;
        }
      }
      next;
    }

    # Any other line is copied only while inside a post.
    (postlevel > 0) {
      print;
    }

    END {
      printf "%d open post %d close post\n", nopen, nclose > "/dev/stderr";
      if (postlevel > 0) { printf "** post div not closed!\n" > "/dev/stderr"; exit(1); }
    }
  '
}

extract_posts
