#! /usr/bin/gawk -f
# Last edited on 2016-05-10 11:55:17 by stolfilocal

# Removes embedded quoted text from bitcointalk messages.
#
# Assumes that the messages have been processed by {do-split-posts.sh}.
# In particular, that every <div> and </div> tag starts a new line
# (possibly preceded by blanks).
#
# Each top-level quote block (a <div> whose class starts with "quote",
# i.e. "quoteheader" or "quote", together with everything nested in it)
# is deleted.  A "<!-- line N -->" marker is written where each such
# block opens, and "@s{quotedtext}" followed by another "<!-- line N -->"
# marker is written where it closes.  All other lines are copied
# unchanged.
#
# On exit, writes the number of quote blocks opened and closed to
# standard error, and fails with status 1 if a quote block was still
# open at end of input.

BEGIN {
  quotelevel = 0;  # Nesting of <div..>...</div> inside a "quoteheader" or "quote" class.
  nopen = 0;       # Number of top-level quote blocks opened.
  nclose = 0;      # Number of top-level quote blocks closed.
}

# A <div> of class "quoteheader" or "quote":
/^ *<div class="quote/ {
  if (quotelevel == 0) {
    # Entering a top-level quote block; record where it started.
    nopen++;
    printf "<!-- line %d -->\n", FNR;
  }
  quotelevel++;
  next;
}

# Any other <div>, with or without attributes.  Bare "<div>" must be
# counted too, otherwise its "</div>" would unbalance {quotelevel}.
/^ *<div[ >]/ {
  if (quotelevel > 0) {
    # Some other <div> inside a quote div.
    quotelevel++;
  } else {
    # Some <div> outside a quote div.
    print;
  }
  next;
}

# Any </div>.  Leading blanks are accepted, as for the opening tags.
/^ *<[/]div/ {
  if (quotelevel > 0) {
    # Some </div> inside a quote div.
    quotelevel--;
    if (quotelevel == 0) {
      # Just left the top-level quote block; emit the placeholder.
      printf "@s{quotedtext}\n";
      printf "<!-- line %d -->\n", FNR;
      nclose++;
    }
  } else {
    # Some </div> outside a quote div.
    print;
  }
  next;
}

# Some other line inside a quote: discard it.
(quotelevel > 0) {
  next;
}

# Some other line outside a quote: copy it.
{
  print;
  next;
}

END {
  # If one of the error functions below already aborted the run,
  # do not print statistics; just propagate the failure.
  if (abort) { exit 1; }
  printf "%d open quote %d close quote\n", nopen, nclose > "/dev/stderr";
  if (quotelevel > 0) {
    printf "** not closed!\n" > "/dev/stderr";
    exit(1);
  }
}

function arg_error(msg)
{
  # Reports a command line argument error and aborts.
  # NOTE(review): {usage} is not set anywhere in this file, so the
  # usage line prints empty unless it is supplied externally.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1
}

function prog_error(msg)
{
  # Reports an internal program error and aborts.
  printf "**PROG ERROR: %s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1
}

function data_error(msg)
{
  # Reports an error in the current input line and aborts.
  printf "%s:%d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1
}