#! /usr/bin/gawk -f
# Last edited on 2016-05-09 23:49:53 by stolfilocal
BEGIN {
  # Cleans up posts from a Bitcointalk "latest posts" page,
  # removing the site headers and writing each post
  # as a separate HTML file.
  #
  # NOTE(review): the comments at this point quoted literal markup tags
  # that were lost from this copy of the file, leaving stray fragments
  # that were not valid awk.  The original text stated an assumption
  # about how those tags are laid out in the input; the exact wording
  # and tag names must be recovered from the upstream file.
  # A counter for the matching opening tags was presumably initialized
  # here as well -- TODO confirm.  Awk treats an unset variable as
  # 0 / "", so a missing zero-initialization does not change behavior.

  nclose = 0;       # Number of closing tags (tag name lost, see note above).

  anchor = "???";   # Anchor of post in bitcointalk.
  date = "???";     # Date of post.
  uname = "???";    # User name.
  ofile = "???";    # Output file of post.

  debug = 1;        # If true, prints debugging information.
  verbose = 1;      # If true, prints info for each post found.

  # USA to ISO month conversion table (English month name -> 1..12).
  # The split of an empty string makes {month_num} an empty array.
  split("", month_num);
  month_num["January"] = 1;
  month_num["February"] = 2;
  month_num["March"] = 3;
  month_num["April"] = 4;
  month_num["May"] = 5;
  month_num["June"] = 6;
  month_num["July"] = 7;
  month_num["August"] = 8;
  month_num["September"] = 9;
  month_num["October"] = 10;
  month_num["November"] = 11;
  month_num["December"] = 12;
}
/content="Latest posts of:/ {
  # Extracts the user name from a line containing
  # 'content="Latest posts of: NAME" />' and stores it in {uname}.
  if (debug) {
    printf "!! grabbing uname\n" > "/dev/stderr";
  }

  uname = $0;

  # Drop everything up to and including the "Latest posts of:" label
  # (and any blanks after it) ...
  sub(/^.*"Latest posts of: */, "", uname);
  # ... and the closing quote plus "/>" at the end of the line.
  sub(/ *" *\/> *$/, "", uname);

  # Every remaining blank becomes an underscore.
  gsub(/[ ]/, "_", uname);

  # Only letters, digits, "-" and "_" are accepted in the result.
  if (! (uname ~ /^[-_A-Za-z0-9]+$/)) {
    data_error("invalid uname");
  }

  next;
}
/^[\011 ]*