#! /usr/bin/gawk -f
# Last edited on 2014-12-03 00:10:42 by stolfilocal

# Converts an HTML page fetched from "http://www.walletexplorer.com/"
# to a ".txt" file.  Splits each input data line into one or more lines,
# one for each transaction input or output.
#
# Note that the transactions are in inverse chronological order
# and addresses are combined into "wallets" by some unspecified
# clustering logic.
#
# The caller must define the variable {wallet} (e.g. with "-v wallet=NAME");
# it is used as the counterpart wallet of intra-wallet transactions.
#
# Data rows go to standard output; a summary and all diagnostics go to
# "/dev/stderr".  The exit status is 1 after any reported error.

BEGIN {
  # Negative means "no error so far"; the error functions set it to the exit status.
  abort = -1;

  # Needs the wallet name, for intra-wallet transactions:
  if (wallet == "") { arg_error(("must define {wallet}")); }

  # Make sure that conversions of numbers to strings and vice-versa
  # preserve 8 decimals after point:
  OFMT = "%.8f";
  CONVFMT = "%.8f";

  nin = 0;   # Number of data lines in input page.
  nout = 0;  # Number of data lines in output file.

  # Total received and sent in file:
  vbtc_tot_R = 0;
  vbtc_tot_S = 0;

  # Max absolute received and sent in file:
  vbtc_max_R = 0;
  vbtc_max_S = 0;

  # Number of receives, sends, and rearrangements:
  nout_R = 0;
  nout_S = 0;
  nout_I = 0;
}

# Defensive: stop reading input if an error has already been flagged.
(abort >= 0) { exit abort; }

# Expand tabs, non-breaking spaces:
// {
  # The tab pattern is written as an escape so that it cannot be silently
  # turned into a plain space by whitespace-normalizing tools.
  gsub(/\t/, " ", $0);
  gsub(/[&]nbsp;/, " ", $0);
}

# Remove headers and other non-data stuff:
/^ *$/ { next; }
# NOTE(review): three further rules at this position were textually
# equivalent to the blank-line rule above (/^ * *$/); they look like
# patterns whose HTML tag was lost at some point.  If page header lines
# start failing with "bad format", restore them -- TODO confirm against
# a sample page.
/^ *Wallet .*<[/]title> *$/ { next; }
/^ *<link href="[/]styles.css" rel="stylesheet"> *$/ { next; }
/^ *<div id="topbar"> *$/ { next; }
/^ *<h1><a href="[/]">.*<[/]a> - smart block explorer<[/]h1> *$/ { next; }
/^ *<form action="[/]"> *$/ { next; }
/^ *<input type="text" name="q" value=""[/]> *$/ { next; }
/^ *<input type="submit" value="Search address[/]wallet"[/]> *$/ { next; }
/^ *<[/]form> *$/ { next; }
/^ *<[/]div> *$/ { next; }
/^ *<div id="main"> *$/ { next; }
/^ *<h2>Wallet.*<[/]h2><div.*<[/]th><[/]tr> *$/ { next; }
/^ *<[/]table><div>.*<[/]div> *$/ { next; }
/^ *<div style=".*"><[/]div> *$/ { next; }
/^ *<p>[&]copy; <a href=.*>.*<[/]a> .*<[/]p> *$/ { next; }
/^ *<[/]html> *$/ { next; }

# Cleanup data lines:
/^<tr class="(received|sent)">.*<[/]tr> *$/ {
  lin = $0;

  # Replace HTML formatting by tags, leaving a list of "@{tag}={value}"
  gsub(/^ *<tr class="received">/, "@K=R ", lin);
  gsub(/^ *<tr class="sent">/, "@K=S ", lin);
  gsub(/<[/]tr> *$/, "", lin); # Discard final </tr>.
  gsub(/<span class="walletcolor" style="background-color: [#][0-9a-f]*"><[/]span>/, "", lin);
  gsub(/<td>-<[/]td>/, "@W=REWARD", lin); # Mined block reward.
  gsub(/<td class="date">/, " @D=", lin);
  gsub(/<td class="txid">/, " @T=", lin);
  gsub(/<td class="inout">/, " @IO=", lin);
  gsub(/<td class="amount diff">/, " @V=", lin);
  gsub(/<td class="amount">/, " @B=", lin);
  gsub(/<td>/, "", lin);
  gsub(/<table class="empty">/, "{ ", lin);
  lin = gensub(/<a href="[/]wallet[/]([^<>"]*)">/, "@W=\\1<a>", "g", lin);
  lin = gensub(/<a href="[/]txid[/]([^<>"]*)">/, "@T=\\1<a>", "g", lin);
  gsub(/<em>fee<[/]em>/, " @W=FEE ", lin);
  gsub(/<a>[^<>]*<[/]a>/, "", lin);
  gsub(/<[/]table>/, " } ", lin);
  gsub(/<em>/, "", lin);
  gsub(/<[/]em>/, "", lin);
  gsub(/<[/]td>/, "", lin);
  gsub(/<tr>/, " { ", lin);
  gsub(/<[/]tr>/, " } ", lin); # Internal </tr>.
  gsub(/@T=@T=/, "@T=", lin);

  # Remove "()" around fee amount:
  lin = gensub(/@V=[(]([-+]?[0-9.]*)[)]/, "@V=\\1", "g", lin);

  # Tag the time (any year 2000..2099):
  lin = gensub(/^(.*20[0-9][0-9]-[01][0-9]-[0-3][0-9]) +([0-2][0-9][:][0-5][0-9][:][0-6][0-9].*)$/, "\\1 @H=\\2", "g", lin);

  # Move input/output list to end of record:
  lin = gensub(/^(.*) *(@IO=[{] *[{].*[}] *[}]) *(.*) *$/, "\\1 \\3 \\2", "g", lin);
  lin = gensub(/^(.*) *(@IO=[{] *[}]) *(.*) *$/, "\\1 \\3 \\2", "g", lin);

  output_as_txtable_rows(lin);
  nin++;
  next;
}

# Anything that was neither skipped nor recognized as a data row is an error:
// { data_error(("bad format")); }

END {
  # In awk, "exit" from BEGIN, from a main rule, or from a function still
  # runs the END rule.  Do not print the summary after a fatal error;
  # just propagate the exit status.
  if (abort >= 0) { exit abort; }

  printf "%d lines read, %d lines written\n", nin, nout > "/dev/stderr";
  printf "receives: count %6d total %+.8f max %+.8f\n", nout_R, vbtc_tot_R, vbtc_max_R > "/dev/stderr";
  printf "sends: count %6d total %+.8f max %+.8f\n", nout_S, vbtc_tot_S, vbtc_max_S > "/dev/stderr";
  printf "shuffles: count %6d\n", nout_I > "/dev/stderr";
  printf "\n" > "/dev/stderr";
}

function output_as_txtable_rows(lin,   fix,ios,fixN,fixF,kind,date,hour,fbal,txid,iosN,iosF,k,nio,wall,vbtc,kwall,kvbtc,vtot,pbal,vsgn)
{
  # Takes a data line from the HTML file and outputs one or more lines,
  # one line for each input or for each output of the transaction.
  # Updates the global variables {nout,nout_R,nout_S,nout_I,vbtc_tot_R,vbtc_tot_S,vbtc_max_R,vbtc_max_S}.
  # Reads the global variable {wallet}.

  # Assumes that the input data line {lin} has been cleaned up and reformatted as
  # a sequence of fields "@{tag}={value}".  For most {tag}s
  # the {value} is a signed number of bitcoins or a simple string without quotes.
  # For the "IO" tag, the {value} is a list of pairs
  # "@V={amount} @W={wallet}" delimited by "{{...}{...}...{...}}".

  # All parameters after {lin} are local variables.
  # Any format violation is reported through {data_error}, which exits.

  # Split {lin} into the fixed-format part and the output list:
  fix = lin; gsub(/@IO=.*$/, "", fix);
  ios = lin; gsub(/^.*@IO=/, "", ios);

  # Split the fixed part into fields:
  # printf "%s\n", fix > "/dev/stderr";
  gsub(/[ ]/, "", fix);   # Remove all spaces.
  gsub(/^[@]/, "", fix);  # Remove leading "@".
  fixN = split(fix, fixF, /[@=]/);
  if (fixN != 10) { data_error(("wrong fixed field count = " fixN " \"" fix "\"")); }

  # Transaction kind:
  kind = get_tagged_field("K", fixF[1], fixF[2]);
  if (kind !~ /^[RS]$/) { data_error(("invalid {kind} \"" kind "\"")); }

  date = get_tagged_field("D", fixF[3], fixF[4]);
  if (date !~ /^20[0-9][0-9]-[01][0-9]-[0-3][0-9]$/) { data_error(("invalid {date} \"" date "\"")); }

  hour = get_tagged_field("H", fixF[5], fixF[6]);
  if (hour !~ /^[0-2][0-9][:][0-5][0-9][:][0-6][0-9]$/) { data_error(("invalid {hour} \"" hour "\"")); }

  fbal = get_tagged_field("B", fixF[7], fixF[8]);
  if (fbal !~ /^[-+]?[0-9]+([.][0-9]*)?$/) { data_error(("invalid {fbal} \"" fbal "\"")); }

  txid = get_tagged_field("T", fixF[9], fixF[10]);
  if (txid !~ /^[0-9a-f]+$/) { data_error(("invalid {txid} \"" txid "\"")); }
  if (length(txid) != 64) { data_error(("invalid {txid} length = " length(txid))); }

  # Split the input/output list, save in {wall[0..nio-1],vbtc[0..nio-1]}, compute total {vtot}:
  # printf "%s\n", ios > "/dev/stderr";
  nio = 0;          # Number of inputs or outputs.
  split("", wall);  # Wallet of input or output, indexed {0..nio-1}.
  split("", vbtc);  # Amount of input or output, indexed {0..nio-1}.
  vtot = 0.00000000; # Sum of all amounts; stays zero for internal transactions.
  gsub(/[{} ]/, "", ios); # Remove spaces and braces.
  gsub(/^[@]/, "", ios);  # Remove leading "@"
  iosN = split(ios, iosF, /[@=]/);
  if (iosN == 0) {
    # Internal rearrangement transaction.
    kind = "I";
    vbtc[0] = 0;
    wall[0] = wallet; # !!! Should be argument.
    nio = 1;
  } else {
    # Each input/output contributes four fields: two tags and two values.
    if ((iosN % 4) != 0) { data_error(("wrong input/output field count = " iosN " \"" ios "\"")); }
    vsgn = (kind == "R" ? +1.0 : -1.0); # Expected sign of amounts transferred.
    for (k = 1; k < iosN; k += 4) {
      # Get the next input/output, write table row:
      # For receive the wallet comes first, for send it comes second:
      kwall = (kind == "R" ? k : k+2);
      kvbtc = (kind == "R" ? k+2 : k);

      wall[nio] = get_tagged_field("W", iosF[kwall], iosF[kwall+1]);
      if (wall[nio] !~ /^[-A-Za-z0-9.]+$/) { data_error(("invalid wallet \"" wall[nio] "\"")); }

      vbtc[nio] = get_tagged_field("V", iosF[kvbtc], iosF[kvbtc+1]);
      if (vbtc[nio] !~ /^[-+]?[0-9]+([.][0-9]*)?$/) { data_error(("invalid input/output amount \"" vbtc[nio] "\"")); }
      if (vsgn*vbtc[nio] < 0) { data_error(("input/output amount " vbtc[nio] " has wrong sign for {kind} \"" kind "\"")); }
      if (vbtc[nio] == 0) { data_warning(("input/output amount " vbtc[nio] " is zero")); }

      vtot += vbtc[nio];
      nio++;
    }
  }

  # Write one table row for each input or output, last one first.
  # {pbal} starts at the listed final balance and is rolled back by each amount:
  pbal = fbal;
  for (k = nio-1; k >= 0; k--) {
    # printf "pbal = %s\n", pbal > "/dev/stderr";
    printf "%s %s", date, hour;
    printf " | %s | %5d", txid, k;
    printf " | %s | %s", (vbtc[k] == 0 ? "00.00000000" : sprintf("%+.8f", vbtc[k])), wall[k];
    printf " | %.8f\n", pbal;
    pbal -= vbtc[k];

    # Update global max and total amounts:
    if (kind == "R") {
      nout_R++;
      vbtc_tot_R += vbtc[k];
      if (vbtc[k] > vbtc_max_R) { vbtc_max_R = vbtc[k]; }
    } else if (kind == "S") {
      nout_S++;
      vbtc_tot_S += vbtc[k];
      # Sent amounts are negative, so the largest send is the smallest value.
      if (vbtc[k] < vbtc_max_S) { vbtc_max_S = vbtc[k]; }
    } else if (kind == "I") {
      nout_I++;
    } else {
      prog_error(("invalid kind \"" kind "\""));
    }

    # Update global output line count:
    nout++;
  }
  check_balance(pbal, fbal - vtot);
}

function check_balance(b1,b2,   x1,x2,d)
{
  # Compares the balance {b1} to the expected balance {b2}, to 8 decimals.
  # Calls {prog_error} (which exits) if they differ.
  # {x1,x2,d} are local variables.

  # Compare numerically with a half-unit tolerance in the 8th decimal rather
  # than comparing formatted strings: a tiny floating-point residue could
  # otherwise yield "-0.00000000" versus "0.00000000", or push two
  # essentially equal values to opposite sides of a rounding boundary.
  d = b1 - b2;
  if (d < 0) { d = -d; }
  if (d >= 0.000000005) {
    x1 = sprintf("%.8f", b1);
    x2 = sprintf("%.8f", b2);
    prog_error(("inconsistent balance -- " x1 " should be " x2));
  }
}

function get_tagged_field(tag, f1, f2)
{
  # Checks whether {f1 == tag}, returns {f2} if true.
  # Otherwise reports the mismatch through {data_error}, which exits.
  if (f1 != tag) { data_error(("expected \"" tag "\", got \"" f1 "\"")); }
  return f2;
}

function data_error(msg)
{
  # Reports an error in the input data: prints the file name, the record
  # number, {msg}, and the current record to "/dev/stderr", then exits with status 1.
  printf "%s:%s: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " %s\n", $0 > "/dev/stderr";
  abort = 1;
  exit(abort);
}

function arg_error(msg)
{
  # Reports an error in the command-line variables: prints {msg}
  # to "/dev/stderr", then exits with status 1.
  printf "** %s\n", msg > "/dev/stderr";
  abort = 1;
  exit(abort);
}

function prog_error(msg)
{
  # Reports an internal inconsistency: prints {msg} to "/dev/stderr",
  # then exits with status 1.
  printf "** PROGRAM ERROR: %s\n", msg > "/dev/stderr";
  abort = 1;
  exit(abort);
}