#! /usr/bin/gawk -f
# Last edited on 2004-02-03 07:00:04 by stolfi

# Line-oriented filter for HTML pages of the "Tanakh (Hebrew Bible)".
#
# Reads HTML on standard input, one tag or text fragment per line, and
# writes to standard output:
#   - drops navigation and boilerplate lines (HTML/HEAD/BODY/CENTER
#     tags, index links, Next/Prev links, KJV/JPS/Latin links, <HR>);
#   - rewrites title and heading lines as "@chapter{...}{...}{...}"
#     and "@bibbook{...}{...}{...}" markers;
#   - prints every other line after whitespace normalization.
#
# Requires gawk: uses the three-argument form of match().

BEGIN { }

# Lines containing "# ===" are passed through untouched (no whitespace
# normalization); presumably they are section markers added upstream.
/[#][ ][=][=][=]/ { print; next }

# Discard lines that are empty or contain only spaces.
# NOTE(review): this test runs BEFORE normalization, so a line holding
# only "&nbsp;" or a carriage return is still printed as an empty line
# by the final rule. Confirm whether that is intended.
/^ *$/ { next }

# Normalize whitespace on every remaining line.
{
  # NOTE(review): the character in this pattern was a raw line-break
  # character in the file; assumed to be a carriage return -- confirm.
  gsub(/\r/, " ")
  gsub(/[&]nbsp;/, " ")
  gsub(/^[ ]+/, "")      # strip leading blanks
  gsub(/[ ]+$/, "")      # strip trailing blanks
  gsub(/[ ][ ]+/, " ")   # squeeze runs of blanks
}

/^<[\/]*HTML>$/ { next }

# Page title -> "@chapter{group}{book}{number}".
# The pattern is anchored on "<HEAD><TITLE>" so that it matches the same
# text that gets rewritten.  The book-name group must end in a non-digit
# so that the whole trailing number (e.g. "12", not just "2") lands in
# the chapter group.
match($0, /^<HEAD><TITLE>Tanakh [(]Hebrew Bible[)]:([^<>]*)[:]([^<>]*[^<>0-9])? *([0-9]+)<[\/]TITLE>$/, m) {
  print "@chapter{" m[1] "}{" m[2] "}{" m[3] "}"
  next
}

/^<META HTTP-EQUIV="Content-Type" CONTENT="text[\/]html; charset=UTF-8">$/ { next }
/^<[\/]HEAD>$/ { next }
/^<[\/]*BODY>$/ { next }
/^<[\/]*CENTER>$/ { next }

# Site navigation links: dropped.
/^<A HREF="[.][.][\/][.][.][\/]cdshop[\/]index[.]htm"><IMG SRC="[.][.][\/][.][.][\/]cdshop[\/]cdinfo[.]jpg" BORDER="0"><[\/]A> <[\/]CENTER>$/ { next }
/^<A HREF="[.][.][\/][.][.][\/]index[.]htm">Sacred-texts<[\/]A>$/ { next }
/^<A HREF="[.][.][\/]index[.]htm">Bible<[\/]A>$/ { next }
/^<A HREF="index[.]htm">Tanakh [(]Hebrew Bible[)] Index<[\/]A>$/ { next }

# Link whose target has a three-character base name and whose text is
# "xxx:yyy" -> "@bibbook{target}{xxx}{yyy}".
match($0, /^<A HREF="(...)[.]htm">([^<>]*)[:]([^<>]*)<[\/]A>$/, m) {
  print "@bibbook{" m[1] "}{" m[2] "}{" m[3] "}"
  next
}

# Next/previous links and links to other translations: dropped.
/^<A HREF="[^<>"]*[.]htm">Next[^<>]*<[\/]A>$/ { next }
/^<A HREF="[^<>"]*[.]htm">Prev[^<>]*<[\/]A>$/ { next }
/^<A HREF="[.][.][\/]kjv[\/][^<>"]*[.]htm">KJV<[\/]A>$/ { next }
/^<A HREF="[.][.][\/]jps[\/][^<>"]*[.]htm">JPS<[\/]A>$/ { next }
/^<B>Hebrew<[\/]B> *<A HREF="[.][.][\/]vul[\/][^<>"]*[.]htm">Latin<[\/]A>$/ { next }

# Main heading "xxx:yyy" -> "@bibbook{???}{xxx}{yyy}"; the first field
# is not available on this line, so a literal "???" is emitted.
match($0, /^<HR><H1 ALIGN="CENTER">([^<>]*)[:]([^<>]*)<[\/]H1>$/, m) {
  print "@bibbook{???}{" m[1] "}{" m[2] "}"
  next
}

# Chapter heading -> "@chapter{???}{???}{number}".
match($0, /^<H3 ALIGN="CENTER">Chapter ([0-9]+)<[\/]H3>$/, m) {
  print "@chapter{???}{???}{" m[1] "}"
  next
}

/^<HR>$/ { next }

# Anything else is printed as normalized above.
{ print; next }