#! /usr/bin/gawk -f
# Last edited on 2004-02-03 07:00:04 by stolfi
# No start-up work is required.
BEGIN { }

# Records containing the literal text "# ===" are copied to the output
# unchanged, before any clean-up is applied to them.
/[#][ ][=][=][=]/ {
    print
    next
}

# Records that are empty or made up solely of spaces are discarded.
/^ *$/ {
    next
}
# Clean-up applied to every record that reaches this point (the empty regex
# // matches any line).  Each gsub edits $0 in place and the rule does not
# end with `next`, so all the rules that follow see the cleaned-up text.
// {
# NOTE(review): the regex literal in the next call is split across two lines
# in this copy.  gawk does not accept a newline inside a /.../ literal, so
# part of the pattern (possibly an HTML tag) appears to have been lost --
# confirm against the original file before relying on this rule.
gsub(/
/, " ");
# Replace each "&nbsp;" entity with an ordinary space.
gsub(/[&]nbsp;/, " ");
# Strip leading spaces, then trailing spaces.
gsub(/^[ ]+/, "");
gsub(/[ ]+$/, "");
# Collapse every run of two or more spaces into a single space.
gsub(/[ ][ ]+/, " ");
}
# Drop a record that consists solely of an opening or closing HTML tag.
/^<[\/]*HTML>$/ { next }
# Page-title rule: a record ending in "</TITLE>" that carries
# "Tanakh (Hebrew Bible):<a>:<b> <number>" is rewritten as
# "@chapter{<a>}{<b>}{<number>}", printed, and then no further rules run.
#
# NOTE(review): the pattern literal below is split across two lines in this
# copy; gawk does not accept a newline inside a /.../ literal, so text
# (possibly an HTML tag following the "^") appears to be missing -- confirm
# against the original file.
#
# NOTE(review): the second group ([^<>]*) can itself match digits and spaces,
# so with leftmost-longest matching it presumably absorbs everything except
# the final digit, leaving only the last digit of a multi-digit number in
# \\3 -- verify with a chapter number >= 10.
/^
Tanakh [(]Hebrew Bible[)]:([^<>]*)[:]([^<>]*) *([0-9]+)<[\/]TITLE>$/ {
# Work on a copy of the record held in the (global) variable lin.
lin = $0;
# gensub returns the rewritten string; \\1..\\3 are the three groups above.
lin = gensub(/Tanakh [(]Hebrew Bible[)]:([^<>]*)[:]([^<>]*) *([0-9]+)<[\/]TITLE> */, \
"@chapter{\\1}{\\2}{\\3}", "g", lin);
print lin;
next;
}
# Records dropped without producing output:
/^$/              { next }   # completely empty record
/^<[\/]HEAD>$/    { next }   # closing HEAD tag alone on the line
/^<[\/]*BODY>$/   { next }   # opening or closing BODY tag alone on the line
/^<[\/]*CENTER>$/ { next }   # opening or closing CENTER tag alone on the line
# Drop a record that ends with "</A> </CENTER>".
# NOTE(review): the pattern literal is split across two lines in this copy;
# gawk does not accept a newline inside a /.../ literal, so the text that
# originally preceded "</A>" appears to be missing -- confirm against the
# original file.
/^
<[\/]A> <[\/]CENTER>$/ {
next;
}
# Navigation anchors that are dropped without producing output.  Each pattern
# must match the whole record, from its first character to the closing </A>.
/^Sacred-texts<[\/]A>$/                        { next }
/^Bible<[\/]A>$/                               { next }
/^Tanakh [(]Hebrew Bible[)] Index<[\/]A>$/     { next }
# A record of the form "<text>:<text></A>", with no other angle brackets, is
# rewritten as an @bibbook entry and printed; no further rules run for it.
# The rewritten text is left in the global variable lin.
#
# NOTE(review): the replacement refers to \\3, but the pattern as shown has
# only two parenthesised groups, so the third field presumably comes out
# empty -- confirm whether a leading group was lost from the pattern.
/^[^<>]*[:][^<>]*<[\/]A>$/ {
    lin = gensub(/([^<>]*)[:]([^<>]*)<[\/]A> */, "@bibbook{\\1}{\\2}{\\3}", "g", $0)
    print lin
    next
}
# Link records that are dropped without producing output:
/^Next[^<>]*<[\/]A>$/            { next }   # begins with "Next", ends with </A>
/^Prev[^<>]*<[\/]A>$/            { next }   # begins with "Prev", ends with </A>
/^KJV<[\/]A>$/                   { next }
/^JPS<[\/]A>$/                   { next }
/^Hebrew<[\/]B> *Latin<[\/]A>$/  { next }
# Heading rule: a record of the form "<a>:<b></H1>" is rewritten as
# "@bibbook{???}{<a>}{<b>}" (the first field is the literal text "???"),
# printed, and then no further rules run.
#
# NOTE(review): both the pattern literal and the regex passed to gensub are
# split across two lines in this copy; gawk does not accept a newline inside
# a /.../ literal, so text (possibly an opening heading tag) appears to be
# missing from each -- confirm against the original file.
/^
[^<>]*[:][^<>]*<[\/]H1>$/ {
# Work on a copy of the record held in the (global) variable lin.
lin = $0;
lin = gensub(/
([^<>]*)[:]([^<>]*)<[\/]H1> */, \
"@bibbook{???}{\\1}{\\2}", "g", lin);
print lin;
next;
}
# A record reading "Chapter <number></H3>" becomes
# "@chapter{???}{???}{<number>}"; the first two fields are the literal text
# "???".  The result is stored in the global variable lin and printed, and no
# further rules run for the record.
/^Chapter [0-9]+<[\/]H3>$/ {
    lin = gensub(/Chapter ([0-9]+)<[\/]H3> */, "@chapter{???}{???}{\\1}", "g", $0)
    print lin
    next
}
# Drop records matching the pattern below.
# NOTE(review): the pattern literal is split across two lines in this copy,
# leaving nothing between "^" and "$" except the line break; gawk does not
# accept a newline inside a /.../ literal, so the original content (possibly
# a single HTML tag) appears to be missing -- confirm against the original
# file.
/^
$/ {
next;
}
# Fallback: any record not consumed by an earlier rule is printed as it
# currently stands.
// { print; next }