#! /usr/bin/gawk -f
# Reads list of links from stdin and converts to an HTML page on stdout.
# Each data line of stdin has a link to a paper or other file, then " £ ", or " @@ ",
# then the visible anchor text for linked thing.
# A line "=== ${subtit}" separates subsections.
# A line "!!!! {title}" must come before all data and subsection lines, and defines
# the title to use in the HTML <title> and <h1> tags.
# Comments are assumed to start with " #" or "#" in column 1, extend to end of line.
# A "#" in other contexts is treated as any other nonblank character.
BEGIN {
  # Each data line is split into fields on the "@@" separator.
  FS = "@@";

  # Output state machine:
  #   0 = page header not written yet,
  #   1 = header written, currently outside a link list,
  #   2 = currently inside a link list.
  state = 0;

  # Titles stay empty until a title or section line supplies them.
  section_title = "";
  page_title = "";

  # Conversion timestamp shown in the footer.  The third argument (1)
  # makes gawk's strftime format the time as UTC rather than local time.
  curr_time = strftime("%Y-%m-%d %H:%M:%S %Z", systime(), 1);
}
# Remember the "Last edited on ..." stamp, which lives in a comment, so the
# page footer can repeat it.  This rule has to stay ahead of the rule that
# strips comments; otherwise the stamp would be erased before it is seen.
/[\#] *Last edited on/ {
  lastedit = $0;
  # Drop everything up to and including the "#" and any blanks after it,
  # and lower-case the leading "L".  The blanks are optional here so that
  # every line accepted by the rule pattern above is also trimmed
  # (previously "#Last edited on ..." kept its "#").
  sub(/^.*[\#] *Last/, "last", lastedit);
}
# Strip comments: a "#" in column 1, or one preceded by at least one blank,
# starts a comment that extends to the end of the line.
{ gsub(/(^|[ ]+)[\#].*$/, "") }

# Lines that are empty or all blanks after comment removal are ignored.
$0 ~ /^[ ]*$/ { next }
# Compatibility with the old separator character: rewrite a blank-delimited
# pound sign as " @@ ".  Changing $0 makes awk re-split the fields using FS.
{
  # If input is in iso-latin-1 encoding.
  gsub(/ [£] /, " @@ ");
  # If input is in utf-8 encoding.
  gsub(/ [\302][\243] /, " @@ ");
}
# Page title line: optional blanks, a run of at least three "!", then the
# title text.  Only records the title; nothing is printed here.
/^[ ]*[!][!][!][!]*/ {
  page_title = $0;
  sub(/^[ ]*[!]*[ ]*/, "", page_title);   # remove the marker and surrounding blanks
  sub(/[ ]*$/, "", page_title);           # remove trailing blanks
  next;
}
# Section line: optional blanks, a run of at least three "=", then the
# section title.  An empty title falls back to "References".
/^[ ]*[=][=][=][=]*/ {
  section_title = $0;
  sub(/^[ ]*[=]*[ ]*/, "", section_title);   # remove the marker and surrounding blanks
  sub(/[ ]*$/, "", section_title);           # remove trailing blanks
  section_title = (section_title == "") ? "References" : section_title;
  start_section(section_title);
  next;
}
# Data line of the form "link @@ anchor text".
/[@][@]/ {
  # Exactly one separator is allowed, i.e. exactly two fields.
  if (NF != 2) error("bad NF = " NF);
  link = cleanup($1);
  anchor = cleanup($2);
  write_link(link, anchor);
  next;
}
# Data line holding only a link (http:, https: or file:) and no anchor text.
# The empty anchor makes write_link() derive the visible text from the link.
/^[^@]*(http[s]?|file)[:]/ {
  anchor = "";
  link = cleanup($0);
  write_link(link, anchor);
  next;
}
# Any line that reaches this point matched none of the rules above.
{ error("bad format") }
# Wrap up: make sure a page header exists even if the input produced no
# output at all, then close the page.
# Note: awk also runs END after "exit" is called from a main rule, so this
# code executes after error() stops input processing as well.
END {
  if (state == 0) start_page(page_title);
  finish_page();
}
# write_link(link, anchor)
#   Emits one list item that links to {link} and shows {anchor} as text.
#   Writes the page header and opens the list first when that has not
#   happened yet.  An empty {anchor} defaults to the link itself.  A leading
#   English-Wikipedia article prefix is removed from the visible text, so a
#   bare Wikipedia link is shown by its article name.
function write_link(link, anchor) {
  if (state == 0) { start_page(page_title) }
  if (state == 1) { start_list() }
  # Now state should be 2.
  if (anchor == "") { anchor = link }
  # The "//" after the scheme was missing from this pattern, so it could
  # never match a real URL; it is optional here so that anything the old
  # pattern accepted is still accepted.
  sub(/^http[s]?[:]([\/][\/])?en[.]wikipedia[.]org[\/]wiki[\/]/, "", anchor);
  # NOTE(review): the markup in this format string was lost (tags stripped,
  # leaving one %s for two arguments); reconstructed as a list item holding
  # a hyperlink -- confirm against the original script.
  printf "<li><a href=\"%s\">%s</a></li>\n", link, anchor
}
# start_list()
#   Opens the list that holds the link items and records that output is
#   now inside a list (state 2).
# NOTE(review): the list tags in start_list()/finish_list() were lost (tags
# stripped from this file); reconstructed as an unordered list -- confirm
# against the original script.
function start_list() {
  printf "<ul>\n"
  state = 2
}

# finish_list()
#   Closes the list opened by start_list() and records that output is back
#   outside any list (state 1).
function finish_list() {
  printf "</ul>\n"
  state = 1
}
# start_section(subtit)
#   Starts a new subsection titled {subtit}.  Writes the page header first
#   if needed, and closes the current link list if one is open.
function start_section(subtit) {
  if (state == 0) { start_page(page_title) }
  if (state == 2) { finish_list() }
  # Now state should be 1: header written, no list open.  The next
  # write_link() call opens a fresh list for this section.
  # NOTE(review): the heading tag was lost (tags stripped from this file);
  # reconstructed as a second-level heading -- confirm against the original.
  printf "<h2>%s</h2>\n", subtit
}
# start_page(title)
#   Writes the opening of the HTML page: document head with {title} in the
#   page title, then the body opening and a top-level heading.  May be
#   called only once, while state is 0; leaves state at 1.
# NOTE(review): the tags in start_page()/finish_page() were lost (tags
# stripped from this file, leaving blank-line printfs and unterminated
# strings).  The skeleton below is a minimal reconstruction; no character
# encoding is declared because the original declaration is unknown --
# confirm against the original script.
function start_page(title) {
  if (state != 0) { error("prog error 1") }
  printf "<!DOCTYPE html>\n"
  printf "<html>\n"
  printf "<head>\n"
  printf "<title>%s: Tabs</title>\n", title
  printf "</head>\n"
  printf "<body>\n"
  printf "<h1>%s</h1>\n", title
  state = 1
}

# finish_page()
#   Closes any open link list, then writes the footer (source file name,
#   saved "last edited" stamp, conversion time, do-not-edit warning) and
#   closes the body and the document.
function finish_page() {
  if (state == 2) { finish_list() }
  # Now state should be 1.
  printf "\n"
  printf "<hr>\n"
  printf "<p>\n"
  # lastedit is empty when the input had no "Last edited on" comment.
  printf "Source: %s %s<br>\n", FILENAME, lastedit
  printf "Converted by convert_links_to_html.gawk on %s<br>\n", curr_time
  printf "DO NOT EDIT BY HAND - EDITS WILL BE LOST\n"
  printf "</p>\n"
  printf "</body>\n"
  printf "</html>\n"
}
# error(msg)
#   Reports {msg} and the current input line on standard error, then stops
#   processing with exit status 1.
#   Note: when called from a main rule, awk still runs the END rule before
#   terminating; when called from within END, it terminates immediately.
function error(msg) {
  printf "** %s - aborted\n", msg > "/dev/stderr";
  printf "line = [%s]\n", $0 > "/dev/stderr";
  exit 1;
}
# cleanup(x)
#   Returns {x} with leading and trailing blanks (space characters only)
#   removed.  Interior blanks and other whitespace are left untouched.
function cleanup(x) {
  sub(/^[ ]+/, "", x);   # leading blanks
  sub(/[ ]+$/, "", x);   # trailing blanks
  return x;
}