#! /usr/bin/gawk -f

# Reads a list of links from stdin and converts it to an HTML page on stdout.
#
# Each data line of stdin has a link to a paper or other file, then " £ " or " @@ ",
# then the visible anchor text for the linked thing.  A data line may also be
# a bare "http:", "https:" or "file:" link with no anchor text, in which case
# the link itself is used as the anchor text.
#
# A line "=== ${subtit}" separates subsections.
# A line "!!!! {title}" must come before all data and subsection lines, and defines
# the title to use in the HTML <title> and <h1> tags.
#
# Comments are assumed to start with " #" or "#" in column 1, extend to end of line.
# A "#" in other contexts is treated as any other nonblank character.

BEGIN {
  page_title = ""
  section_title = ""
  lastedit = ""
  # Conversion timestamp, in UTC (third argument of strftime is the utc flag):
  curr_time = strftime("%Y-%m-%d %H:%M:%S %Z", systime(), 1)
  FS = "@@"
  state = 0 # 0 = no page header yet, 1 = outside <ul>, 2 = within "<ul>".
}

# This rule must come before the comment-stripping rule below, because the
# "Last edited on" marker lives inside a comment.
/[\#] *Last edited on/ {
  # Save last edited line for footer.  The rewrite accepts the same optional
  # blanks after "#" as the rule pattern, so every matched line is trimmed.
  lastedit = $0
  sub(/^.*[\#] *Last/, "last", lastedit)
}

# Strip comments (assigning to $0 re-splits the fields):
// { gsub(/(^|[ ]+)[\#].*$/, "", $0) }

# Skip lines that are blank after comment removal:
/^[ ]*$/ { next }

# Compatibility with old separator character:
// {
  gsub(/ [£] /, " @@ ", $0)          # If input is in iso-latin-1 encoding.
  gsub(/ [\302][\243] /, " @@ ", $0) # If input is in utf-8 encoding.
}

# Page title line ("!!!" or more, then the title):
/^[ ]*[!][!][!][!]*/ {
  page_title = $0
  gsub(/^[ ]*[!]*[ ]*/, "", page_title)
  gsub(/[ ]*$/, "", page_title)
  next
}

# Section separator line ("===" or more, then the optional section title):
/^[ ]*[=][=][=][=]*/ {
  section_title = $0
  gsub(/^[ ]*[=]*[ ]*/, "", section_title)
  gsub(/[ ]*$/, "", section_title)
  if (section_title == "") { section_title = "References" }
  start_section(section_title)
  next
}

/[@][@]/ {
  # Link with anchor:
  if (NF != 2) { error(("bad NF = " NF)) }
  link = cleanup($1)
  anchor = cleanup($2)
  write_link(link, anchor)
  next
}

/^[^@]*(http[s]?|file)[:]/ {
  # Link without anchor:
  link = cleanup($0)
  anchor = ""
  write_link(link, anchor)
  next
}

# Anything that reaches this point is not a recognized line type:
// { error(("bad format")) }

END {
  # Note: awk also runs this rule after {exit} is called from {error},
  # so the page is closed even when the conversion was aborted.
  if (state == 0) { start_page(page_title) }
  finish_page()
}

function write_link(link, anchor)
  {
    # Writes one "<li>" item with a hyperlink to {link} showing {anchor}.
    # Opens the page and/or the "<ul>" list first if needed.
    # If {anchor} is empty, the link itself is used as the visible text,
    # minus the English Wikipedia URL prefix, if any.
    if (state == 0) { start_page(page_title) }
    if (state == 1) { start_list() }
    # Now state should be 2.
    if (anchor == "") { anchor = link }
    # The "//" after the scheme is required for the prefix to match real URLs:
    gsub(/^http[s]?[:][\/][\/]en[.]wikipedia[.]org[\/]wiki[\/]/, "", anchor)
    printf "  <li><a href=\"%s\">%s</a></li>\n", link, anchor
  }

function start_list()
  {
    # Opens a "<ul>" list and sets {state} to 2.
    printf "<ul>\n"
    state = 2
  }

function finish_list()
  {
    # Closes the current "<ul>" list and sets {state} to 1.
    printf "</ul>\n"
    state = 1
  }

function start_section(subtit)
  {
    # Writes the "<h2>" header of a new section titled {subtit},
    # opening the page and/or closing the current list first if needed.
    if (state == 0) { start_page(page_title) }
    if (state == 2) { finish_list() }
    printf "<h2>%s</h2>\n", subtit
    # Now {state} should be 1.
  }

function start_page(title)
  {
    # Writes the HTML preamble, with {title} in the <title> and <h1> elements,
    # and sets {state} to 1.  Must be called only once, while {state} is 0.
    if (state != 0) { error("prog error 1") }
    printf "<!DOCTYPE html>\n"
    printf "<html>\n"
    printf "<head>\n"
    printf "<meta charset=\"UTF-8\"/>\n"
    # NOTE(review): the closing and structural tags from here down were
    # restored to give well-formed HTML -- confirm against the intended layout.
    printf "<title>%s: Tabs</title>\n", title
    printf "</head>\n"
    printf "<body>\n"
    printf "<h1>%s</h1>\n", title
    state = 1
  }

function finish_page()
  {
    # Closes any open list, then writes the page footer (source file name,
    # saved "last edited" text, conversion time) and the closing tags.
    if (state == 2) { finish_list() }
    # Now state should be 1.
    # NOTE(review): the footer markup (<hr/>, <p>, <br/>) was restored to give
    # well-formed HTML -- confirm against the intended layout.
    printf "\n"
    printf "<hr/>\n"
    printf "<p>\n"
    printf "Source: %s %s<br/>\n", FILENAME, lastedit
    printf "Converted by convert_links_to_html.gawk on %s<br/>\n", curr_time
    printf "DO NOT EDIT BY HAND - EDITS WILL BE LOST\n"
    printf "</p>\n"
    printf "</body>\n"
    printf "</html>\n"
  }

function error(msg)
  {
    # Prints {msg} and the current input line to stderr, then exits with status 1.
    # The END rule is still executed by awk after the {exit}.
    printf "** %s - aborted\n", msg > "/dev/stderr"
    printf "line = [%s]\n", $0 > "/dev/stderr"
    exit(1)
  }

function cleanup(x)
  {
    # Removes leading and trailing blanks.
    gsub(/^[ ]+/, "", x)
    gsub(/[ ]+$/, "", x)
    return x
  }