#! /bin/bash

# Cleans the HTML files produced by the OneTab Chrome extension.

# Reads "${name}.html" and the favicons in the directory "${name}_files/".
# Writes to standard output.

# Command line: NAME [TITLE].
if (( $# < 1 )); then
  echo "** usage: ${0##*/} NAME [TITLE]" 1>&2 ; exit 1
fi

name="$1"; shift;  # Name of the saved webpage "${name}.html" and of its "${name}_files" directory.
title="${1-}"      # Title of the cleaned page; empty if omitted.
if (( $# > 0 )); then shift; fi

# Accept "foo.html" as well as "foo". Only a trailing ".html" is
# removed; a ".html" elsewhere in the name (as in "a.html_b") is kept.
name="${name%.html}"

# The page itself must exist and be non-empty ("-s" rejects empty files too).
hfile="${name}.html"
if [[ ! -s "${hfile}" ]]; then echo "** file \"${hfile}\" not found" 1>&2 ; exit 1; fi

# The companion directory written by the browser's "Save as" holds the favicons.
hdir="${name}_files"
if [[ ! -d "${hdir}" ]]; then echo "** \"${hdir}\" not found or not a directory" 1>&2 ; exit 1; fi

# Prefix shared by every temporary file of this run (keyed on the shell PID).
tmp="/tmp/$$"

# Opening of the output document, up to and including the <style> tag.
# The title is passed through "%s", so it is emitted verbatim.
printf '<!DOCTYPE html>\n<html>\n<head>\n  <meta charset="UTF-8"/>\n  <title>%s</title>\n  <style>\n' \
  "${title}"

# Convert favicons to base64("*.b64") files. The "Save as" option of
# Chromium writes the favicons as files "favicon", "favicon(1)",
# "favicon(2)", ... instead of "favicon/XXX.png". We name the base64
# files "${tmp}_favicon_0.b64", "${tmp}_favicon_1.b64", etc. To avoid
# repeating the data for every instance, we embed the icons as CSS class
# styles. We also create a file "${tmp}.sed" with the {sed} commands to
# replace the favicons <img...> elements by invocations of the
# corresponding CSS styles.

# Inline image: <img src="data:image/png;base64,%s">

# Collect the favicon file names found in "${hdir}" ("favicon",
# "favicon(1)", ...). A shell glob is used instead of parsing the
# output of {ls}, so unusual file names survive intact and a
# directory with no favicons simply yields an empty list.
icons=()
for ipath in "${hdir}"/favicon* ; do
  # An unmatched glob is left as the literal pattern; skip it.
  if [[ -e "${ipath}" ]]; then icons+=( "${ipath##*/}" ); fi
done

sfile="${tmp}.sed"  # Image inlining sed script.
# Always (re)create the script, possibly empty: the final {sed -f}
# pass needs the file to exist even when there are no favicons.
: > "${sfile}"

n=0
for ff in "${icons[@]}" ; do
  echo "converting ${ff}" 1>&2
  ifile="${hdir}/${ff}"
  pfile="${tmp}_favicon_${n}.png"
  ofile="${tmp}_favicon_${n}.b64"

  # Normalize the icon to a 16x16, at most 256-color PNG, then encode
  # it as one unwrapped Base64 line ("-w 0" disables line wrapping).
  convert "${ifile}" \
      -resize '16x16' \
      -colors 256 \
    "${pfile}"
  base64 -w 0 "${pfile}" \
    > "${ofile}"
  data="$(< "${ofile}")"

  # CSS rule that draws the icon before any element of class "icon_${n}".
  printf '   .icon_%d::before { content: url(data:image/png;base64,%s); }\n' "${n}" "${data}"

  # Matching {sed} command that replaces the <img> element of this
  # favicon by a <span> of that class. The file name is escaped so
  # that regex metacharacters and the "@" delimiter are taken
  # literally, and it is passed as data, never as a printf format.
  ffre="$(printf '%s' "${ff}" | sed -e 's/[][\.*^$@]/\\&/g')"
  printf 's@<img src="[.]/[^"<>]*/%s"[^<>]*>@<span class="icon_%d"></span>@g\n' "${ffre}" "${n}" >> "${sfile}"
  n=$(( n + 1 ))
done

# Diagnostics (standard error only); skipped when nothing was converted.
if (( n > 0 )); then
  ls -l "${tmp}"_favicon*.b64 1>&2
  cat "${sfile}" 1>&2
  ls -l "${tmp}"_favicon*.png 1>&2
fi

# Close the <head>, then open the <body> with the page heading and
# the list that will hold one item per saved tab.
printf '  </style>\n</head>\n<body>\n<h1>%s</h1>\n<ul>\n' "${title}"

# Note on inline images: the Base64 text is appended to the end of the
# image's "data:" URI as a single continuous line. No spaces or line
# breaks may be embedded in it: even if the encoder's output (say, a
# file "mypicture.txt") is split over many lines, all of them must be
# joined into one line.

# First pass: re-encode the saved page from windows-1252 to UTF-8 and
# break it into lines. A marker line "<<<BREAK>>>" is inserted before
# every '<div style="word-break' element, "</ul>" is inserted on its
# own line before "</body>", and "</html>" is put on a line by itself.
# The result goes to "${afile}". The input name is quoted because it
# derives from a command line argument and may contain blanks.
afile="${tmp}_afile.html"
recode windows-1252..UTF-8 < "${hfile}" \
  | sed \
      -e 's@<div style="word-break@\n<<<BREAK>>>\n<div style="word-break@g' \
      -e 's@</body>@\n</ul>\n</body>@g' \
      -e 's@</html>@\n</html>\n@g' \
  > "${afile}"
  
# Second pass: reduce the line-split page in "${afile}" to plain list
# items. The {sed} commands are order-dependent; they are held in an
# array only so that each group can carry a comment.
body_script=(
  # Discard everything from the first line through the first break
  # marker, then the remaining marker lines themselves.
  -e '1,/<<<BREAK>>>/d'
  -e '/<<<BREAK>>>/d'

  # Remove non-breaking spaces, blanks around a ">" that is followed
  # by a blank, and blanks around a "<" that is preceded by a blank.
  -e 's@[&]nbsp;@@g'
  -e 's@[ ]*>[ ][ ]*@>@g'
  -e 's@[ ][ ]*<[ ]*@<@g'

  # Repeat until nothing changes: delete <div>s that hold only a
  # OneTab user interface label, empty <div>s, and a <div> opening
  # tag left dangling at the end of a line.
  -e ':LOOP1'
  -e 's@<div[^<>]*>[0-9]* *tabs</div>@@g'
  -e 's@<div[^<>]*>Created[^<>]*</div>@@g'
  -e 's@<div[^<>]*>Restore all</div>@@g'
  -e 's@<div[^<>]*>Delete all</div>@@g'
  -e 's@<div[^<>]*>Share as web page</div>@@g'
  -e 's@<div[^<>]*>More[.]*</div>@@g'
  -e 's@<div[^<>]*>Lock this tab group</div>@@g'
  -e 's@<div[^<>]*>Star this tab group</div>@@g'
  -e 's@<div[^<>]*>Name this tab group</div>@@g'
  -e 's@<div[^<>]*>Help</div>@@g'
  -e 's@<div[^<>]*></div>@@g'
  -e 's@<div[^<>]*>$@@g'
  -e 'tLOOP1'

  # Strip presentation attributes, extension-internal images and
  # other leftover wrappers.
  -e 's@<div style="word-break[^"<>]*">@<div>@g'
  -e 's@position: absolute; top: [0-9]*px; left: [0-9]*px; @@g'
  -e 's@ cursor: move;@@g'
  -e 's@ class="clickable"@@g'
  -e 's@ style="padding-right: [0-9]*px; text-decoration: none;"@@g'
  -e 's@<img src="chrome-extension:[^<>]*>@@g'
  -e 's@<div style="margin-left: [0-9]*px;"></div>@@g'
  -e 's@</div><div>@@g'
  -e 's@<div class="tabGroupTitleText"[^<>]*></div>@@g'
  -e 's@<div id="ext"></div>@@g'
  -e 's@[&]nbsp;</a>@</a>@g'
  -e 's@<div[^<>]*></div>@@g'

  # Repeatedly delete pairs of adjacent closing </div> tags.
  -e ':LOOP2'
  -e 's@</div></div>@@g'
  -e 'tLOOP2'

  # Final shaping: a blank before each link, blank-only lines
  # emptied, each remaining "<div>" turned into an indented "<li>",
  # and "</li>" appended to every line that contains "<li>".
  -e 's@<a @ <a @g'
  -e 's@^[ ]*$@@g'
  -e 's@<div>@  <li>@g'
  -e '/<li>/s@$@</li>@g'
)

# Clean, squeeze runs of blank lines into one ("cat -s"), and finally
# swap the favicon <img> elements for CSS-styled <span>s using the
# commands in "${sfile}".
sed "${body_script[@]}" < "${afile}" \
  | cat -s \
  | sed -f "${sfile}"

# Show the converted icons for visual inspection, each titled with
# its file name and magnified 16 times with the Point filter.
icon_pngs=( ${tmp}_favicon*.png )
display -title '%f' -resize '1600%' -filter Point "${icon_pngs[@]}" 1>&2

# Remove the per-icon temporary files and the sed script, reporting
# each removal on standard error. The intermediate HTML file is kept.
leftovers=( ${tmp}_favicon* ${sfile} )
rm -f -v "${leftovers[@]}" 1>&2
# rm -fv ${afile} 1>&2
