#! /usr/bin/python3
# Last edited on 2025-11-20 18:07:18 by stolfi

# Functions to generate HTML reports for specific pages and clips.

import glob
import os, sys, re
from datetime import datetime, timezone

import html_gen as h
from html_gen import err
from process_funcs import bash

def basic_figure(st, img_url, caption, link_url:str|None = None) -> None:
  # Appends to {st} a centered figure showing the image {img_url} with
  # the given {caption}.
  #
  # The image is rendered through {h.make_link} as a link to {link_url}
  # (or to {img_url} itself when {link_url} is {None}), with max width
  # {st['text_width']} and max height 780.
  #
  # The {caption} is passed through {h.protect_html} and then
  # {h.simple_markup}, and formatted by {h.make_parags} as left-aligned
  # text with 80% width.
  #
  target_url = img_url if link_url is None else link_url
  thumb_html = h.make_link(st, target_url, None, img_url, st['text_width'], 780)

  caption_text = h.simple_markup(h.protect_html(caption))
  caption_block = h.make_parags(caption_text, align = "left", width = "80%")
  h.figure(st, thumb_html, caption_block, centered = True)
  return
  # ----------------------------------------------------------------------

def html_subdoc_link(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None = None, link_text:str|None = None) -> str:
  # Returns an HTML fragment that is a link to the subsidiary HTML file
  # "{sub_dir}/{sub_name}.html". Only warns (through {err}) if that
  # file does not exist; the link is generated anyway.
  #
  # If {thumb_img} is not {None} and the image file
  # "{sub_dir}/{thumb_img}" exists, the link's appearance is a thumbnail
  # of that image, with the {link_text} as caption if not {None}.
  # If {thumb_img} is given but the file is missing, a warning is
  # issued and no thumbnail is used (there is no search for a substitute).
  #
  # If {thumb_img} is {None}, looks in folder "{sub_dir}" for
  # "thumb.png", "annotated.png", "clip.png", "page.png", "book.png",
  # "raw.png", "img.png", in that order; if it finds one, proceeds as
  # above with the first that exists.
  #
  # If there is no image for the thumbnail and {link_text} is not
  # {None}, the appearance is just {link_text}. Otherwise the appearance
  # is the path "{sub_dir}/{sub_name}.html" itself.
  #
  # Assumes that {link_text} is HTML-safe.
  #
  # The thumbnail size limits passed to {h.make_link} are 64 by 64
  # (0 by 0 when there is no thumbnail).
  #
  # Fails with an assertion error if {sub_dir}, {thumb_img} or
  # {link_text} contains "??" (presumably a marker for an unfilled
  # placeholder -- TODO confirm).

  err(f"!! enter html_subdoc_link {sub_dir = } {sub_name = } {thumb_img = } {link_text = }\n")

  assert not re.search(r"[?][?]", sub_dir),   "invalid subdoc folder"
  if thumb_img is not None: assert not re.search(r"[?][?]", thumb_img), "invalid thumb image"
  if link_text is not None: assert not re.search(r"[?][?]", link_text), "invalid link text"

  html_file = f"{sub_dir}/{sub_name}.html"

  make_subdocs = False # For now.
  if make_subdocs:
    # Generate the subdoc from its source:
    # File names relative to the current folder:
    src_file = f"{sub_dir}/{sub_name}-src.py"
    err(f"executing {src_file}\n")
    assert os.path.exists(src_file), f"file {src_file} does not exist"
    assert os.access(src_file, os.X_OK), f"file {src_file} is not executable"
    setpath = "export PYTHONPATH=\"${HOME}/lib:..:../..:../../..:../../../..:${PYTHONPATH}\""
    bash(f"{setpath}; ( cd {sub_dir} && {sub_name}-src.py > {sub_name}.html )")

  if not os.path.exists(html_file):
    err(f"!! warning: page {html_file} does not exist\n")

  if thumb_img is not None:
    # Explicit thumbnail requested: use it or nothing.
    thumb_file = f"{sub_dir}/{thumb_img}"
    if not os.path.exists(thumb_file):
      err(f"!! warning: thumbnail {thumb_file} does not exist\n")
      thumb_file = None
  else:
    # No thumbnail requested: take the first of the usual image names that exists.
    thumb_file = None
    for cand_name in ("thumb.png", "annotated.png", "clip.png", "page.png", "book.png", "raw.png", "img.png"):
      cand_file = f"{sub_dir}/{cand_name}"
      if os.path.exists(cand_file):
        thumb_file = cand_file
        err(f"!! using {thumb_file} as thumbnail\n")
        break

  thumb_size = 0 if thumb_file is None else 64
  # The link must show something: fall back to the page's path.
  if thumb_file is None and link_text is None: link_text = html_file
  link_to_sub = h.make_link(st, html_file, link_text, thumb_file, thumb_size, thumb_size)
  return link_to_sub
  # ----------------------------------------------------------------------
  
def html_subdoc_link_parag(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None = None, link_text:str|None = None) -> None:
  # Appends to {st} a parag that consists of a link to the subsidiary
  # page "{sub_dir}/{sub_name}.html". Uses the image
  # "{sub_dir}/{thumb_img}" as thumbnail and {link_text} as the link's
  # text if not {None}. See {html_subdoc_link} for defaults. Assumes
  # that {link_text} is HTML-safe.
  #
  # The link fragment is handed to {h.parags} with markup and HTML
  # protection turned off, since it is already HTML.
  #
  link_to_sub = html_subdoc_link(st, sub_dir, sub_name, thumb_img = thumb_img, link_text = link_text)
  h.parags(st, link_to_sub, markup = False, protect = False)
  return
  # ----------------------------------------------------------------------
  
def image_link_parag(st:dict, img_url:str, img_size:int, link_text:str|None = None) -> None:
  # Appends to {st} a parag that consists of a link to "{img_url}", assumed to be
  # the URL of an image file.
  #
  # If {img_size} is positive, the link appearance is a thumbnail of
  # the image with the specified max width and height, with the {link_text}
  # as caption underneath if not {None}. If {img_size} is zero, the appearance of the
  # link is the string {link_text}, which must not be {None}
  # (this is not checked here).
  #
  # The string {link_text}, if given, is filtered with {h.protect_html}
  # and then {h.simple_markup}.

  if link_text is not None:
    link_text = h.protect_html(link_text)
    link_text = h.simple_markup(link_text)
  # The image is both the target of the link and the source of its thumbnail:
  link_html = h.make_link(st, img_url, link_text, img_url, img_size, img_size)
  # The fragment is already HTML, so no further markup or protection:
  h.parags(st, link_html, vspace = None, markup = False, protect = False)
  return
  # ----------------------------------------------------------------------
  
def links_section(st:dict) -> None:
  # Appends to {st} a level-2 section titled "Links" followed by an
  # enum list of links: first to the images found in the current folder
  # by {get_images_in_dir}, then to the pages and images found in its
  # immediate subfolders by {get_pages_and_images_in_sub_dirs}.

  local_images = get_images_in_dir(".")
  sub_dir_items = get_pages_and_images_in_sub_dirs(".")
  h.section(st, 2, "Links")
  links_enum(st, local_images + sub_dir_items)
  return
  # ----------------------------------------------------------------------

def links_enum(st:dict, targets:list[str]) -> None:
  # Appends to {st} a "ul" enum list with one item per element of {targets}.
  #
  # A {target} whose name ends in ".html" gets a link made by
  # {html_subdoc_link} (with automatic choice of thumbnail). Any other
  # {target} is taken to be an image and gets a link to itself made by
  # {h.make_link}, with a 64 by 64 thumbnail size limit.
  #
  # In both cases the link text is the {target} minus any leading "./"s.
  #
  dot_slashes = re.compile(r"^([.]/)*")
  thumb_px = 64
  h.begin_enum(st, "ul")
  for target in targets:
    shown_text = dot_slashes.sub("", target)
    # The "./" prefix ensures there is a folder part even for bare file names:
    page = re.fullmatch(r"(.*)/([^/]*)[.]html", f"./{target}")
    if page is None:
      item_html = h.make_link(st, target, shown_text, target, thumb_px, thumb_px)
    else:
      page_dir = dot_slashes.sub("", page.group(1))
      page_name = page.group(2)
      item_html = html_subdoc_link(st, page_dir, page_name, thumb_img = None, link_text = shown_text)
    h.enum_item(st, item_html)
  h.end_enum(st, "ul")
  return
  # ----------------------------------------------------------------------
 
def enum_item_link_parag(st:dict, sub_dir:str, sub_name:str, thumb_img:str|None, link_text:str|None = None) -> None:
  # Appends to {st} an enum parag that consists of a link to the
  # subsidiary page "{sub_dir}/{sub_name}.html". Uses the image
  # "{sub_dir}/{thumb_img}" as thumbnail and {link_text} as the link's
  # text if not {None}. See {html_subdoc_link} for defaults. Assumes
  # that {link_text} is HTML-safe.
  #
  # The link fragment is handed to {h.enum_item_parags} with markup and
  # HTML protection turned off, since it is already HTML.
  #
  link_to_sub = html_subdoc_link(st, sub_dir, sub_name, thumb_img = thumb_img, link_text = link_text)
  h.enum_item_parags(st, link_to_sub, markup = False, protect = False)
  return
  # --------------------------------------------------------------------
  
def get_pages_and_images_in_sub_dirs(dir:str) -> list[str]:
  # Scans all immediate subfolders of folder "{dir}" (except those whose
  # path contains "JUNK" or "SAVE") for files called "*.html" or
  # "*-src.py", or image files ("*.png", "*.jpg", or "annotate.sh").
  # Subfolders that have none of those files are ignored.
  #
  # For every sub-folder {sub_dir} and every {name} such that
  # either "{dir}/{sub_dir}/{name}-src.py" or "{dir}/{sub_dir}/{name}.html" exist,
  # the result will have "{dir}/{sub_dir}/{name}.html". If at least one such file
  # is found, ignores all image files in that subfolder.
  #
  # If a subfolder has neither a "*-src.py" nor a "*.html", the result
  # will include the names of the image files in that subfolder,
  # as returned by {get_images_in_dir}.
  #
  # Leading "./"s are removed from the returned names. Subfolders are
  # processed in sorted order. The {dir} is expected to be relative to
  # the current folder.
  #
  # The folders are scanned with {glob} rather than through a shell, so
  # that names with blanks or shell metacharacters are handled properly
  # and no scratch file is created in the current folder.
  #
  # Get list {src_html_img_files} of potential files of interest (with full path including {dir}):
  dir_pat = glob.escape(dir) # So that "*", "?", "[" in {dir} are taken literally.
  src_html_img_files = list()
  for file_pat in ("*-src.py", "*.html", "*.png", "*.jpg", "annotate.sh"):
    src_html_img_files += glob.glob(f"./{dir_pat}/*/{file_pat}")
  # Get the set of subfolders where those files reside, without duplicates:
  subdirs = sorted({ re.sub(r"/[^/]*$", "", x) for x in src_html_img_files })
  # Note: no "work/" filter is needed here. Paths that begin with "work/"
  # are dropped by {get_pages_in_dir} and {get_images_in_dir} themselves.
  items = list()
  for subdir in subdirs:
    subdir = re.sub(r"^([.]/)*", "", subdir)
    if subdir == "" or re.search(r"JUNK|SAVE", subdir): continue
    htmls = get_pages_in_dir(subdir)
    # Pages take precedence; images are listed only if there are no pages:
    items += htmls if len(htmls) != 0 else get_images_in_dir(subdir)
  return items
  # ----------------------------------------------------------------------

def get_pages_in_dir(dir:str) -> list[str]:
  # Scans the folder "{dir}" for files called "*.html" or "*-src.py".
  #
  # For every {name} such that
  # either "{dir}/{name}-src.py" or "{dir}/{name}.html" exist,
  # the result will have "{dir}/{name}.html", exactly once.
  # The result is in sorted order.
  #
  # Entries whose path begins with "work/" are omitted; so the result
  # is empty if {dir} is "work" or begins with "work/".
  #
  # Fails with an assertion error if "{dir}" does not exist.
  #
  # The folder is scanned with {glob} rather than through a shell, so
  # that names with blanks or shell metacharacters are handled properly
  # and no scratch file is created in the current folder.
  #
  assert os.path.exists(dir), f"folder {dir} does not exist"
  dir_pat = glob.escape(dir) # So that "*", "?", "[" in {dir} are taken literally.
  src_html_files = glob.glob(f"{dir_pat}/*-src.py") + glob.glob(f"{dir_pat}/*.html")
  # Reduce each file to its stem "{dir}/{name}", merging sources with their pages:
  hnames = sorted({ re.sub(r"(-src[.]py|[.]html)*$", "", x) for x in src_html_files })
  hfiles = [ f"{x}.html" for x in hnames if not re.match(r"\bwork/", x) ]
  return hfiles
  # ----------------------------------------------------------------------

def get_images_in_dir(dir:str) -> list[str]:
  # Scans the folder "{dir}" for files called "*.png" or "*.jpg"
  # or "annotate.sh". Returns the full names (including {dir}) of those
  # files, in sorted order and without duplicates.
  #
  # However, if it finds "{dir}/annotate.sh", that entry is replaced
  # in the result by "{dir}/annotated.png" (whether or not that
  # image exists).
  #
  # Entries whose path begins with "work/" are omitted; so the result
  # is empty if {dir} is "work" or begins with "work/". The result is
  # also empty if "{dir}" does not exist.
  #
  # The folder is scanned with {glob} rather than through a shell, so
  # that names with blanks or shell metacharacters are handled properly
  # and no scratch file is created in the current folder.
  #
  dir_pat = glob.escape(dir) # So that "*", "?", "[" in {dir} are taken literally.
  img_files = list()
  for file_pat in ("*.png", "*.jpg", "annotate.sh"):
    img_files += glob.glob(f"{dir_pat}/{file_pat}")
  # The set merges the stand-in "annotated.png" with the actual file, if present:
  images = sorted({ re.sub(r"/annotate[.]sh$", "/annotated.png", x) for x in img_files })
  images = [ x for x in images if not re.match(r"\bwork/", x) ]
  return images
  # ----------------------------------------------------------------------