#! /usr/bin/python3
# Last edited on 2025-11-20 18:07:18 by stolfi

# Functions to generate HTML reports for specific pages and clips.

import glob
import os
import re
import sys
from datetime import datetime, timezone

import html_gen as h
from html_gen import err
from process_funcs import bash

# Max width and height passed to {h.make_link} for the thumbnails of links.
_THUMB_SIZE = 64

# File names tried, in this order, as the thumbnail of a subsidiary page
# when the caller does not specify one.
_DEFAULT_THUMB_NAMES = (
    "thumb.png",
    "annotated.png",
    "clip.png",
    "page.png",
    "book.png",
    "raw.png",
    "img.png",
)


def _glob_all(patterns: list[str]) -> list[str]:
    # Returns the paths that match any of the glob {patterns}, in no
    # particular order. Replaces the former "ls ... > tempfile" shell
    # calls, so that file names with blanks or shell metacharacters are
    # handled correctly and no scratch files are left in the current folder.
    found: list[str] = []
    for pattern in patterns:
        found.extend(glob.glob(pattern))
    return found


def basic_figure(st: dict, img_url: str, caption: str, link_url: str | None = None) -> None:
    # Adds to {st} a centered figure with the image {img_url} and the {caption}.
    #
    # The {img_url} is converted to a thumbnail tag, with max width
    # {st['text_width']} and max height 780, that links to {link_url},
    # or to the full image if {link_url} is {None}.
    #
    # The {caption} is filtered through {h.protect_html} and {h.simple_markup}
    # and turned into one or more left-aligned paragraphs with 80% width.
    #
    max_width = st['text_width']
    max_height = 780
    if link_url is None:
        link_url = img_url
    img_html = h.make_link(st, link_url, None, img_url, max_width, max_height)
    caption_html = h.protect_html(caption)
    caption_html = h.simple_markup(caption_html)
    caption_parags = h.make_parags(caption_html, align="left", width="80%")
    h.figure(st, img_html, caption_parags, centered=True)
    # ----------------------------------------------------------------------


def html_subdoc_link(
    st: dict,
    sub_dir: str,
    sub_name: str,
    thumb_img: str | None = None,
    link_text: str | None = None,
) -> str:
    # Returns an HTML fragment that is a link to the subsidiary HTML file
    # "{sub_dir}/{sub_name}.html". Writes a warning with {err} if that
    # file does not exist, but returns the link anyway.
    #
    # If {thumb_img} is not {None} and the image file
    # "{sub_dir}/{thumb_img}" exists, the link's appearance is a thumbnail
    # of that image, with the {link_text} as caption if not {None}.
    # If {thumb_img} is given but the file is missing, writes a warning
    # and proceeds without a thumbnail.
    #
    # If {thumb_img} is {None}, looks for obvious image files in folder
    # "{sub_dir}" (see {_DEFAULT_THUMB_NAMES}); if it can find one,
    # proceeds as above.
    #
    # If there is no thumbnail image and {link_text} is not {None}, the
    # appearance is just {link_text}. Otherwise the appearance is the
    # path "{sub_dir}/{sub_name}.html" itself.
    #
    # Assumes that {link_text} is HTML-safe. Fails if any of {sub_dir},
    # {thumb_img}, or {link_text} contains "??".
    #
    err(f"!! enter html_subdoc_link {sub_dir = } {sub_name = } {thumb_img = } {link_text = }\n")

    assert not re.search(r"[?][?]", sub_dir), "invalid subdoc folder"
    if thumb_img is not None:
        assert not re.search(r"[?][?]", thumb_img), "invalid thumb image"
    if link_text is not None:
        assert not re.search(r"[?][?]", link_text), "invalid link text"

    html_file = f"{sub_dir}/{sub_name}.html"

    make_subdocs = False  # For now.
    if make_subdocs:
        # Generate the subdoc from its source.
        # File names relative to the current folder:
        src_file = f"{sub_dir}/{sub_name}-src.py"
        err(f"executing {src_file}\n")
        assert os.path.exists(src_file), f"file {src_file} does not exist"
        assert os.access(src_file, os.X_OK), f"file {src_file} is not executable"
        setpath = "export PYTHONPATH=\"${HOME}/lib:..:../..:../../..:../../../..:${PYTHONPATH}\""
        # NOTE(review): the script is invoked by bare name after the {cd};
        # presumably this relies on "." being in the shell's PATH -- confirm
        # before re-enabling {make_subdocs}.
        bash(f"{setpath}; ( cd {sub_dir} && {sub_name}-src.py > {sub_name}.html )")

    if not os.path.exists(html_file):
        err(f"!! warning: page {html_file} does not exist\n")

    thumb_file = None
    if thumb_img is not None:
        # The caller named the thumbnail; use it only if it is there.
        given_file = f"{sub_dir}/{thumb_img}"
        if os.path.exists(given_file):
            thumb_file = given_file
        else:
            err(f"!! warning: thumbnail {given_file} does not exist\n")
    else:
        # Pick the first of the conventional image names that exists.
        for thumb_name in _DEFAULT_THUMB_NAMES:
            default_file = f"{sub_dir}/{thumb_name}"
            if os.path.exists(default_file):
                thumb_file = default_file
                err(f"!! using {thumb_file} as thumbnail\n")
                break

    thumb_size = 0 if thumb_file is None else _THUMB_SIZE
    if thumb_file is None and link_text is None:
        # Nothing else to show: use the page's path as the link text.
        link_text = html_file
    link_to_sub = h.make_link(st, html_file, link_text, thumb_file, thumb_size, thumb_size)
    return link_to_sub
    # ----------------------------------------------------------------------


def html_subdoc_link_parag(
    st: dict,
    sub_dir: str,
    sub_name: str,
    thumb_img: str | None = None,
    link_text: str | None = None,
) -> None:
    # Appends to {st} a parag that consists of a link to the subsidiary
    # page "{sub_dir}/{sub_name}.html". Uses the image
    # "{sub_dir}/{thumb_img}" as thumbnail and {link_text} as the link's
    # text if not {None}. See {html_subdoc_link} for defaults. Assumes
    # that {link_text} is HTML-safe.
    #
    link_to_sub = html_subdoc_link(st, sub_dir, sub_name, thumb_img=thumb_img, link_text=link_text)
    h.parags(st, link_to_sub, markup=False, protect=False)
    # ----------------------------------------------------------------------


def image_link_parag(st: dict, img_url: str, img_size: int, link_text: str | None = None) -> None:
    # Appends to {st} a parag that consists of a link to "{img_url}",
    # assumed to be the URL of an image file.
    #
    # If {img_size} is positive, the link appearance is a thumbnail of
    # the image with the specified max width and height, with the
    # {link_text} as caption underneath if not {None}. If {img_size} is
    # zero, the appearance of the link is the string {link_text}, which
    # must not be {None}.
    #
    # The string {link_text} is filtered with {h.protect_html} and
    # {h.simple_markup}.
    #
    if link_text is not None:
        link_text = h.protect_html(link_text)
        link_text = h.simple_markup(link_text)
    link_html = h.make_link(st, img_url, link_text, img_url, img_size, img_size)
    h.parags(st, link_html, vspace=None, markup=False, protect=False)
    # ----------------------------------------------------------------------


def links_section(st: dict) -> None:
    # Appends to {st} a section titled "Links" with a list of all images
    # in the current folder and all HTML pages or images found in its
    # immediate subfolders (see {get_pages_and_images_in_sub_dirs}).
    targets = get_images_in_dir(".") + get_pages_and_images_in_sub_dirs(".")
    h.section(st, 2, "Links")
    links_enum(st, targets)
    # ----------------------------------------------------------------------


def links_enum(st: dict, targets: list[str]) -> None:
    # Appends to {st} an enum list of links with given targets.
    # Namely, for each {target} in {targets}, appends to {st} an enum
    # item that consists of a link to that {target}.
    #
    # A {target} of the form "{sub_dir}/{sub_name}.html" becomes a
    # subdoc link as per {html_subdoc_link}; any other {target} is
    # assumed to be an image and becomes a thumbnail link to itself.
    # In both cases the link text is the {target} minus any leading "./".
    #
    h.begin_enum(st, "ul")
    for target in targets:
        # The "./" prefix ensures that there is a folder part to match
        # even when {target} is a plain file name.
        m = re.fullmatch(r"(.*)/([^/]*)[.]html", f"./{target}")
        link_text = re.sub(r"^([.]/)*", "", target)
        if m:
            sub_dir = re.sub(r"^([.]/)*", "", m.group(1))
            sub_name = m.group(2)
            link_html = html_subdoc_link(st, sub_dir, sub_name, thumb_img=None, link_text=link_text)
        else:
            link_html = h.make_link(st, target, link_text, target, _THUMB_SIZE, _THUMB_SIZE)
        h.enum_item(st, link_html)
    h.end_enum(st, "ul")
    # ----------------------------------------------------------------------


def enum_item_link_parag(
    st: dict,
    sub_dir: str,
    sub_name: str,
    thumb_img: str | None,
    link_text: str | None = None,
) -> None:
    # Appends to {st} an enum parag that consists of a link to the
    # subsidiary page "{sub_dir}/{sub_name}.html". Uses the image
    # "{sub_dir}/{thumb_img}" as thumbnail and {link_text} as the link's
    # text if not {None}. See {html_subdoc_link} for defaults. Assumes
    # that {link_text} is HTML-safe.
    #
    link_to_sub = html_subdoc_link(st, sub_dir, sub_name, thumb_img=thumb_img, link_text=link_text)
    h.enum_item_parags(st, link_to_sub, markup=False, protect=False)
    # --------------------------------------------------------------------


def get_pages_and_images_in_sub_dirs(dir: str) -> list[str]:
    # Scans all immediate subfolders of folder "{dir}" for files called
    # "*.html", "*-src.py", "*.png", "*.jpg", or "annotate.sh".
    # Subfolders whose path contains "JUNK" or "SAVE" are skipped.
    #
    # For every sub-folder {sub_dir} and every {name} such that either
    # "{dir}/{sub_dir}/{name}-src.py" or "{dir}/{sub_dir}/{name}.html"
    # exist, the result will have "{dir}/{sub_dir}/{name}.html". If at
    # least one such file is found, ignores all image files in that
    # subfolder.
    #
    # If a subfolder has neither a "*-src.py" nor a "*.html", the result
    # will include the names of every image file in that subfolder,
    # as per {get_images_in_dir}.
    #
    # Leading "./" are removed from the returned paths. The result is
    # grouped by subfolder, in sorted order of subfolder path.
    #
    # Get list {src_html_img_files} of potential files of interest
    # (with full path including {dir}):
    sub_prefix = f"./{glob.escape(dir)}/*"
    src_html_img_files = _glob_all([
        f"{sub_prefix}/*-src.py",
        f"{sub_prefix}/*.html",
        f"{sub_prefix}/*.png",
        f"{sub_prefix}/*.jpg",
        f"{sub_prefix}/annotate.sh",
    ])

    # Get the set of subfolders where those files reside, without duplicates:
    subdirs = sorted({re.sub(r"/[^/]*$", "", x) for x in src_html_img_files})
    # NOTE(review): these paths all begin with "./", so this filter never
    # removes anything; top-level "work" folders end up excluded by the
    # same test inside {get_pages_in_dir} and {get_images_in_dir}.
    subdirs = [x for x in subdirs if not re.match(r"\bwork/", x)]

    items = []
    for subdir in subdirs:
        subdir = re.sub(r"^([.]/)*", "", subdir)
        if subdir != "" and not re.search(r"JUNK|SAVE", subdir):
            htmls = get_pages_in_dir(subdir)
            if len(htmls) != 0:
                items += htmls
            else:
                items += get_images_in_dir(subdir)
    return items
    # ----------------------------------------------------------------------


def get_pages_in_dir(dir: str) -> list[str]:
    # Scans the folder "{dir}" for files called "*.html" or "*-src.py".
    #
    # For every {name} such that either "{dir}/{name}-src.py" or
    # "{dir}/{name}.html" exist, the result will have "{dir}/{name}.html",
    # once, even if the HTML file itself does not exist yet. The result
    # is sorted. Entries whose path begins with "work/" are omitted.
    #
    # Fails if the folder "{dir}" does not exist.
    #
    assert os.path.exists(dir), f"folder {dir} does not exist"
    dir_prefix = glob.escape(dir)
    src_html_files = _glob_all([f"{dir_prefix}/*-src.py", f"{dir_prefix}/*.html"])
    # Strip the "-src.py" or ".html" suffix, so that a page and its source
    # collapse to the same name:
    hnames = sorted({re.sub(r"(-src[.]py|[.]html)*$", "", x) for x in src_html_files})
    # NOTE(review): {re.match} anchors at the start, so only paths that
    # begin with "work/" are dropped, not "work" folders deeper down.
    hnames = [x for x in hnames if not re.match(r"\bwork/", x)]
    return [f"{hname}.html" for hname in hnames]
    # ----------------------------------------------------------------------


def get_images_in_dir(dir: str) -> list[str]:
    # Scans the folder "{dir}" for files called "*.png" or "*.jpg"
    # or "annotate.sh". Returns the sorted full names (including {dir})
    # of those files, without duplicates.
    #
    # However, if it finds "{dir}/annotate.sh", the result has
    # "{dir}/annotated.png" instead of that script. Entries whose path
    # begins with "work/" are omitted.
    #
    dir_prefix = glob.escape(dir)
    img_files = _glob_all([
        f"{dir_prefix}/*.png",
        f"{dir_prefix}/*.jpg",
        f"{dir_prefix}/annotate.sh",
    ])
    # The set also merges an existing "annotated.png" with the one
    # implied by "annotate.sh":
    images = sorted({re.sub(r"/annotate[.]sh$", "/annotated.png", x) for x in img_files})
    # NOTE(review): {re.match} anchors at the start, so only paths that
    # begin with "work/" are dropped, not "work" folders deeper down.
    images = [x for x in images if not re.match(r"\bwork/", x)]
    return images
    # ----------------------------------------------------------------------