#! /usr/bin/python3
# Functions that generate HTML for reports and such.
LastEdit = "Last edited on 2026-03-13 00:15:03 by stolfi"
import os, sys, re, subprocess
def new_doc(title:str, bg_color:str, text_width:int|None = None) -> dict:
    # Returns a new HTML document object {st}, holding the preamble and
    # the level 1 section header (the title), with an empty body and an
    # empty table of contents.
    #
    # The {bg_color} must be an HTML color, e.g. "#fefffd". The
    # {text_width} is the max page width in pixels; if {None}, it
    # defaults to 1200.
    #
    # The {title} is used as is; the caller may want to call
    # {protect_html} and/or {simple_markup} as needed before calling
    # this function.
    #
    # The document object {st} is a dict with the following fields:
    #   'text_width': width of text on page.
    #   'indent':     indentation level of the next things to be appended to the body.
    #   'toc':        list of triplets {(snum,slevel,title)}, one per section.
    #   'body':       list of HTML-formatted pieces of the body,
    #                 properly indented, without final newlines.
    #   'preamble':   HTML-formatted preamble, as returned by {make_preamble}.
    #   'top_header': HTML-formatted level 1 header with the {title}.
    #   'nsecs':      number of sections added so far.
    #   'slevel':     level of the last section header.
    if text_width is None: text_width = 1200
    st = {
        'text_width': text_width,
        'indent': 0,
        'toc': [],
        'body': [],
        'preamble': make_preamble(title, bg_color, text_width),
        'top_header': make_section_header(title, snum = None, slevel = 1),
        'nsecs': 0,
        'slevel': 1,
    }
    return st
# ----------------------------------------------------------------------
def section(st:dict, slevel:int, title:str) -> None:
    # Starts a new section of the document {st}, appending its header
    # to the body and recording it in the table of contents.
    #
    # The {title} is used as is; the caller should consider applying
    # {protect_html} and {simple_markup} to it beforehand.
    #
    # The {slevel} must be an integer, at least 2, and must not exceed
    # the level of the previous section header plus one.
    #
    # The section number is the count of sections so far plus one.
    # The header is indented by {slevel}, and that becomes the
    # indentation of the items appended after it.
    assert isinstance(slevel, int) and slevel >= 2, f"invalid {slevel = }"
    slevel_max = st['slevel'] + 1
    assert slevel <= slevel_max, f"skipped levels {slevel_max}..{slevel-1}"
    snum = st['nsecs'] + 1
    header = make_section_header(title, snum = snum, slevel = slevel)
    st['body'].append(indent_lines(slevel, header))
    st['toc'].append((snum, slevel, no_breaks(title)))
    # Record the new state only after the header has been appended:
    st.update(indent = slevel, slevel = slevel, nsecs = snum)
# ----------------------------------------------------------------------
def parags(st:dict, txt:str, vspace:str = None, markup:bool = False, protect:bool = True, style:str|None = None) -> None:
    # Turns the plain text {txt} into a sequence of HTML paragraphs
    # (see {make_parags}) and appends the result to the body of {st},
    # indented at the current level.
    #
    # If {protect} is true, {txt} is first passed through {protect_html}.
    # If {markup} is true, it is then passed through {simple_markup}
    # (after the protection step, if any).
    # The {vspace} and {style} are handed over to {make_parags}.
    if protect:
        txt = protect_html(txt)
    if markup:
        txt = simple_markup(txt)
    formatted = make_parags(txt, vspace = vspace, style = style)
    st['body'].append(indent_lines(st['indent'], formatted))
# ----------------------------------------------------------------------
def begin_enum(st:dict, tag:str) -> None:
    # Appends to {st} the start-of-enumeration "<{tag}>", where {tag}
    # must be "ul" or "ol", and increases the indentation by one level
    # for the items that follow.
    assert tag in ("ul", "ol"), f"bad {tag = }"
    level = st['indent']
    opening = indent_lines(level, f"<{tag}>")
    st['body'].append(opening)
    st['indent'] = level + 1
# ----------------------------------------------------------------------
def enum_item(st:dict, html_item:str, vspace:str|None = None) -> None:
    # Appends {html_item} to the body of {st} as one item of the
    # current enumeration, at the current indentation level.
    #
    # The item is wrapped by {make_enum_item}, i.e. surrounded by
    # "<li>" and "</li>". The {vspace}, if not {None}, is the margin
    # above and below the item.
    #
    # The {html_item} must be HTML-safe.
    indent = st['indent']
    html_item = make_enum_item(html_item, vspace)
    st['body'].append(indent_lines(indent, html_item))
    return
# ----------------------------------------------------------------------
def enum_item_parags(st:dict, txt:str, vspace:str = None, markup:bool = False, protect:bool = True) -> None:
    # Same as {parags}, but the resulting paragraphs are wrapped in
    # "<li>...</li>" so that they form a single item of the current
    # enumeration.
    #
    # If {protect} is true, {txt} is first passed through {protect_html}.
    # If {markup} is true, it is then passed through {simple_markup}.
    # The {vspace} is handed over to {make_parags}.
    #
    # NOTE(review): the wrapper literals were empty strings here; "<li>"
    # and "</li>" are restored from the function's name and purpose.
    if protect: txt = protect_html(txt)
    if markup: txt = simple_markup(txt)
    html_parags = "<li>" + make_parags(txt, vspace = vspace) + "</li>"
    # Prefix the indentation blanks:
    html_txt = indent_lines(st['indent'], html_parags)
    st['body'].append(html_txt)
    return
# ----------------------------------------------------------------------
def end_enum(st:dict, tag:str) -> None:
    # Appends to {st} the end-of-enumeration "</{tag}>", where {tag}
    # must be "ul" or "ol", and decreases the indentation by one level.
    #
    # Currently does not check whether there was a matching "<{tag}>";
    # it only checks that the indentation does not become negative.
    assert tag in ("ul", "ol"), f"bad {tag = }"
    indent = st['indent'] - 1
    assert indent >= 0, f"bad {indent =}"
    # The closing tag needs the "</" prefix; "{tag}>" alone is not markup.
    st['body'].append(indent_lines(indent, f"</{tag}>"))
    st['indent'] = indent
    return
# ----------------------------------------------------------------------
def figure(st:dict, contents:str, caption:str, centered:bool = True) -> None:
    # Builds a figure from {contents} and {caption} with {make_figure}
    # and appends it to the body of {st} through {append_centered},
    # which receives the {centered} flag.
    append_centered(st, make_figure(contents, caption), centered)
# ----------------------------------------------------------------------
def table(st:dict, rows_or_cols:list[list[str]], by_rows:bool = True, col_mods:list[str]|None = None, centered:bool = True) -> None:
    # Formats a table given a list of lists of items, and appends it
    # to the body of {st} through {append_centered}. Returns nothing.
    #
    # If {col_mods} is not {None}, it must be a list with the modifier
    # to insert in the "<td>" of each column (or row).
    #
    # If {by_rows} is true, each element of {rows_or_cols} is a row of the
    # table. If {by_rows} is false, the table is transposed: each element
    # of {rows_or_cols} is a column.
    #
    # The formatting itself is done by {make_table}.
    html_tb = make_table(rows_or_cols, by_rows, col_mods)
    append_centered(st, html_tb, centered)
    return
# ----------------------------------------------------------------------
def append_preformatted(st:dict, item:str, ind:int, centered:bool) -> None:
    # Appends {item} to the body of {st}, between an opening piece and
    # a closing piece, with the indentation level temporarily raised
    # by {ind} while the item is appended.
    #
    # The {item} itself is appended through {append_centered}, which
    # receives the {centered} flag.
    #
    # NOTE(review): the opening and closing string literals below look
    # like they lost their markup (the first is empty, the second is
    # just a blank and a newline). Confirm which wrapper tags are
    # intended here.
    st['body'].append(indent_lines(st['indent'], ""))
    # Raise the indentation only for the duration of the item:
    st['indent'] += ind
    append_centered(st, item, centered)
    st['indent'] -= ind
    st['body'].append(indent_lines(st['indent'], " \n"))
    return
# ----------------------------------------------------------------------
def append_centered(st:dict, item:str, centered:bool) -> None:
    # Appends {item} to the body of {st}, properly indented.
    #
    # If {centered} is true, surrounds it with "<center>" ... "</center>"
    # and indents the item one level deeper than the wrapper. The
    # indentation level stored in {st} is not changed.
    #
    # NOTE(review): the wrapper literals were empty strings here;
    # "<center>" is an assumption based on the function's name. Confirm
    # the intended centering markup.
    indent = st['indent']
    body = st['body']
    if centered:
        body.append(indent_lines(indent, "<center>"))
        body.append(indent_lines(indent + 1, item))
        body.append(indent_lines(indent, "</center>"))
    else:
        body.append(indent_lines(indent, item))
    return
# ----------------------------------------------------------------------
def output_doc(st:dict, wr, toc_depth:int, last_edit:str) -> None:
    # Writes the whole document {st} to the stream {wr}, in this order:
    # the preamble, the top header, the table of contents (through
    # {write_toc}, limited by {toc_depth}), every piece of the body,
    # and the postamble built from {last_edit} by {make_postamble}.
    # Each piece is followed by a newline.
    #
    # The stream is flushed at the end, and also closed unless it
    # is {sys.stdout}.
    def put(piece):
        # Writes one piece and its terminating newline.
        wr.write(piece)
        wr.write("\n")

    put(st['preamble'])
    put(st['top_header'])
    write_toc(wr, st['toc'], toc_depth)
    for piece in st['body']:
        put(piece)
    put(make_postamble(last_edit))
    wr.flush()
    if wr != sys.stdout:
        wr.close()
# ----------------------------------------------------------------------
def write_toc(wr, toc:list, toc_depth:int) -> None:
    # Writes to {wr} the table of contents {toc}, a list of triplets
    # {(snum,slevel,title)}, as nested "<ul>" lists under an "Index"
    # header. Each entry links to the anchor "s{snum}".
    #
    # Writes only entries with {slevel <= toc_depth}. Does nothing
    # if {toc_depth} is less than 1.
    #
    # NOTE(review): the "<ul>", "</ul>" and "<a href=...>" markup was
    # missing from the string literals and is restored by inference.
    if toc_depth < 1: return
    wr.write(indent_lines(2, make_section_header("Index", snum = None, slevel = 2))); wr.write("\n")
    cur_slevel = 1   # Level of the innermost list currently open (1 = none).
    prev_slevel = 1  # Level of the previous entry, including skipped ones.
    for snum, slevel, title in toc:
        assert snum is not None, f"prog error - {snum = }"
        # Validate against the previous entry, not the last written one,
        # so that sections deeper than {toc_depth} cannot trip the check:
        assert slevel >= 1 and slevel <= prev_slevel + 1, f"bad {slevel = }"
        prev_slevel = slevel
        if slevel > toc_depth: continue
        while cur_slevel < slevel:
            wr.write(indent_lines(cur_slevel, "<ul>\n"))
            cur_slevel += 1
        while cur_slevel > slevel:
            cur_slevel -= 1
            wr.write(indent_lines(cur_slevel, "</ul>\n"))
        html_link = f'<a href="#s{snum}">{title}</a>'
        html_para = make_parags(html_link, vspace = "0pt")
        html_item = make_enum_item(html_para, vspace = "0pt")
        wr.write(indent_lines(slevel, html_item)); wr.write("\n")
    # Close exactly the lists that are still open:
    while cur_slevel > 1:
        cur_slevel -= 1
        wr.write(indent_lines(cur_slevel, "</ul>\n"))
    return
# ----------------------------------------------------------------------
def make_preamble(title:str, bg_color:str, text_width:str) -> str:
    # Formats the preamble of the HTML document, including the complete
    # "<head>" and the opening of "<body>".
    #
    # The {title} goes into the "<title>" element, after removing line
    # breaks with {no_breaks}. The {bg_color} becomes the background
    # color of the body. The {text_width} becomes its maximum width;
    # a plain number (int or digit string) is taken as pixels.
    #
    # NOTE(review): all markup was missing from the string literals
    # here; the skeleton below is a reconstruction, to be confirmed.
    width_css = str(text_width)
    if width_css.isdigit(): width_css = width_css + "px"
    html_pre = \
        "<!DOCTYPE html>" + \
        "\n" + \
        "<html>" + \
        "\n" + \
        "<head>" + \
        '<meta charset="UTF-8">' + \
        f"<title>{no_breaks(title)}</title>" + \
        "</head>" + \
        "\n" + \
        f'<body style="background-color:{bg_color}; max-width:{width_css};">'
    return html_pre
# ----------------------------------------------------------------------
def make_section_header(title:str, snum:int|None, slevel:int) -> str:
# Formats a section header with the given level {slevel} and anchor "s{snum}".
anchor = "" if snum == None else f""
html = f"{anchor}{title}"
return html
# ----------------------------------------------------------------------
def make_figure(contents:str, caption:str) -> str:
    # Creates HTML to display {contents} centered above the {caption},
    # as a one-column table with one row for each of them. The caption
    # row is omitted if {caption} is {None}.
    #
    # The {contents} and {caption} are taken as is and assumed
    # HTML-safe. The user should consider calling {protect_html},
    # {simple_markup}, and {make_parags} if appropriate.
    #
    # NOTE(review): the table markup was missing from the string
    # literals and is restored by inference.
    def one_row(html_cell:str) -> str:
        # Formats a table row with a single centered cell.
        return \
            "  <tr>\n" + \
            '    <td style="text-align:center;">\n' + \
            indent_lines(6, html_cell) + "\n" + \
            "    </td>\n" + \
            "  </tr>\n"

    html_row1 = one_row(contents)
    html_row2 = "" if caption is None else one_row(caption)
    html_fig = "<table>\n" + html_row1 + html_row2 + "</table>"
    return html_fig
# ----------------------------------------------------------------------
def make_table(rows_or_cols:list[list[str]], by_rows:bool = True, col_mods:list[str]|None = None) -> str:
    # Formats a list of lists of HTML items as an HTML "<table>",
    # returned as a multiline string without final newline.
    #
    # If {by_rows} is true, each element of {rows_or_cols} is a row;
    # otherwise each element is a column, and the lists are transposed
    # with {transpose_elems} first.
    #
    # If {col_mods} is not {None}, it must have at least as many
    # elements as the longest row. A non-empty {col_mods[ic]} is
    # inserted in the "<td>" tag of every cell of column {ic}.
    #
    # Items that are {None} become empty cells.
    #
    # NOTE(review): the table, row and cell tags were missing from the
    # string literals and are restored by inference.
    rows = rows_or_cols if by_rows else transpose_elems(rows_or_cols)
    html_tb = [ "<table>" ]
    for ir, row in enumerate(rows):
        if col_mods is not None:
            assert len(col_mods) >= len(row), \
                f"not enough column mods ({len(col_mods)}) for row {ir}"
        html_tb.append(indent_lines(1, "<tr>"))
        for ic, el in enumerate(row):
            el_mod = col_mods[ic] if col_mods is not None else None
            td_tag = "<td>" if not el_mod else f"<td {el_mod}>"
            html_el = "" if el is None else el
            html_tb.append(indent_lines(2, td_tag))
            html_tb.append(indent_lines(3, html_el))
            html_tb.append(indent_lines(2, "</td>"))
        html_tb.append(indent_lines(1, "</tr>"))
    html_tb.append("</table>")
    return "\n".join(html_tb)
# ----------------------------------------------------------------------
def transpose_elems(cols:list[list[str]]) -> list:
    # Given a list {cols} of lists, returns a list {rows} of lists
    # such that {rows[ir][ic] == cols[ic][ir]}.
    #
    # Elements of {cols} that are {None} are ignored, and {None} is
    # used as filler where needed. Rows are not padded at the end, so
    # they may have different lengths.
    rows = []
    for ic, col in enumerate(cols):
        for ir, el in enumerate(col):
            if el is None: continue
            while len(rows) <= ir: rows.append([ ])
            row = rows[ir]
            # Pad only up to index {ic-1}, so that {el} lands at index {ic}:
            while len(row) < ic: row.append(None)
            row.append(el)
    return rows
# ----------------------------------------------------------------------
def make_link(st:dict, link_url:str, link_text:str|None, img_url:str|None, max_width:int, max_height:int) -> str:
# Returns an HTML fragment that is a link to {link_url}.
#
# If {img_url} is not {None}, the link appearance is
# the image {img_url} with the specified max width and height, which must be positive. The
# {link_text}, if not {None}, is added below the thumbnail as a
# caption. Otherwse the appearance is the string {link_text}.
#
# The {link_text} is assumed to be HTML-safe.
# err(f"!! enter make_link {link_url = } {link_text = } {img_url = }\n")
if img_url != None:
assert max_width > 0 and max_height > 0, "invalid max_width,max_height"
assert not re.search(r"[?][?]", img_url), "undefined figure"
link_form = f" "
if link_text != None:
link_form = f"{link_form} {link_text} "
err(f"!! warning: ignoring {link_text = }\n")
else:
assert link_text != None, "must specify {link_text}"
assert not re.search(r"[?][?]", link_text), "undefined URL"
link_form = link_text
link_html = f"{link_form}"
return link_html
# ----------------------------------------------------------------------
def make_postamble(last_edit:str) -> str:
    # Formats the postamble of the HTML document: a horizontal rule,
    # the {last_edit} string (used as is) in small print, and the
    # closing of "<body>" and "<html>".
    #
    # NOTE(review): all markup was missing from the string literals
    # here; the elements below are a reconstruction, to be confirmed.
    html = \
        "<hr/>\n" + \
        "<p><small>\n" + \
        f"{last_edit}\n" + \
        "</small></p>\n" + \
        "</body>\n" + \
        "</html>\n"
    return html
# ----------------------------------------------------------------------
def make_parags(html_txt:str, vspace:str|None = None, width:str|None = None, align:str|None = None, style:str|None = None) -> str:
# Converts a multiline HTML text {html_txt} into HTML paragraphs. Single newlines
# are replaced by spaces. One or more blank lines becomes a parag
# break. Assumes that {txt} is HTML-safe.
#
# The {style}, if not "None", should be a CSS style for the parag.
# If {vspace} is not {None}, it is included in the {style} as "margin: {vspace} 0pt".
# If {align} is not {None}, it is included in the {style} as "text-align:{align}"
# If {width} is noy {None}, it is included in the {style} as ???. It may be absolute "600px" or
# relative "80%".
# Removes blanks and newlines at both ends:
html_txt = html_txt.strip()
# Replaces one or more internal blank lines by "\n\n":
html_txt = re.sub("[ ]*\n[ ]*\n[ \n]*", "\n\n", html_txt)
# Replaces single line breaks by spaces:
html_txt = re.sub("([^\n])\n([^\n])", r"\1 \2", html_txt)
# Insert "... " delimiters:
vspace_st = "" if vspace == None else f"margin:{vspace} 0pt;"
align_st = "" if align == None else f"text-align:{align};"
width_st = "" if width == None else f"width:{width};"
style_st = "" if style == None else f"{style};"
style_all = " ".join((vspace_st, align_st, width_st, style_st)).strip()
popen = "" if style == "" else f" "
pclose = " "
html_txt = re.sub("(^|\n\n)", r"\1" + popen, html_txt)
html_txt = re.sub("(\n\n|$)", pclose + r"\1", html_txt)
return html_txt
# ----------------------------------------------------------------------
def make_enum_item(html_txt:str, vspace:str|None = None) -> str:
# Formats an HTML fragment {html_txt} into an item for "" or "" enumeration.
liopen = "- " if vspace == None else f"
- "
liclose = "
"
html_item = liopen + html_txt + liclose
return html_item
# ----------------------------------------------------------------------
html_txt = "- " + html_txt + "
"
return html_txt
# ----------------------------------------------------------------------
def protect_html(txt):
    # Converts "<" ">" "&" to the HTML entities "&lt;" "&gt;" "&amp;",
    # unless they are parts of "<b>", "<i>", "<s>", "<tt>", "<a ...>",
    # "<img ...>", their closing counterparts, any form of "<br>"
    # (normalized to "<br/>"), or recognizable HTML entities
    # ("&name;" with lowercase letters, or "&#digits;").
    #
    # A "&" inside the attributes of "<a ...>" or "<img ...>" that is
    # not part of an entity is converted too.
    #
    # Warning: DON'T use if text already has other HTML markup!
    #
    # First, replace all "@" by "@0", so that "@1", "@2", "@3" are free:
    txt = re.sub(r'@', r"@0", txt)
    # Then replace all "&" "<" ">" in recognizable tags by "@1", "@2", "@3":
    txt = re.sub(r'[&]([#][0-9]+|[a-z]+);', r"@1\1@1", txt)
    txt = re.sub(r'<(/?)([bis]|tt)>', r"@2\1\2@3", txt)
    txt = re.sub(r'<(/?)(a|img)\b([^<>]*)>', r"@2\1\2\3@3", txt)
    txt = re.sub(r'</?br/?>', r"@2br/@3", txt)
    # Now protect all other "&", "<", ">":
    txt = re.sub(r'[&]', r"&amp;", txt)
    txt = re.sub(r'[<]', r"&lt;", txt)
    txt = re.sub(r'[>]', r"&gt;", txt)
    # Now restore the "@" codes. Entities get their "&" back:
    txt = re.sub(r'@1([^@]+)@1', r"&\1;", txt)
    # A protected tag may contain "@0" (an original "@", e.g. in a
    # "mailto:" URL), so that code must be allowed inside it:
    txt = re.sub(r'@2((?:[^@]|@0)+)@3', r"<\1>", txt)
    txt = re.sub(r'@0', r"@", txt)
    return txt
# ----------------------------------------------------------------------
def simple_markup(txt):
    # Converts "... *{blabla}* ..." to bold ("<b>...</b>") and
    # "... /{blabla}/ ..." to italic ("<i>...</i>").
    #
    # The {blabla} must not contain "<", ">", or the marker itself.
    # The opening marker must be at the start of the text or follow a
    # blank, newline, or one of ".,:!;*>({[". The closing marker must
    # be at the end of the text or precede a blank, newline, or one
    # of ".,:!;/<)}]".
    #
    # Nesting only works if both end at the same time, "*/{blabla}/*"
    # or "/*{blabla}*/". Partially overlapping markup is not supported.
    #
    # The delimiters are checked with lookarounds, so they are not
    # consumed: marked words separated by a single blank all convert.
    #
    # NOTE(review): the "<b>" and "<i>" tags were missing from the
    # replacement strings and are restored by inference.
    lpunct_pat = r"(?<![^ \n.,:!;*>({\[])"
    rpunct_pat = r"(?![^ \n.,:!;/<)}\]])"
    txt = re.sub(lpunct_pat + r"[/][*]([^/*<>]+)[*][/]" + rpunct_pat, r"<i><b>\1</b></i>", txt)
    txt = re.sub(lpunct_pat + r"[*][/]([^*/<>]+)[/][*]" + rpunct_pat, r"<b><i>\1</i></b>", txt)
    txt = re.sub(lpunct_pat + r"[/]([^/<>]+)[/]" + rpunct_pat, r"<i>\1</i>", txt)
    txt = re.sub(lpunct_pat + r"[*]([^*<>]+)[*]" + rpunct_pat, r"<b>\1</b>", txt)
    return txt
# ----------------------------------------------------------------------
def get_text_from_file(fname):
    # Returns the contents of the text file {fname} as a single string,
    # with the lines (each still ending with its newline) joined by
    # one blank. If the file does not exist, returns the string
    # "** {fname} not found" instead.
    #
    # The file is opened directly, instead of testing for existence
    # first, and is closed even if the reading fails.
    try:
        with open(fname, 'r') as rd:
            lines = rd.readlines()
    except FileNotFoundError:
        return f"** {fname} not found"
    return ' '.join(lines)
# ----------------------------------------------------------------------
def get_image_size(fname):
    # Returns the pair {(width, height)} of the image file {fname},
    # as integers, as reported by the external "convert" command.
    #
    # Raises {subprocess.CalledProcessError} if the command fails, and
    # fails an assertion if its output is not two fields.
    #
    # The format argument carries no shell quotes, because the command
    # is run without a shell and they would be printed literally.
    cmd = [ "convert", fname, "-print", "%[fx:w] %[fx:h]", "null:-" ]
    # Ask for text output; {str()} of bytes would yield "b'...'":
    res = subprocess.check_output(cmd, text = True).strip().strip("'\"")
    size = res.split()
    assert len(size) == 2, f"bad command output '{res}'"
    # A tuple, unlike a generator, can be indexed and read more than once:
    return tuple(int(x) for x in size)
# ----------------------------------------------------------------------
def indent_lines(indent:int, txt:str) -> str:
    # Prepends an indentation prefix to each line of {txt} and returns
    # the result. The prefix is the {blanks} literal below repeated
    # {indent} times; {indent} must not be negative.
    #
    # NOTE(review): the original comment said "{2*indent} spaces", but
    # the literal below is a single blank. Confirm whether the unit
    # should be two blanks.
    assert indent >= 0, f"bad {indent = }"
    blanks = " " * indent
    # Indent the first line, and every line that follows a newline:
    txt = blanks + re.sub(r"\n", "\n" + blanks, txt)
    # In case {txt} ends with newline, drop the blanks added after it:
    txt = re.sub(r"\n *$", "\n", txt)
    return txt
# ----------------------------------------------------------------------
def no_breaks(txt:str) -> str:
    # Removes "<br>" / "<br/>" elements from the {txt}. Each one,
    # together with the blanks around it, is replaced by a single blank.
    #
    # NOTE(review): the tag was missing from the pattern, which then
    # matched the empty string everywhere; it is restored by inference.
    res = re.sub(r"[ ]*<br[ ]*/?>[ ]*", " ", txt)
    return res
# ----------------------------------------------------------------------
def out(txt:str) -> None:
    # Writes {txt} to {sys.stdout}, as is, with no newline added.
    sys.stdout.write(txt)
# ----------------------------------------------------------------------
def err(txt:str) -> None:
    # Writes {txt} to {sys.stderr}, as is, with no newline added.
    sys.stderr.write(txt)
# ----------------------------------------------------------------------
|