#! /usr/bin/python3 # Last edited on 2025-08-24 05:05:22 by stolfi # Reads a set of {nb} narrow-band monochrome images for a given page and # clip. Extracts from those images a set of pixels, selected by a binary # mask image. For those pixels, computes # # {savg} the mean spectrum # # {smin} the "darkest" spectrum # # {smax} the "lightest" spectrum # # {P} a matrix with the {ne} principal components of those spectra, # # {rdev} the total variance of those spectra that is not # accounted for by those principal components. # # The "darkest" spectrum {smin} is such that, for any spectral band {ib} # in {0..nb-1}, only 0.025 of the sampled pixels have lower albedo in # that band than {smin[ib]}. The "lightest" spectrim is similar but with # "higer alebdo" insread of "lower albedo". # # The principal components are the {ne} eigenvectors of the covariance # matrix with largest eigenvalues, scaled by the square root of those # eignevalues. The number {ne} is currently fixed at 10. # # The parameters are the {page}, the clip's name {clip}, and the mask name # {mask}. # # The program repeats the analysis for a list of {nm} mask names. It # saves the results of all these analyses to text files. # # Each eigenvector selected as principal component has its diection # flipped as needed so that the sum of the elements is non-negative. # This means that increasng the albedo of a pixel on all bands by the # same amount will cause the projection on that eigenvector to remain # the same or increase, never decrease. # # INPUT FILES # # Assumes that there is a file "MS/davis/{page}/bands.txt" that specifies the # names of the bands avaliable for that page (e.g. "MB870IR_030_F") # as well as their illumnation type {illum} ({0..3}) and main wavelength {wlen}. # # The input images are read from "{cdir}/npy/{band}.npy", where {cdir} # is "pages/{page}/clips/{clip}", for each {band} listed in the # "bands.txt" file above that was imaged in reflected light, with # frontal narrow-band illumination ({illum == 0} and name starts with # "MB"). # # These clip image files are assumed to be {numpy} 2D matrices of floats # stored with {numpy.save}. The values are assumed to have been # extracted from the TIFF files without any gamma mapping (other than # whatever was used to encode those file by the scanning team) and # scaled from some integer range {0..maxval}, specific to each band, to # the float range {[0 _ 1]}. # # Each mask image is read from the local file "{cdir}/masks/{mask}.png", # where {mask} is the mask's name(e.g. "ch", "di", "bg"). This file is # supposed to be in PNG (not PGM) format. The mask image should have the # same dimensions as the clip images, with pixels either 0 ("ignore") or # 1 ("take"). # # MAIN OUTPUT FILES # # The principal components and other info for each {mask} are saved to disk as as file # "{mdir}/analysis.txt" where {mdir} is "pages/{page}/clips/{clip}/masks/{mask}". # # The first two lines of each of these files are "nb = {nb}" and "ne = {ne}". # # After this header come {nb} lines, each with {1 + (ne+2)*nm} columns. Each # line except the last one has the format # # "{band[ib]} {illum[ib]} {wlen[ib]} {data[0]} ...{data[nm-1]}" # # where {band[ib]} is the name of a spectrum band, {illum[ib]} and {wlen[ib]} # are the illumination type and wavelength of that band (as in the "bands.txt" file) # and {data[im]} is data for mask {im}. # # The data for each mask is {ne+5} columns # # "{savg[ib,im]} {smin[ib,im]} {smax[ib,im]} {rdev[ib,im]} {tdev[ib,im]}" + # " {P[0,ib,im]} ... {P[ne-1,ib,im]}" # # where {tdev[ib,im]} is the magnitude of the residual not accounted for # by the first component {P[ib,0,im]}, that is, {tdev[ib,im] = # sqrt(SUM(P[ib,ie,im]^2 : ie\in 1..ne-1) + rdev[ib,im]^2)} # import sys, re, os from math import sqrt, sin, cos, log, exp, floor, pi, inf import numpy from PIL import Image from error_funcs import prog_error, arg_error, debug from process_funcs import bash, run_command from bands_table_funcs import read_page_bands_tables from image_funcs import read_mask_image, read_clip_npy_images import argparser def main(): page, clip, masks = parse_options() nm = len(masks) # masks the spectral bands tables: bfile = f"MS/davis/{page}/bands.txt" bands_tb = read_page_bands_tables(bfile) # Select the bands to use: bands = [] # Bands included in the analysis. for band in bands_tb.keys(): illum = bands_tb[band]['illum'] if illum == 0: assert band[0:2] == "MB", "bug: band image name" bands.append(band) nb = len(bands) debug("bands used", bands) assert nb > 0, "bug nb" # Get the clip images: C = read_clip_npy_images(page, clip, bands) assert numpy.size(C,2) == nb ny, nx = numpy.shape(C[:,:,0]) # Compute the principal components and extremal spectra: ne = 10; # Number of principal components to keep. savg = numpy.zeros((nb,nm,)) smin = numpy.zeros((nb,nm,)) smax = numpy.zeros((nb,nm,)) rdev = numpy.zeros((nb,nm,)) P = numpy.zeros((nb,ne,nm,)) for im in range(nm): mask = masks[im] sys.stderr.write(f"=== mask = {mask} ===============================================\n") # Get the mask image: mfile = f"pages/{page}/clips/{clip}/masks/{mask}/mask.png" M = read_mask_image(mfile) # Obtain the relevant pixel spectra: S = extract_masked_pixel_spectra(C, M) ns = numpy.size(S,0) fs = ns/(ny*nx) sys.stderr.write(f"mask {mask}: selected {ns} samples of {nx*ny} ({fs:.4f})\n") debug("S[0,:]", S[0,:]) debug("S[:,0]", S[:,0]) assert numpy.shape(S) == (ns,nb,) # Compute principal components: savg, rdev, P = principal_components(S, ne) ne = numpy.size(P,1) assert numpy.shape(savg) == (nb,), "bug savg shape" assert numpy.shape(rdev) == (nb,), "bug rdev shape" assert numpy.shape(P) == (nb,ne,), "bug P shape" # Compute extremal spectra: u = P[:,0]/numpy.linalg.norm(P[:,0]) smin, smax = extremal_spectra(S, savg, u) # Save analysis results: write_analysis_data \ ( page, clip, mask, bands, bands_tb, savg, smin, smax, rdev, P ) return 0 # ---------------------------------------------------------------------- def extract_masked_pixel_spectra(C, M): # Reads the image of the given {page} and {clip} illuminated by the # given {band}. Extracts the elements where the mask image {M} is # nonzero, and returns them as a list, in row-by-row order. # # Both {M} and the clip image must have shape {(ny,nx)}. ny, nx, nb = numpy.shape(C) if numpy.shape(M) != (ny, nx): arg_error(f"mask and clip have different sizes {(nx,ny)} {numpy.shape(M)}") # Colect the masked samples: ns = None; # For now. S = None # For now. for ib in range(nb): samples = \ [ min(1.0, C[iy,ix,ib]) \ for iy in range(ny) \ for ix in range(nx) \ if M[iy,ix] != 0 \ ] if ns == None: ns = len(samples) S = numpy.zeros((ns,nb,)) else: assert len(samples) == ns, "bug inconsitent sample counts" S[:,ib] = numpy.array(samples) return S # ---------------------------------------------------------------------- def principal_components(S, ne): # Expects {S} to be a matrix of pixel spectra. It must have shape # {ns,nb}, where {ns} is the number of relevant pixels and {nb} is the # number of spectral bands. # # Computes the average {savg} of all the spectra, and their covariance # matrix {V}, with shape {(nb,nb)}. Then computes the eigenvectors of # {V} as the columns of a matrix {E} with shape {(nb,nb)}, and the # vector of the respective eigenvalues {e}. Then extracts the {ne} # eigenvectors with largest eigenvalues, each scaled by the square # root of its eigenvalue, as the rows of a matrix {P} with shape # {(nb,ne)}. Also, for each band {ib} computes {rdev[ib]} which is the # Euclidean norm of {( sqrt(e[ke])*E[ib,ke] : ke \in ne..nb-1 )}. # # Returns {savg,rdev,P}. ns, nb = numpy.shape(S) # Compute the barycenter (mean) {savg} of the spectra: savg = barycenter(S) debug("max(abs(savg))", numpy.max(numpy.abs(savg))) assert numpy.shape(savg) == (nb,), "bug: savg shape" # Compute {D} whose rows are the displacements from barycenter: D = numpy.array([ S[ks,:] - savg for ks in range(ns) ]) debug("max(abs(D))", numpy.max(numpy.abs(D))) # Compute the covariance matrix {V=D'*D}: V = (D.transpose() @ D) / float(ns) debug("max(V)", numpy.max(V)) # Compute the eigenvalues {E} and eigenvectors {e}: eig = numpy.linalg.eigh(V) e = eig.eigenvalues sys.stderr.write(f"eigenvalues:\n{e}\n"); assert numpy.shape(e) == (nb,), "bug: eigenvalues shape" E = eig.eigenvectors sys.stderr.write(f"eigenvectors:\n{E}\n") assert numpy.shape(E) == (nb,nb,), "bug: eigenvectors shape" # Extract the {ne} eigenvectors with largest eigenvalues: P = [ sqrt(e[nb-1-ie])*E[:,nb-1-ie] for ie in range(ne) ] P = numpy.array(P).transpose() # Adjust the sign of the selected eigenvectors: for ie in range(ne): sum = numpy.sum(P[:,ie]) if sum < 0: P[:,ie] = -P[:,ie] # Compute the residual deviation {rdev}: rdev = numpy.zeros((nb,)) for ib in range(nb): sum = 0 for ke in range(nb-ne): sum += e[ke]*E[ib,ke]**2 rdev[ib] = sqrt(sum) assert numpy.shape(savg) == (nb,) assert numpy.shape(P) == (nb,ne,) assert numpy.shape(rdev) == (nb,) check_principal_components(S, savg, P) return savg, rdev, P # ---------------------------------------------------------------------- def extremal_spectra(S, savg, u): # Expects {S} to be a matrix of pixel spectra. It must have shape # {ns,nb}, where {ns} is the number of relevant pixels and {nb} is the # number of spectral bands. Expects savg to be the nominal barycenter # of all those spectra. Expecs {u} to be the principal eigenvector of # the cloud, with unit norm and shape {(nb,)}, oriented from "dark" to # "bright". # # Computes the "darkest" spectrum {smin} and the "lightest" spectrum # {smax} based on the projection {t[ks]} of the pixel spectra {S[ks,:] # - savg} along the direction of {P0}. # # Specificaly, let {tmin} and {tmax} be the # the {fmin} and {1-fmin} fractiles of the projections {t[:]}. # The "darkest" spectrum {smin[:]} is defined as {savg + tmin*P0}, # and the "ligtest" spectrum is defined as {savg + tmax*P0}. # # Returns {smin,smax}. ns, nb = numpy.shape(S) assert numpy.shape(savg) == (nb,) assert numpy.shape(u) == (nb,) # Collect the total projections of all the pixels: t = [] for ks in range(ns): t.append(numpy.dot(S[ks,:] - savg, u)) t = numpy.array(t) assert numpy.shape(t) == (ns,), "bug t shape" # Get the fractiles: fmin = 0.02 tmin = numpy.quantile(t, fmin) tmax = numpy.quantile(t, 1 - fmin) debug("t quantiles", (tmin, tmax,)) assert tmin < 0 and tmax > 0, "bug bad tmin,tmax signs" # Compute the extremal spectra: smin = savg + tmin*u smax = savg + tmax*u assert numpy.shape(smin) == (nb,) assert numpy.shape(smax) == (nb,) return smin, smax # ---------------------------------------------------------------------- def barycenter(P): # Expects {P} to be a {numpy} matrix. Computes the # average of all rows. savg = numpy.sum(P, 0) debug("shape(savg)", numpy.shape(savg)) savg = savg/numpy.size(P, 0) return savg # ---------------------------------------------------------------------- def check_principal_components(S, savg, P): # Checks whether the projections of the pixel spectra in {S} # along the direction of {P} has indeed deviation {|P|}. ns, nb = numpy.shape(S) ne = numpy.size(P,1) assert numpy.shape(P) == (nb, ne,), "bad {P} shape" assert numpy.shape(savg) == (nb,), "bad {savg} shape" for ie in range(ne): Pi = P[:,ie] Pmod = numpy.linalg.norm(Pi) # Check mean in direction {Pi}: savg_proj = (savg @ Pi)/Pmod S_proj = (S @ Pi)/Pmod S_proj_avg = numpy.mean(S_proj) err_avg = S_proj_avg - savg_proj if err_avg > 1.0e-10*sqrt(ns): sys.stderr.write(f"savg_proj = {savg_proj} S_proj_avg = {S_proj_avg} err = {err_avg}\n") assert False, "bug principal component: mean" # Check deviation in direction {pi}: S_proj_dev = numpy.std(S_proj) err_dev = Pmod - S_proj_dev if err_dev > 1.0e-10*sqrt(ns): sys.stderr.write(f"Pmod = {Pmod} S_proj_dev = {S_proj_dev} err = {err_dev}\n") assert False, "bug principal component: deviation" return # ---------------------------------------------------------------------- def write_analysis_data(page, clip, mask, bands, bands_tb, savg, smin, smax, rdev, P): # Writes a file with the analysis # results for the subset of samples of clip {clip} in page {page} # selected by the mask {mask}, namely {savg,smin,smax,rdev,P}. # Assumes that the analysis used the spectral bands # with names {bands[0..nb-1]}. See the top of this # module for the format of this file. # nb, ne = numpy.shape(P) assert len(bands) == nb, "inconsitent {bands}" assert numpy.shape(savg) == (nb,) assert numpy.shape(smin) == (nb,) assert numpy.shape(smax) == (nb,) assert numpy.shape(rdev) == (nb,) adir = f"pages/{page}/clips/{clip}/masks/{mask}" bash(f"mkdir -p {adir}") afile = f"{adir}/analysis.txt" sys.stderr.write(f"writing {afile} ...\n") sfmt = " %+8.4f" # Format for signed PC components pfmt = " %7.4f" # Format for non-negative PC/spectrum components with open(afile, "w") as wr: wr.write(f"nb = {nb}\n") wr.write(f"ne = {ne}\n") for ib in range(nb): band = bands[ib] wr.write("%-15s" % band) wr.write(f" {bands_tb[band]['illum']:d}") wr.write(" %7.1f" % bands_tb[band]['wlen']) wr.write(" ") for svar in ( savg, smin, smax, rdev ): wr.write(pfmt % svar[ib]) # Total PC magnitude except the first one: sum2_P = 0; for ke in range(1,ne): sum2_P += P[ib,ke]**2 tdev = sqrt(sum2_P + rdev[ib]**2) wr.write(pfmt % tdev) for ie in range(ne): wr.write(sfmt % P[ib,ie]) wr.write("\n") wr.close() return # ---------------------------------------------------------------------- def parse_options(): na = len(sys.argv) assert na >= 4, "insuff args" page = sys.argv[1] clip = sys.argv[2] # name of clip to take pixels from. masks = [ sys.argv[k] for k in range(4,na) ] return page, clip, masks # ---------------------------------------------------------------------- def band_error(band, msg): sys.stderr.write(f"** {band}: {msg}\n") assert False # ---------------------------------------------------------------------- main()