#! /usr/bin/python3 # Last edited on 2025-08-11 02:19:26 by stolfi # Extracts from multispectral images the spectra of a set of pixels # selected by a given page, clip, and mask. Computes the {ne} principal # components of those spectra (the {ne} eigenvectors of the covariance # matrix with largest eigenvalues) and saves that information to disk. # # The parameters are the {page}, the clip's name {clip}, the mask's name # {mask}, and a name {vset} for the resulting set of eigenevector. # Currently {ne} is fixed at 10. # # The multispactral images are assumed to be in the external directory # {pgdir_in}, namely "MS/davis/{page}". # # Assumes that there is a file "{pgdir_in}/bands.txt" that # specifies the names of the bands avaliable for that page (e.g. # "MB870IR_030_F") and the corresponding {maxval} for sample scaling. # # The input images are read from "{pgdir_in}/clips/{clip}.pgm", for each # narrow reflection ("MB") band {band} listed in the "bands.txt" file # above. These files are assumed to be 16-bit PGM (not PNG), with the # pixel values extratced from the TIFF files withouut any gamma mapping # (other than whatever was used to encode those file by the scanning # team). # # The mask is read from the local file "{cdir}/masks/{mask}.png" where # {cdir} is "pages/{page}/clips/{clip}" This is supposed to be a PNG # (not PGM) file, with same dimensions as the clip images, with pixels # either 0 ("ignore") or 1 ("take"). # # The set of eigenvectors is saved to disk as as a file "pages/{page}/vsets/{vset}/princ.txt". # The file has {nb} lines and {ne+1} columns. Each line # has the format "{band[ib]} {ev[0,ib]} {ev[1,ib]} ... {ev[ne-1,ib]}" # where {band[ib]} is the name of a spectrum band, and {ev[ie,ib]} is the # value of eigenvector {ev[ie]} at that band. import numpy from math import sin, cos, log, exp, floor, pi, inf import sys, re from error_funcs import file_line_error, prog_error, arg_error from process_funcs import bash, run_command from bands_table import read_bands_table import argparser def main(): page, clip, mask = parse_options() maxval_tb = read_maxval_table(page) # Dict mapping band name to {maxval}. # debug("bands", maxval_tb) bands = tuple(maxval_tb.keys()) nb = len(bands) # Number of bands. debug("bands", bands) samples_tb = dict() ns = None # Number of spectra in the set. for band in bands: maxval = maxval_tb[band] samples_tb[band] = get_samples(page, band, clip, maxval, mask) ns1 = len(samples_tb[band]) debug(f" {band}: num samples", ns1) if ns == -1: ns = ns1 else: if ns1 != ns: band_error(band, f"inconsistent num samples {ns} {ns1}") # Create a matrix with one row per pixel, one column per band: M = numpy.array([ samples_tb[bands[ib]] for ib in range(nb) ]) M = M.transpose() debug("M[0,:]", M[0,:]) debug("M[:,0]", M[:,0]) ne = 10; P = principal_components(M, ne) assert numpy.shape(P) == (nb, ne) # Generate images with last three eigenvectors: write_components(page, vset, bands, P) return 0 # ---------------------------------------------------------------------- def read_maxval_table(page): pgdir_in = f"MS/davis/{page}" with open(f"{pgdir_in}/bands.txt") as rd: res = { } for line in rd: line = re.sub(r"[#].*$", "", line) if line.strip() != "": key, val = line.split(' ', 1) key = key.strip(); val = val.strip() if re.match(r"MB", key): res[key] = int(val) return res # ---------------------------------------------------------------------- def get_samples(page, band, clip, maxval, mask): res = [] pgdir_in = f"MS/davis/{page}" cfile = f"{pgdir_in}/{band}/clips/{clip}.pgm" mfile = f"pages/{page}/clips/{clip}/masks/{mask}.png" xfile = f".{clip}.pgm" vfile = f".{clip}.vals" hblack = "#000100010001" hwhite = "#ffffffffffff" bash \ ( \ f"convert " + \ f" \\( {cfile} -set colorspace LinearGray +level-colors '{hblack},{hwhite}' \\)" + \ f" \\( {mfile} -set colorspace LinearGray -depth 16 -colorspace LinearGray \\)" + \ f" -compose multiply" + \ f" -composite" + \ f" -depth 16 -colorspace LinearGray" + \ f" {xfile}" ) bash \ ( \ f"cat {xfile}" + \ r" | pnmtoplainpnm" + \ r" | tr ' ' '\012'" + \ r" | egrep -e '.'" + \ r" | gawk '// { if ((FNR > 4) && ($1 > 0)) { print $1; } }'" + \ f" > {vfile}" ) with open(vfile) as rd: res = [] nbig = 0; ismp_max = 0 for line in rd: line = re.sub(r"[#].*$", "", line) if line.strip() != "": ismp = int(line.strip()) if ismp != 0: if ismp > maxval: nbig += 1 if ismp > ismp_max: ismp_max = ismp ismp = maxval fval = float(ismp)/float(maxval) res.append(fval) rd.close() if nbig > 0: sys.stderr.write(f"{nbig} samples above {maxval}, max = {ismp_max}\n") return res # ---------------------------------------------------------------------- def principal_components(M, ne): # Compute {N} whose rows are the displacements from barycenter: bar = barycenter(M) debug("max(abs(bar))", numpy.max(numpy.abs(bar))) N = numpy.array([ M[ip] - bar for ip in range(ns) ]) debug("max(abs(N))", numpy.max(numpy.abs(N))) # Compute the principal directions of {N'*N}: C = (N.transpose() @ N) / float(ns) debug("max(C)", numpy.max(C)) evv = numpy.linalg.eigh(C) sys.stderr.write("eigenvalues:\n") print(evv.eigenvalues) sys.stderr.write("eigenvectors:\n") print(evv.eigenvectors) return(evv[-ne:]) # ---------------------------------------------------------------------- def barycenter(M): # Expects {M} to be a {numpy} matrix. Computes the # average of all rows. bar = numpy.sum(M, 0) debug("shape(bar)", numpy.shape(bar)) bar = bar/numpy.size(M, 0) return bar # ---------------------------------------------------------------------- def write_components(page, vset, bands, P): # Writes a file with the {ne} principal components in a list of {nb} # spectrum bands. The components are the {ne} columns of the # matrix {P}, which should have {nb} rows. # nb = numpy.size(evv.eigenvalues,0) efile = f"pages/{page}/vsets/{vset}/princ.txt" with open(efile, "w") as wr: wr.write(f"nb = {nb}\n") wr.write(f"ne = {ne}\n") for ib in range(nb): wr.write(f"{bands[ib]}") for ie in range(ne): wr.write(" %+7.4f" % evv.eigenvectors[ib,nb-1-ie]) wr.write("\n") wr.close() return # ---------------------------------------------------------------------- def parse_options(): page = sys.argv[1] clip = sys.argv[2] # name of clip to take pixels from. mask = sys.argv[3] # Name of mask to select the pixels. vset = sys.argv[4] # Name of data vector set return page, clip, mask # ---------------------------------------------------------------------- def band_error(band, msg): sys.stderr.write(f"** {band}: {msg}\n") assert False # ---------------------------------------------------------------------- main()