#! /usr/bin/python3
# Generates the HTML report page "Detecting ink under paint" and writes it
# to standard output.  Relies on the project modules {html_gen} (markup
# helpers), {html_report_funcs} (image/figure helpers), and
# {process_funcs.bash} (shell command runner).

import sys, re

import html_gen as h
from process_funcs import bash
import html_report_funcs as hr

last_edit = "Last edited on 2025-11-19 07:37:31 by stolfi"

def main():
    """Builds the whole "Detecting ink under paint" report as an HTML
    document and writes it to {sys.stdout} via {h.output_doc}.
    Returns 0 on success."""
    title = "Detecting ink under paint"
    st = h.new_doc(title, "#eeffdd")

    h.parags(st, """This webpage is about the many places in the Voynich Manuscript (VMS) where glyphs, words, or entire lines were (or are suspected to have been) obfuscated by being painted over with a watercolor or tempera type of paint. The paint is often semi-transparent, given some hope that the original text and drawings can be recovered with the help of image processing techniques. Such recovery could improve the transcription of the text affect the interpretation of the figures.""")

    h.section(st, 2, "Evidence for obscured ink")

    h.parags(st, """The Painter who applied the semi-opaque tempera colors often painted over inked outlines. Examples are easily seen where these inked strokes were still dark and clear, like (A,B) below.""")

    # TODO(review): the original had the unresolved placeholder
    # "???f079r1-green-over-ink, or maybe the whole pool" here -- confirm
    # which image name {hr.online_image} should receive.
    hr.online_image(st, "f079r1-green-over-ink")

    h.parags(st, """Besides obscuring those strokes, it seems that the painting also washed away some of the ink, and sometimes deposited it a short distance away, as in (D). Thus any ink strokes that were already quite faint and faded, like (E), must have become invisible to the naked after being painted over. 
In particular, there seems to be something between the feet of that nymph, and maybe for something under the east end of the "backrest" she is leaning onto.""")

    h.section(st, 2, "Principles of Bayesian ink-paint separation")

    h.parags(st, """The idea is as follows.""")

    h.begin_enum(st, "ul")
    h.enum_item_parags(st, """Take an image of an area which is suspected of having "invisible" drawings or text under some semi-opaque paint.""")
    h.enum_item_parags(st, """Select a set of pixels A representative of what one wants to detect, like places where there is definitely ink covered by green paint.""")
    h.enum_item_parags(st, """Select one or more additional sets B, C, ... that are to be distinguished from A -- like places where there is green paint with but almost surely without ink underneath.""")
    h.enum_item_parags(st, """Look at the colors of those pixels as points of three-dimensional space, within the unit cube where (0,0,0) is black, (1,1,1) is white, (1,0,0) is red, etc. Here is an example with three subsets of a page, representative of blank vellum (red), dark text ink (green), and green paint over blank vellum (blue):""")

    # TODO(review): the original had the unresolved placeholder
    # "???color clouds" here -- replace with the actual scatter-plot image name.
    h.enum_item_image(st, "color clouds")

    h.enum_item_parags(st, """Approximate each cloud ou points A, B, C, ... by a trivariate Gaussian probability density function (PDF). This can be visualized as a fuzzy ellipsoid with varied dimensions along three axes, with some generic orientation in space.""")
    h.enum_item_parags(st, """Take each pixel of the image and use Bayes's formula to estimate the probability that the pixel belongs to each distribution A, B, C, ... 
or is an "outlier" that probably does not belong to any of them.""")
    h.enum_item_parags(st, """Write one grayscale image for each set, showing the probability of each pixel belonging to that set.""")
    h.end_enum(st, "ul")

    h.parags(st, """As others have pointed out, the texture of the vellum itself provides a lot of subtle lines and curves, which can easily be confused for traces of drawing or text. However, the above approach i honest at least in the sense that the final classification is made independently for each pixel, based only on its color; without trying to look for multi-pixel patterns like lines or characters. Which is where actual pareidolia comes in. It will be left to the human user to "see" such patterns on the computed probability maps. The user's choice of sample pixels will influence the classification, but only through their colors, not through their positions or adjacency relations. This classification method is rather robust in the sense that small perturbations of a pixel's color will usually not affect the classification, unless that color is on the transition between two provinces with overlapping color distributions; and even then the change in the classification (the probability of the pixel belonging to each propvince) will be gradual.""")

    h.section(st, 3, "Challenges and limitations")

    h.parags(st, """The following image (a clip of f79r magnified 400%) shows some of the challenges on the way to uncovering drawings that have been painted over.""")

    hr.image(st, "f79r magnified 400%")

    h.parags(st, """There are some ink traces, like (A), that were quite dark to begin with and suffered little from being painted over, apart from their color getting mixed with the paint color. But there are some ink traces, like (B), that are very faint, almost invisible. Would we be able to recover them if they were painted over? 
I think that smudges at ( C) look like they were the original outline of the nymph's left foot which was incorrectly retraced as the thick dark stroke a bit further to the NW. Those smudges are only a little fainter than the toes of that same foot, which almost certainly were there. To make things worse, when ink strokes were painted over, sometimes the ink would dissolve and either would spread around, or would be pushed by the brush for a small distance; as seems to have happened at (D). Presumably, some strokes were completely washed away and spread over a larger area, mixed with the green paint. At (E) there are some smudges which look like those at ( C) or at the nymph's toes, and they seem to form a rounded shape that does not seem to be just a random stain or vellum defects. Will we be able to determine if it was indeed painted-over ink, and, if so, recover enough of it to tell what it is?""")

    h.section(st, 3, "Case study: leaf on f22r")

    h.parags(st, """It has been concetured that there is some writing under the green paint on f22r. Specifically, the front leaf of the second leaf bundle on the left side of the stem.""")

    hr.image(st, "f22r leaf 2,W - clip")

    h.parags(st, """However, there is some bleedthrough of the dark brown ink of the other side (f22r) that may be responsible for those darker streaks on that clip:""")

    hr.image(st, "f22r leaf 2,W - pilc")
    hr.image(st, "f22r leaf 2,W - blip")

    # Was "hr.parags" in the original -- every other paragraph call in this
    # file uses h.parags, so this was almost surely a module-prefix typo.
    h.parags(st, """Image B is a clip of the matching area on f22r (as accurately as I could determine it), flipped left-to-right for convenient comparison. Image C is image B with inverted colors and ~50% transparency overlaid on image A. On the left half of mage A, notice the faint ghosts of the berries of image B. On image C, notice that there are many berries on the other side of your area of interest. 
However, the ghosts are not that strong, so maybe there are indeed inked details under the green paint.""")

    h.section(st, 3, "Case study: ochre structure on f35r")

    hr.image(st, "f35r???")

    h.parags(st, """There does seem to be writing inside that big ocher-painted area, but is it just bleedthrough or offset? Offset from f34v can be excluded since there is no writing on f34v anywhere near that area. It could be offset from some other page, that happened back when the bifolios were still unbound. But it does look like bleedthrough from f35v. Here is a clip of f35v corresponding to that area, flipped L-R for easy comparison:""")

    h.parags(st, """It does look like most if not all of the "writing under the ocher paint" is indeed bleed-through of the writing on f35v. Here is the second clip with colors inverted and transparent background, overlaid on the first clip: There still seems to be some ink traces in the right half of that structure, not attributable to bleedthrough. They may be decoration.""")

    h.section(st, 3, "Using Bayesian classification for retracing")

    # Was "h.parags(sr, ...)" in the original: {sr} is undefined; the
    # document handle is {st} everywhere else.
    h.parags(st, """As others have noted, the colors of the text pixels are a continuum between full ink color and bare vellum color. That holds at least for the orginal and first retrace (Rt1) inks. These two inks sem to have the same hue and differ mostly in brightness, and both merge with the blank parchment ink as they become fainter, and along the edges of the strokes.""")

    h.section(st, 3, "Using pixel windows instead of single pixels")

    h.parags(st, """A more sophisticated variant of this method would use pixel windows instead of single pixels. Namely, for each pixel on the image, we extract the color of that pixel and of a fixed set of M nearby pixels, such as the 4 nearest neighbors (M=5) or a 3x3 window centered at the pixel (M=9). Then each sample and each pixel to be classified becomes a point of R^{3*M}, and the distributions for each province are multivariate Gaussians. 
This approach may be valuable to recover strokes that are only a pixel wide.""")

    # Was "h.section(st, 3. "Prior probabilities")" -- a period instead of
    # a comma after the level number (SyntaxError).
    h.section(st, 3, "Prior probabilities")

    h.parags(st, """A fundamental limitation of Bayesian inference is that it does not give the probabilities of each possible cause, it only tell us how to change out a priori probabilities. So we must specify those priors in order to use the formula. When the evidence is strong enough, the results do not depend much on the priors. These matter only when the evidence is ambiguous. In the worst case, when the evidence carries no information about the cause, the results will be the same as the priors. When it rains at night, streets are usually wet by the next morning. If you have no special information about the night's weather, and you see the street wet in the morning, it is reasonable to assume that it rained. Bayes's formula will say so, whether your prior for "it rained" is 1% or 99%. That's because P(Wet|NoRain) is very small; say, 0.0001 (but not zero, because there may have been a flood or a street washing truck mat have showed up. While P(Wet|Rain) is basically 1. Therefore P(Rain)P(Wet|Rain) is still much bigger than P(NoRain)P(Wet|NoRain) in any case, and these numbers will become ~1 and ~0 after normalization. But if you are sure that it did not rain, because you have been out stargazing in the garden all night, the wet street should not make you change your belief. That's because your P(Rain) will be essentially zero, and then P(Rain)P(Wet|Rain) will become much smaller than P(NoRain)P(Wet|NoRain), even if P(Wet|NoRain) is only 0.00001. For the analysis of the f79r pool illustrated above the prior probability of "OTHER" was set arbitrarily at 0.05, and that of each of the three other classes was set to 0.95/3 = ~0.32.""")

    h.section(st, 3, "Comparison with linear vector machines")

    h.parags(st, """This approach is somewhat similar to the so-called linear vector machine (LVM) classification. 
In that method, each object to be classified (in this case, a pixel, or a small pixel window) is a vector of some d-dimensional space R^d, and any two classes are separated by a set of linear functions. In contrast, Bayesian classification with Gaussian distributions is inherently non-linear, and usually extremely so. For instance, imagine that you have only two Gaussian classes (plus "OTHER"), where class A has a very broad spherical distribution centered at middle gray (0.5,0.5,0.5) and class B has a much narrower one centered at slightly darker gray (0.4,0.4,0.4). Bayesian classification will assign class A to colors inside the A sphere, except within a small region around the darker gray, where it will say B. A linear classifier will be unable to delimit even the A sphere, much less the B hole inside it. That is why linear vector classifiers are usually applied to non-linear functions of the inputs, the (improperly) so called "kernels". Which requires the user to come up with suitable kernels. If one tries to use as kernels all polynomials on the input coordinates up to degree (say) 4, one gets so many kernels that the classification will probably be garbage. That is also a danger if one uses "magical" non-linear classifiers with zillions of internal parameters, like neural networks...""")

    h.section(st, 3, "Local vs. global training")

    h.parags(st, """The samples of each province can be collected locally, on the same page or even on the same figure under analysis; or globally, scatered all over the book. Global sampling could be slightly safer against accidental color variations affecting only a small area of a page, such as stains or ink blots. However, local sampling is justified because there seems to be some overall variation from folio to folio in the colors of parchment, ink, and paint. For instance, the green paint of f8r seems to be more bluish than that used in the Bio section. 
And there is also a much larger variety of paints and stains over the whole MS. On the southwest pool of f79r, for example, it is not necessary to include the red, blue, yellow, and rusty paints as separate provinces. They will be classified as "OTHER" without significantly affecting the classification of the other provinces of interest. And we don't have to worry about ketchup stains, or the gray offset from blue flowers, that are important "noise" features on some other pages.""")

    h.section(st, 3, "Imaging requirements")

    h.parags(st, """Ideally we should do this with high-resolution. The resolution should be high enough for the thinnest ink strokes to be several pixels across, so that there will be pixels entirely inside the stroke. Ideally the images should also be uncompressed (to avoid JPEG encoding artifacts), taken under frontal illumination (to reduce brightness variations due to the roughness of the vellum surface) with multiple narrow-band light from ultraviolet to infrared (to distinguish colors that just look the same), and with linear encoding (so that the color clouds will not be distorted away from the ideal Gaussian shape). Unfortunately we don't have multispectral scans for any of the pages that may have significant details hidden under the paint. And even those that we do have are taken with oblique lighting that creates light and dark spots at every tiny bump on the vellum surface. So we must do with the Beinecke 2014 scans, which have frustratingly low resolution (some ink traces being only a couple of pixels across), only the three RGB color coordinates, oblique illumination, non-linear "gamma" encoding, and complex JPEG compression artifacts.""")

    h.section(st, 3, "Where should we look")

    h.parags(st, """The herbal pages have green paint, but the ink that can be seen under it is just boring nervures or leaf outlines. 
At best, those images could be useful to validate this approach.""")

    h.section(st, 2, "When was the paint applied?")

    h.parags(st, """According to Rene, microscope examination of the folio number on f42 shows definitely that the paint was on top of the ink. The paint included small crystals which were obviously on top of everything else. Besides, it seems unlikely that whoever wrote the folio number would choose to write it over an already painted area.""")

    h.section(st, 2, "Some examples")

    h.parags(st, """Instances of ink traces being painted over are rather common. Here are some annotated examples.""")

    h.parags(st, """*NOTE*: The claims in image captions are all personal guesses with varied degrees of confidence. For brevity, they are stated as facts; however, the reader should assume disclaimers such as "apparently", "probably", "it seems that", etc. before every claim that is not totally evident from the images.""")

    # List the annotated clip images, sorted, into the scratch file ".files".
    bash("(cd images && ls -d f[0-9][0-9][0-9][rv][0-9]-* ) | sort > .files")
    # NOTE(review): {fnames} was undefined in the original.  Reading the
    # file-name list back from ".files" (one name per line, as produced by
    # the shell command above) is the evident intent -- confirm that
    # {hr.clip_fig_links_and_pages_enum} expects a list of names.
    with open(".files") as flist:
        fnames = [ln.strip() for ln in flist if ln.strip()]
    hr.clip_fig_links_and_pages_enum(st, fnames)

    h.output_doc(st, sys.stdout, 99, last_edit)
    return 0

# ----------------------------------------------------------------------

def test_html_gen():
    """Ad-hoc smoke test: prints the effect of {h.simple_markup} on a
    sample string to stderr (via {h.err})."""
    txt = "We need (/weed/) but not (*knot*)"
    h.err("[[" + txt + "]] -> [[" + h.simple_markup(txt) + "]]\n")
    return

# ----------------------------------------------------------------------
# test_html_gen()

if __name__ == "__main__":
    main()