#! /bin/bash
# Last edited on 2010-08-07 20:27:19 by stolfilocal

# Splits every image in the database into text regions with homogeneous
# fore/back color and uniform font size. Also creates reduced copies
# where the stroke/gap period is approximately 2*sqrt(2) pixels wide.

# Does the same with the binary ground truth images.
# The reduced versions of the masks are greyscale not binary.

# Finally creates binary masks for each text region, obtained by
# blurring, thresholding and negating the ground truth image.

# extr IDIR INUM TNUM HMIN VMIN HMAX VMAX PER
#
# Extracts one text region from image INUM of group IDIR.
#
# Reads:
#   full/true/IDIR/INUM.png   (used to build the mask, and cropped)
#   full/orig/IDIR/INUM.png   (cropped)
# Writes, for each KIND in "orig", "true", "mask":
#   crop/KIND/IDIR/INUM-TNUM.png   the cropped region at full resolution
#   cnat/KIND/IDIR/INUM-TNUM.png   the same, scaled by 100*2*sqrt(2)/PER percent
#
# The crop rectangle spans columns HMIN..HMAX and rows VMIN..VMAX, inclusive.
# PER is the stroke+gap width in pixels and must be a positive number.
#
# Returns 0 if every step succeeded, 1 if some image step failed (the
# remaining steps are still attempted), 2 if the arguments are unusable.
function extr() {

  if (( $# < 8 )); then
    echo "extr: expected 8 arguments (idir inum tnum hmin vmin hmax vmax per), got $#" >&2
    return 2
  fi

  local idir="$1"; shift;   # Image group
  local inum="$1"; shift;   # Image id-number in group
  local tnum="$1"; shift;   # Id-number of homogeneous text region in image
  local hmin="$1"; shift;   # Crop region first column
  local vmin="$1"; shift;   # Crop region first row
  local hmax="$1"; shift;   # Crop region last column
  local vmax="$1"; shift;   # Crop region last row
  local per="$1"; shift;    # Stroke+gap width (pixels)

  local hsize vsize pct brd tmpdir tempimg kind
  local fulltrueimg cropmaskimg cnatmaskimg
  local fullimg cropimg cnatimg
  local status=0

  # Compute the percent reduction factor ${pct}; reject a zero, negative
  # or non-numeric period instead of continuing with an empty factor:
  pct=$(gawk -v per="${per}" \
    'BEGIN{ if (per+0 <= 0) exit 1; printf "%.2f", 100.0*2*sqrt(2)/per; }') || {
    echo "extr: cannot compute reduction factor for period '${per}'" >&2
    return 2
  }

  # Compute the blur parameter ${brd} for mask broadening (half the
  # period, rounded to the nearest integer):
  brd=$(gawk -v per="${per}" 'BEGIN{ printf "%d", int(0.5*per + 0.5); }') || return 2

  # Compute the crop image dimensions (both bounds are inclusive):
  hsize=$(( hmax + 1 - hmin ))
  vsize=$(( vmax + 1 - vmin ))

  # Make sure that the output directories exist:
  mkdir -p {crop,cnat}/{orig,true,mask}/"${idir}" || status=1

  # Private scratch directory; the scratch file keeps a ".png" suffix so
  # that the output format is chosen from the file name:
  tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/extr.XXXXXX") || {
    echo "extr: cannot create temporary directory" >&2
    return 1
  }
  tempimg="${tmpdir}/crop.png"

  # Create a "mask" image for the text: blur the ground truth, threshold
  # at 99%, negate, and crop to the region.
  # NOTE(review): presumably the ground truth is dark text on a white
  # background, so the mask comes out white near the text - confirm.
  fulltrueimg="full/true/${idir}/${inum}.png"
  cropmaskimg="crop/mask/${idir}/${inum}-${tnum}.png"
  cnatmaskimg="cnat/mask/${idir}/${inum}-${tnum}.png"

  convert \
    "${fulltrueimg}" \
    -alpha Off \
    -gaussian-blur "${brd}x${brd}" \
    -threshold '99%' \
    -negate \
    -crop "${hsize}x${vsize}+${hmin}+${vmin}" \
    "${cropmaskimg}" || status=1

  # The reduced mask is thresholded again after scaling:
  convert \
    "${cropmaskimg}" \
    -scale "${pct}%" \
    -threshold '99%' \
    "${cnatmaskimg}" || status=1

  # Extract the text from the "orig" and "true" images:
  for kind in orig true ; do
    fullimg="full/${kind}/${idir}/${inum}.png"
    cropimg="crop/${kind}/${idir}/${inum}-${tnum}.png"
    cnatimg="cnat/${kind}/${idir}/${inum}-${tnum}.png"

    convert \
      "${fullimg}" \
      -alpha Off \
      -crop "${hsize}x${vsize}+${hmin}+${vmin}" \
      "${tempimg}" || status=1

    # Multiply by the mask, so pixels where the mask is black become black:
    composite \
      -compose Multiply \
      "${cropmaskimg}" \
      "${tempimg}" \
      "${cropimg}" || status=1

    convert \
      "${cropimg}" \
      -scale "${pct}%" \
      "${cnatimg}" || status=1
  done

  # Remove the scratch directory (the ":?" guards against an empty path):
  rm -rf -- "${tmpdir:?}"

  return "${status}"
}

# Table of homogeneous text regions to extract, one row per region.
# Columns, in the order expected by "extr":
#   group image region  hmin vmin  hmax vmax  period
# The trailing comment on each row is the text found in the region.
regions=(
  'dibc2009 001 000  251   18  1279  244  8.0' # "maedchen ... Diener zu be="

  'dibc2009 002 000    0   44  1148  172 30.0' # "D secundu~ dubium An"
  'dibc2009 002 001    0  166  1148  302 16.0' # "liceat ... co~mu="

  'dibc2009 003 000  146    0   373  279 40.0' # "S" (?)
  'dibc2009 003 001  373    0   861  264 40.0' # "alau"
  'dibc2009 003 002  482  260   540  307 11.0' # "im"
  'dibc2009 003 003  118  308   907  391 18.0' # "Marggafthum Rieder=Lausik,"
  'dibc2009 003 004  459  383   564  420  7.0' # "beneft"
  'dibc2009 003 005  220  419   307  457  7.0' # "deren"
  'dibc2009 003 006  305  418   802  460  7.0' # "Statuten, Recessen, Privilegien,"
  'dibc2009 003 007  331  458   692  489  6.5' # "un andern alten Urtunden."

  'dibc2009 004 000  801   85  1005  178 11.0' # "[ 2 ]"
  'dibc2009 004 001   75  165  1795  356 11.0' # "they are limited ... appointed by the"

  'dibc2009 005 000  187    0  1185  246 10.5' # "der Natur, ... der Koerperwelt"
)

# Process the regions in table order; each row is split on blanks into
# the eight arguments of "extr".
for row in "${regions[@]}"; do
  read -r -a fields <<<"${row}"
  extr "${fields[@]}"
done
 
