# Last edited on 2013-03-18 12:07:16 by stolfilocal

ANALYSIS OF IMAGEMAGICK'S RESIZE

  make-dot-images.gawk > data/dots.pgm
  convert dots.pgm -resize '50x480!' PGM:- | pnmnoraw > dots-r50.pgm
  convert dots.pgm -resize '70x480!' PGM:- | pnmnoraw > dots-r70.pgm
  convert dots.pgm -resize '20x480!' PGM:- | pnmnoraw > dots-r20.pgm
  
  Converting the PGM samples to filter weights:
  
    gawk '//{ printf "    %d", $1; for (i=2; i<=NF; i++) { printf " %+8.5f", ($(i)-16383)/(49152 - 16383); }; printf "\n"; }'
  
  Filter used for expansion 60-->70:

    xc weights from xc to x
    -- --------------------------------------------------------------------------------------------------------------------
    31 -0.01019 +0.08755 +0.78144 +0.19564 -0.02206
    32   .      -0.00137 +0.02130 +0.71870 +0.33092 -0.03088
    33   .        .        .      -0.01526 +0.61061 +0.47530 -0.03064
    34   .        .        .        .      -0.03064 +0.47530 +0.61061 -0.01526
    35   .        .        .        .        .      -0.03088 +0.33092 +0.71870 +0.02130 -0.00137
    36   .        .        .        .        .        .      -0.02206 +0.19564 +0.78144 +0.08755 -0.01019
    37   .        .        .        .        .        .        .      -0.01019 +0.08755 +0.78144 +0.19564 -0.02206
    38   .        .        .        .        .        .        .        .      -0.00137 +0.02130 +0.71870 +0.33092 -0.03088

  Filter used for reduction 60-->50:

    xc  weights from xc to x
    --  ----------------------------------------------------------------------------------------------------------
    31 +0.00067 -0.01623 +0.06405 +0.82471 -0.05078 +0.01242
    32   .      +0.00613 -0.05664 +0.22567 +0.74393 -0.11120 +0.02508
    33   .        .      +0.01553 -0.09744 +0.41365 +0.59810 -0.12182 +0.02408
    34   .        .        .      +0.02408 -0.12182 +0.59810 +0.41365 -0.09744 +0.01553
    35   .        .        .        .      +0.02508 -0.11120 +0.74393 +0.22567 -0.05664 +0.00613
    36   .        .        .        .        .      +0.01242 -0.05078 +0.82471 +0.06405 -0.01623 +0.00067
    37   .        .        .        .        .      +0.00067 -0.01623 +0.06405 +0.82471 -0.05078 +0.01242
    38   .        .        .        .        .        .      +0.00613 -0.05664 +0.22567 +0.74393 -0.11120 +0.02508

  Filter used for reduction 60-->20:

    xc  weights from xc to x
    --  ----------------------------------------------------------------------
    31 +0.00424 -0.03119 +0.12728 +0.27090 -0.04877 +0.01038
    32   .        .        .      +0.33431
    33   .      +0.01038 -0.04877 +0.27090 +0.12728 -0.03119 +0.00424
    34   .      +0.00424 -0.03119 +0.12728 +0.27090 -0.04877 +0.01038
    35   .        .        .        .      +0.33431
    36   .        .      +0.01038 -0.04877 +0.27090 +0.12728 -0.03119 +0.00424
    37   .        .      +0.00424 -0.03119 +0.12728 +0.27090 -0.04877 +0.01038
    38   .        .        .        .        .      +0.33431

  Plotting the filters:
  
    analyze-imagemagick-filter.sh 20 50 70 
    
    
COMBINING TRUTH IMAGES

  Combining the "truth" images (positive and negative) into a single image:

    cd data
    for img in "storefronts" ; do
      convert ${img}-truth0.png ${img}-truth0.pgm
      convert ${img}-truth1.png ${img}-truth1.pgm
      pnmarith -max ${img}-truth0.pgm ${img}-truth1.pgm > ${img}-truth01.pgm
      convert ${img}-truth01.pgm ${img}-truth01.png
    done
    
  Then cleaned up storefronts-truth01.png by hand, excluding letters that are

    * tilted more than 30 degrees, or rotated 90 degrees

    * not legible in isolation in the binarized image

    * cursive or hollowed
  
  Also made sure that the letters were separated by at least 1 pixel of black.

  Plotting the box dimensions:
  
    gnuplot
    set xrange [0:60]
    set yrange [0:60]
    plot "sgtr/storefronts-legup01.txt" using (column(5)+rand(0)-0.5):(column(6)+rand(0)-0.5) with points

EXTRACTING THE BBOXES OF THE TRUTH TABLES

  Segmentation reference files:

    imagenames=( storefronts-legup01 )

  This should create files "sgtr/${img}-chs.txt" and "sgtr/${img}-wds.txt":
    
    make all-segtruth
    
  Assigning word indices to the character file:
  
    for img in ${imagenames[@]} ; do 
      sort-segs-by-words.gawk \
          -v charFile=sgtr/${img}-chs.txt \
          -v wordFile=sgtr/${img}-wds.txt \
        > sgtr/${img}-chs-wds.txt 
    done