#! /bin/bash 
# Last edited on 2013-03-02 01:06:12 by stolfilocal

# Works only for the "Places and streets created" and "Place polygons edited" lists.
# For roads, the watchlist has only revision numbers, not object numbers.

# Scratch files live in a private directory created by mktemp instead of
# under a predictable name directly in /tmp.  ${tmp} remains a path PREFIX:
# the intermediate files are named ${tmp}-<something>.htm.  The directory is
# removed when the script exits.
tmpdir="$(mktemp -d /tmp/contrib-XXXXXX)" || {
  printf '%s\n' "$0: cannot create a temporary directory" >&2
  exit 1
}
trap 'rm -rf -- "${tmpdir}"' EXIT
tmp="${tmpdir}/$$"

# Stage 1: read the HTML page from standard input, break it into one
# element per line, strip page decoration, and save the result in
# ${tmp}-lines-raw.htm.
#
# split_html_lines -- filter from stdin to stdout:
#   1. expand converts tabs to blanks.
#   2. gawk trims each input line, squeezes runs of blanks, and appends the
#      line to the buffer "lin" (joined with one blank).  It then cuts the
#      buffer repeatedly: just BEFORE an opening <li>, <ul>, <div>, <select>,
#      <option> or <?--...> tag, and just AFTER the corresponding closing
#      tag.  The search starts at column 2 of the buffer, so a tag sitting at
#      the very start of the buffer is not matched again after a cut.
#      Whatever is left in the buffer is printed at end of input.
#   3. sed deletes decoration: icons, panel buttons, style attributes,
#      "return false" handlers, <select>/<ul>/</div> tags, <option> elements,
#      empty tables, and "js" comments.
#   4. grep -E drops blank lines and the lines that consist only of a panel
#      or layout <div ...> opening tag.
split_html_lines() {
  expand \
    | gawk \
        ' BEGIN{ lin = ""; }
          //{
            # Trim the line and squeeze internal runs of blanks.
            gsub(/^[ ]+/, "", $0);
            gsub(/[ ]+$/, "", $0);
            gsub(/[ ][ ]+/, " ", $0);
            if (lin != "") { lin = (lin " "); }
            lin = ( lin $0 );
            ok = 0;
            while(ok == 0)
              { ok = 1;
                gsub(/^[ ]+/, "", lin);
                # The search skips column 1, so RSTART is one less than the
                # position of the tag within lin.
                if (match(substr(lin,2), /[<][/]?(li|ul|div|.--|div class|select|option)[^<>]*>/))
                  { if (substr(lin,RSTART+2,1) == "/")
                      { print substr(lin,1,RSTART+RLENGTH);
                        lin = substr(lin,RSTART+RLENGTH+1);
                      }
                    else
                      { print substr(lin,1,RSTART);
                        lin = substr(lin,RSTART+1);
                      }
                    ok = 0;
                  }
              }
          }
          END{ print lin; }
        ' \
    | sed \
        -e 's: *<img align="top" hspace="0" src="/img/menu/user.png">: :g' \
        -e 's: *<span id="wm-panel-[0-9][0-9]*-close-button" class="control-button close"></span>: :g' \
        -e 's: *<span id="wm-panel-[0-9][0-9]*-hide-button" class="control-button hide"></span>: :g' \
        -e 's: *style="[^"<>]*; *": :g' \
        -e 's: *;return false: :g' \
        -e 's: *<p *>:<p>:g' \
        -e 's: *<span class="bullet" *>...</span>: :g' \
        -e 's: *<[/]*select[^<>]*>: :g' \
        -e 's: *<[/]*ul[^<>]*>: :g' \
        -e 's: *<[/]div>: :g' \
        -e 's: *<option[^<>]*>[^<>]*<[/]option>: :g' \
        -e 's: *<span id="jwindow3_body" *></span>: :g' \
        -e 's: *<span class="smcl"><span class="prevnext" [^<>]*> *[A-Z]* *<img src=[^<>]*> *[A-Z]* *</span> *</span>: :g' \
        -e 's: *<table *><tbody *><tr *><td *> *</td *> *<td *> *</td *> *</tr *> *</tbody> *</table>: :g' \
        -e 's: *<.-- js [^<>]* -->: :g' \
    | grep -E -v -e '^[ ]*$' \
    | grep -E -v -e '^<div (id="wm-panel-([0-9][0-9]*|contents*-[0-9][0-9]*)|class="(container|row|filters|well))[^<>]*> *$'
}

split_html_lines > "${tmp}-lines-raw.htm"
  
# cat ${tmp}-lines-raw.htm
# exit 0
  
# Stage 2: turn the remaining markup of each list item into {KEY=value}
# fields, reading ${tmp}-lines-raw.htm and writing ${tmp}-lines.htm.
#
# mark_items [FILE...] -- sed filter; reads the given files, or stdin when
# none is given, and writes to stdout.  In rule order it:
#   - drops user icons, <h3> tags, "deleted" marks, "modifications"
#     paragraphs, revision captions, and RoadRev.clear() calls;
#   - simplifies the jevals=... onclick wrappers to plain jwindow(...) calls
#     and blanks the link to one hard-coded user account;
#   - rewrites user links to {USER=...}, test_zoom(...) handlers to
#     {LON1=} {LAT1=} {LON2=} {LAT2=} {ZOOM=} {FLAG=}, object links to
#     {OBJ=...} {LANG=...}, history links to {LREV=...} or {OBJ=...}, and
#     link texts to {NAME=...};
#   - replaces a few "&amp;" occurrences by words;
#   - finally turns every line of the form "<li> text </li>", with no other
#     tag left inside, into "@@ text".
# Lines that do not end up starting with "@@" were not fully recognized.
mark_items() {
  sed \
    -e 's: *<i class=["]icon icon-user["] *></i>: :g' \
    -e 's: *<[/]*h3 *>: :g' \
    -e 's: *- <span *>deleted</span>: :g' \
    -e 's: *<p class="modifications" *> .* </p>: :g' \
    -e 's: *Rev.[&]nbsp;[0-9]*</a>[&]nbsp; *<i>[0-9]*[ a-z]*</i> *</p>:</a></p>:g' \
    -e 's: *RoadRev.clear(); *: :g' \
    -e 's: *parent.jevals=.jwindow([\\]\(.\)\([^<>\\]*\)[\\]\(.\), *\([0-9]\));.:jwindow(\1\2\3, \4):g' \
    -e 's: *jevals=.jwindow([\\]\(.\)\([^<>\\]*\)[\\]\(.\), *\([0-9]\));.:jwindow(\1\2\3, \4):g' \
    -e 's: *onclick=["]jwindow(./user/1654246., *1) *; *" *>JorgeStolfiBR</a>:></a>:g' \
    -e 's:[&]amp;object_type=[0-9]*[&]amp;rev=[0-9]*: :g' \
    -e 's: *<p> *<a href="/user/\([0-9][0-9]*\)"> *</a> *</p>:{USER=\1}:g' \
    -e 's: *<li onclick="test_zoom[(]\([-+.0-9][.0-9]*\), *\([-+.0-9][.0-9]*\), *\([-+.0-9][.0-9]*\), *\([-+.0-9][.0-9]*\), *\([-+.0-9][.0-9]*\), *\([-+.0-9][.0-9]*\) *);">:<li>{LON1=\1} {LAT1=\2} {LON2=\3} {LAT2=\4} {ZOOM=\5} {FLAG=\6} :g' \
    -e 's: *<a href="/\([0-9]*\)/\([a-z]*\)/" *onclick="jwindow(./\1/\2/., *1) *;">:{OBJ=/\1} {LANG=\2} <a>:g' \
    -e 's: *<a href="/\([a-z][a-z]*\)/\([0-9]*\)/\([a-z]*\)/" *onclick="jwindow(./\1/\2/\3/., *1) *;">:{OBJ=/\1/\2} {LANG=\3} <a>:g' \
    -e 's: *<p> *<a href="/*\([a-z][a-z]*\)/history/show/[?]id=\([0-9]*\)[&]amp;rev=\([0-9]*\)" onclick="jwindow(./\1/history/show/[?]id=\2[&]amp;rev=\3., *2) *;">:{LREV=\1/\2} <p><a>:g' \
    -e 's: *<p> *<a href="/*linear/history/show/[?]id=[&]amp;rev=\([0-9]*\)" onclick="RoadRev.load(2, *\1) *; *" *>:{LREV=\1} <p><a>:g' \
    -e 's: *<p> *<a href="/history/show/[?]id=\([0-9]*\)[&]amp;rev=\([0-9]*\)" onclick="jwindow(./history/show/[?]id=\1[&]amp;rev=\2., *2) *; *" *>:{OBJ=/\1} <p><a>:g' \
    -e 's: *<a>\[unnamed object\]</a>:{NAME=UNNAMED}  :g' \
    -e 's: *<a> *\([A-Za-z0-9`(][^<>{}]*\)</a>:{NAME=\1} :g' \
    -e 's: *\[Linear objects\] Roads: :g' \
    -e 's: *<a> *</a>: :g' \
    -e 's: *<p> *</p>: :g' \
    \
    -e 's: [&]amp; : and :g' \
    -e 's:A[&]amp;N:Andaman and Nicobar:g' \
    -e 's:24[&]amp;25:24 and 25:g' \
    \
    -e 's:^ *<li *> *\([^<>]*\) *</li> *$:@@ \1:g' \
    -- "$@"
}

mark_items "${tmp}-lines-raw.htm" > "${tmp}-lines.htm"
  
# cat ${tmp}-lines.htm 
# exit 0

# printf "=== first 10 ======================================================================\n"  1>&2           
# head -10 ${tmp}-lines.htm 1>&2
# printf "=== last 10 =======================================================================\n"  1>&2           
# tail -10 ${tmp}-lines.htm 1>&2
# printf "===================================================================================\n"  1>&2           

# Diagnostics, all written to stderr so that stdout carries only the table.
#
# report_leftovers FILE -- prints, between banner lines:
#   - the lines of FILE that do not start with "@@", i.e. lines that were
#     not reduced to a complete item;
#   - the lines of FILE that still contain one of the characters ; < > &,
#     i.e. lines with leftover HTML markup or entities.
report_leftovers() {
  local lines_file=$1
  printf "=== still without @@ =================================================================\n" >&2
  grep -v -e '^@@' -- "${lines_file}" >&2
  printf "=== still with HTML markup ========================================================\n" >&2
  grep -e '[;<>&]' -- "${lines_file}" >&2
  printf "===================================================================================\n" >&2
}

report_leftovers "${tmp}-lines.htm"

# Extract items:
#
# delimit_fields [FILE...] -- sed filter; reads the given files, or stdin
# when none is given.  It puts a "|" between adjacent {KEY=value} fields and
# between the "@@" marker and the first field, and removes blanks after the
# last "}" of the line.
delimit_fields() {
  sed \
    -e 's:} *{:}|{:g' \
    -e 's:@ *{:@|{:g' \
    -e 's:} *$:}:g' \
    -- "$@"
}

# convert-contrib-lines-to-table.gawk and txtable-reformat are external
# commands that must be on PATH; they are not defined in this script.
delimit_fields "${tmp}-lines.htm" \
  | convert-contrib-lines-to-table.gawk \
  | txtable-reformat

# Remove the intermediate files (the glob is deliberately left unquoted).
rm -- "${tmp}"-*.htm


