#! /usr/bin/gawk -f
# Last edited on 2003-07-21 22:22:01 by stolfi

# Reads an html page CTX that is obtained by clicking a "context"
# button in a NEC "Citation Search" report page.
# Outputs a list of links to the papers listed in the
# CTX page.

# Start-up: make `fld` an empty array, clear the `grab` flag (no
# separator line seen yet), and write an "@" marker line to stderr.
BEGIN {
  split("", fld);
  grab = 0;
  printf "@\n" > "/dev/stderr";
}

# A record ending in "<hr>" (optionally followed by blanks) sets the
# `grab` flag, so that the next anchor line gets processed.  A "*" is
# written to stderr as a progress mark.
$0 ~ /<hr> *$/ {
  grab = 1;
  printf "*" > "/dev/stderr";
  next;
}

# A record that starts with "<a".  Only the first such record after an
# "<hr>" separator is processed (the `grab` flag is cleared afterwards);
# the others are skipped.
#
# For a processed record, prints "URL TITLE" to stdout, where URL is the
# value of the href attribute and TITLE is the link text up to the next
# tag, with blanks replaced by "_" so that it forms a single field.
# When the link text is empty the output is just the URL followed by a
# blank.  A "!" line is written to stderr as a progress mark.  If no
# href can be parsed, the script stops through data_error().
/^[<][a]/ { 
  if (grab)
    { lin = $0;
      printf "!\n" > "/dev/stderr";
      # Group 1 = href value; group 2 = text between ">" and the next "<".
      # (The second group was missing, so fld[2] was always empty.)
      if (match(lin, /[<] *a +href *[=] *[\"]([^\"<>]*)[\"] *[>]([^<]*)/, fld))
        { url = fld[1];
          tit = fld[2];
          gsub(/[ ]/, "_", tit);
          # The format needs one "%s" per argument; the title used to be
          # silently dropped because only one was given.
          printf "%s %s\n", url, tit;
        }
      else
        { data_error(("no url?")); }
      grab = 0;
    }
  next;
}

# Catch-all: every record not consumed by a rule above is skipped.
{ next; }

# Writes a two-line warning to stderr: first the current file name,
# record number and the message `msg`, then the text of the current
# record.  Processing continues afterwards.
function data_warning(msg)
{
  printf "%s:%d: ++ Warning: %s\n   $0 = \"%s\"\n", FILENAME, FNR, msg, $0 > "/dev/stderr";
}

# Writes a two-line error report to stderr (file name, record number
# and the message `msg`, then the text of the current record), sets
# the global `abort` to -1, and exits with that value as the status.
function data_error(msg)
{
  printf "%s:%d: ** %s\n   $0 = \"%s\"\n", FILENAME, FNR, msg, $0 > "/dev/stderr";
  abort = -1;
  exit abort;
}