#! /usr/bin/gawk -f
# Last edited on 2019-04-23 19:07:54 by stolfilocal

# Replaces each named inline reference <ref name=FOO>...</ref> that opens and
# closes on the same line by a remote link <ref name=FOO/>, and writes each
# extracted reference, followed by a blank line, to the file ".therefs".

# Choose the file that receives the extracted references.  The default is
# ".therefs"; a different name can be supplied on the command line with
# "-v ofile=NAME", because "-v" assignments take effect before BEGIN runs.
BEGIN {
  if (ofile == "") { ofile = ".therefs" }
}

# Main rule, applied to every input line.  Each named reference
# <ref name=FOO>...</ref> that opens and closes on this line is written to
# the file {ofile}, followed by a blank line, and is replaced in the line by
# the self-closing tag <ref name=FOO/>.  The edited line is printed to
# standard output.
{
  rest = $0   # Part of the line that has not been scanned yet.
  out = ""    # Part of the output line assembled so far.

  # A "/" inside the opening tag prevents a match, so tags that are already
  # self-closing are left untouched.
  while (match(rest, /<ref *name=[^<>\/]*>/)) {
    open_len = RLENGTH

    # Move the text that precedes the opening tag to the output line.
    out = out substr(rest, 1, RSTART - 1)
    rest = substr(rest, RSTART)

    # Build the self-closing tag: the opening tag with its final ">"
    # replaced by "/>".
    tag = substr(rest, 1, open_len - 1) "/>"

    # Look for the closing </ref>; without it, report the problem and stop
    # scanning, so that the remainder of the line is copied unchanged.
    if (match(rest, /<[ ]*[\/][ ]*ref[ ]*>/) == 0) {
      printf "line %d: ** ref not closed [%s]\n", FNR, rest > "/dev/stderr"
      break
    }

    # The reference runs from the opening tag through the end of </ref>.
    ref_len = RSTART + RLENGTH - 1
    print substr(rest, 1, ref_len) > ofile
    print "" > ofile

    # Leave only the self-closing tag in the output line and go on with
    # whatever follows the closing </ref>.
    out = out tag
    rest = substr(rest, ref_len + 1)
  }

  # Whatever was not consumed goes out unchanged.
  print out rest
  next
}

# Once all input has been processed, close the file of extracted references.
END { close(ofile) }