#! /usr/bin/gawk -f
# Last edited on 2019-04-23 19:07:54 by stolfilocal

# Replaces every named inline reference <ref name=FOO>...</ref> by the
# self-closing link <ref name=FOO/>, and writes each extracted reference
# (followed by a blank line) to a side file.
#
# Input:  text on stdin or in the files named on the command line.
# Output: the rewritten text on stdout; the extracted references in the
#         file named by {ofile} (default ".therefs"; may be overridden
#         on the command line with "-v ofile=NAME").
#
# Limitation: an opening tag and its closing </ref> must be on the same
# input line.  Otherwise a diagnostic is written to stderr and the rest of
# that line is copied to the output unchanged.

BEGIN {
  # Honor a name given with "-v ofile=..."; otherwise use the default.
  if (ofile == "") { ofile = ".therefs" }
}

{
  lin = $0   # Part of the input line not yet processed.
  new = ""   # Output line assembled so far.

  # An opening tag is "<ref name=...>" whose last non-blank character
  # before the ">" is not "/" (that would be an already self-closing
  # "<ref name=.../>", which must be left alone).  Slashes elsewhere in
  # the tag, as in name="a/b", are allowed.
  while (match(lin, /<ref *name=([^<>]*[^<>\/ ])?[ ]*>/) > 0) {
    # Copy the part before the <ref name=...> to the output line:
    new = (new substr(lin, 1, RSTART-1))
    lin = substr(lin, RSTART)

    # Build the self-closing tag <ref name=.../> by replacing the final
    # ">" of the opening tag.  This must be done now, because the next
    # match() call overwrites RSTART and RLENGTH:
    tag = (substr(lin, 1, RLENGTH-1) "/>")

    # Find the closing </ref>:
    if (match(lin, /<[ ]*[\/][ ]*ref[ ]*>/) == 0) {
      printf "line %d: ** ref not closed [%s]\n", FNR, lin > "/dev/stderr"
      # Give up on this line; the remainder is copied verbatim below.
      break
    }

    # Extract the whole reference, from the opening tag through </ref>:
    ref = substr(lin, 1, RSTART-1+RLENGTH)
    lin = substr(lin, RSTART+RLENGTH)

    # Write the reference to the side file, followed by a blank line:
    print ref > ofile
    print "" > ofile

    # Put the self-closing tag in place of the reference:
    new = (new tag)
  }

  # Whatever is left has no (closed) named reference; copy it as is.
  new = (new lin); lin = ""
  print new
  next
}

END {
  # Harmless if no reference was found and the file was never opened.
  close(ofile)
}