#! /bin/bash


# Input is a list of Wikipedia refs <ref name={xxx}{YYYY}{z}>...</ref>,
# each on a single line, one per line, starting in column 1, where
# {xxx} is ASCII letters, {YYYY} is a 4-digit year, and {z} is one or
# more lowercase letters. Sorts them by year, then by {xxx}.

# The {YYYY}{z} part may be omitted, in which case {YYYY}{z} is assumed
# to be "0000". Blank lines are discarded, and a blank line is inserted
# after each ref.

# Report a failure of any pipeline stage (not only the last one) through
# the exit status of the pipeline.
set -o pipefail

#######################################
# Sorts Wikipedia <ref name={xxx}{YYYY}{z}>...</ref> lines.
#
# Stage 1 (gawk) drops blank lines and prefixes every ref with a sort key
#   "@@{YYYY}{z} ", normalizing the tag to "<ref name={xxx}{YYYY}{z}>".
#   A ref without {YYYY}{z} gets the key "0000", so it sorts first.
# Stage 2 (sort) orders the keyed lines bytewise (LC_ALL=C), i.e. by
#   {YYYY}{z} and then by the rest of the line, which starts with {xxx}.
#   The C locale keeps the result independent of the caller's locale.
# Stage 3 (gawk) strips the key again and emits a blank line after
#   each ref.
#
# Arguments: none
# Inputs:    ref lines on stdin, one per line
# Outputs:   sorted refs on stdout; lines that are not refs are reported
#            on stderr as "** bad format", and lines that reach stage 3
#            without a key (ref tags that could not be parsed) as
#            "** bad re-format". Both kinds are omitted from stdout.
#######################################
sort_refs() {
  gawk -b '
    # Blank lines carry no data.
    /^[ ]*$/ { next }

    /^[ ]*<ref / {
      # \1 is the letters part of the name, \2 the optional year+suffix.
      keyed = gensub(/^[ ]*<ref[ ]+name[ ]*=[ ]*([a-zA-Z]+)([0-9]*[a-z]*)[ ]*>/,
                     "@@\\2 <ref name=\\1\\2>", 1, $0)
      # An empty key means the year was omitted: sort it as year 0000.
      sub(/^@@ /, "@@0000 ", keyed)
      print keyed
      next
    }

    { printf "** bad format «%s»\n", $0 > "/dev/stderr" }
  ' \
    | LC_ALL=C sort \
    | gawk -b '
    /^@@/ {
      # Remove only the leading sort key; any "@@" inside the ref body
      # must be left alone, hence the anchor and sub() instead of gsub().
      sub(/^@@[0-9]*[a-z]*[ ]+/, "")
      print
      print ""
      next
    }

    { printf "** bad re-format «%s»\n", $0 > "/dev/stderr" }
  '
}

sort_refs

       
