#! /bin/bash


# Input is list of wikipedia refs <ref name={xxx}>{yyy}</ref>, each in
# a single line, one per line, where {xxx} is ascii
# letters, digits, or punctuation [-_.:"]. If the contents {yyy}
# is a {{cite ...}} template, undoes it.

# Blank lines are passed through.

# Private scratch directory: avoids predictable names directly under
# /tmp, and is removed automatically when the script exits.
tmpdir="$(mktemp -d)" || { echo "** cannot create temporary directory" >&2; exit 1; }
trap 'rm -rf -- "${tmpdir}"' EXIT

# Prefix for the temporary files of this run.
tmp="${tmpdir}/$$"

# Step 1. Convert each reference to a Python3 dictionary,
# and create a little program to process the latter.
#
# The generated program has three parts, all written to ${pyprog}:
#   1. a header that imports the converter and opens the list "refs";
#   2. one dictionary literal per input reference, produced by gawk
#      from the standard input of this script;
#   3. a footer that closes the list and prints each converted
#      reference followed by blank lines.

pyprog="${tmp}_dic.py"

{
  # Header. The import must be part of the generated program, not of
  # this script's output.
  # NOTE(review): assumes the module "convert_python_dict_to_ref"
  # defines a function with the same name, since the footer calls
  # that name directly -- confirm against the module.
  printf '%s\n' \
    '#! /usr/bin/python3' \
    'from convert_python_dict_to_ref import convert_python_dict_to_ref' \
    'refs = [\'

  # Body. The inline program must be given with "-e": once "-f" is
  # used, gawk treats the first non-option argument as an input file
  # name rather than as program text. The function
  # convert_cite_to_dict() comes from the "-f" library file.
  #   * blank input lines become blank lines inside the list literal;
  #   * lines starting with <ref name=...> become list entries;
  #   * anything else is reported on stderr and dropped.
  gawk -b -f convert_cite_to_dict.gawk -e \
    ' /^[ ]*$/ { print ""; next; }
      /^[ ]*<ref name=[-_.:"a-zA-Z0-9]+ *>/ {
        dic = convert_cite_to_dict($0)
        printf "    %s,\n", dic
        next;
      }
      // {
        printf "** bad format «%s»\n", $0 > "/dev/stderr";
      }
    '

  # Footer. Arguments of printf "%s" are copied verbatim, so the
  # Python "%s\n" format below reaches the file unchanged.
  printf '%s\n' \
    '  ]' \
    'for dic in refs:' \
    '  ref = convert_python_dict_to_ref(dic)' \
    '  print("%s\n" % ref)' \
    '  print("")'
} > "${pyprog}"

# Step 2: run that Python program to regenerate the list of
# references, sort its output lines, and strip the leading "@@..."
# prefix from each line before printing it.
#
# Each non-blank output line is expected to start with "@@", then
# optional digits and lowercase letters, then one or more spaces
# (presumably a sort key added by convert_python_dict_to_ref --
# confirm against that module).
#
# NOTE(review): python3 puts the directory of ${pyprog} first in its
# module search path, not the current directory; the module
# convert_python_dict_to_ref must therefore be reachable through
# PYTHONPATH or be installed -- confirm how it is deployed.

python3 "${pyprog}" \
  | sort \
  | gawk -b \
    ' # The generated program prints blank separator lines after each
      # reference; they carry no data, so skip them silently.
      /^[ ]*$/ { next; }
      /^@@/ {
        lin = $0;
        # Remove only the leading key: an unanchored gsub() would also
        # delete any "@@..." look-alike inside the reference text.
        sub(/^@@[0-9]*[a-z]*[ ]+/, "", lin);
        print lin; print "";
        next;
      }
      // {
        printf "** bad re-format «%s»\n", $0 > "/dev/stderr";
      }
    '