#! /usr/bin/gawk -f

# To be included by {remove_cite_templates.sh}

function convert_cite_to_dict(ref,    fld, dic, cont, cite, key, val) {
  # Converts a named citation {ref} in the format "<ref
  # name={xxx}>{yyy}</ref>" to a python3 dictionary, formatted as a
  # string "{ ... }". The name {xxx} must be ascii letters, digits, or
  # punctuation [-_.:"].
  #
  # The ref's name becomes the value (a string) of the dict entry with
  # key 'refname'. If the contents {yyy} contains a {{cite ...}}
  # template, each of its "|name=value" arguments becomes a key-value
  # pair where the key is the name of the parameter and the value is a
  # string literal; parsing stops at the first argument that has no
  # "=" (a positional argument). If the contents has no {{cite}}
  # template, it becomes a single pair with key 'contents'.
  #
  # Keys and values are stripped of leading and trailing blanks, and
  # every '"' inside a value is emitted as '\"'.
  #
  # On a malformed {ref}, prints a message to stderr and exits the
  # program with status 1.
  #
  # The names after {ref} in the parameter list are local variables;
  # callers must pass only {ref}.
  #
  # NOTE(review): keys are emitted without quotes ("refname: ...", not
  # "'refname': ..."), so the result is not directly a valid Python
  # literal -- confirm what the consumer expects before changing it.

  # The regexp must be given to {match} directly: assigning a regexp
  # constant to a variable would store the result of matching it
  # against $0 (0 or 1), not the regexp itself.
  if (! match(ref, /^[ ]*[<]ref[ ]+name[ ]*[=][ ]*([-_.:"A-Za-z0-9]+)[ ]*[>](.*)[<][ ]*\/[ ]*ref[ ]*[>][ ]*$/, fld)) {
    printf "** bad format [%s]\n", ref > "/dev/stderr"; exit(1)
  }
  val = fld[1]
  cont = fld[2]

  # The name may include '"' characters; escape them so that the
  # emitted string literal stays well-formed.
  gsub(/["]/, "\\\"", val)
  dic = ( "{ refname: \"" val "\",")

  gsub(/^[ ]+/, "", cont)
  gsub(/[ ]+$/, "", cont)

  # Braces are written as bracket expressions so that they cannot be
  # taken for interval operators. Group 1 is the argument list, from
  # the first "|" up to the closing "}}" (empty if there is none).
  if (match(cont, /[{][{] *cite[ A-Za-z]*([|].*)?[}][}]/, fld)) {
    cite = fld[1]
    # Peel one "|key=value" argument off the front of {cite} per
    # iteration. A value extends up to the next "|" and may itself
    # contain "=".
    while (match(cite, /^[ ]*[|][ ]*([^|=]*)[=][ ]*([^|]*)/, fld)) {
      key = fld[1]
      val = fld[2]
      # Continue with the text after this argument, not with its value.
      cite = substr(cite, RSTART + RLENGTH)
      gsub(/[ ]+$/, "", key)
      gsub(/[ ]+$/, "", val)
      gsub(/["]/, "\\\"", val)
      dic = (dic " " key ": \"" val "\",")
    }
  } else {
    gsub(/["]/, "\\\"", cont)
    dic = (dic " contents: \"" cont "\",")
  }
  dic = (dic "}")
  return dic
}