#! /usr/bin/gawk -f
# Last edited on 2020-12-27 14:06:28 by jstolfi

BEGIN \
  {
    # Subsamples the synapse table of the network provided by Nilton Kamiji
    # to Jorge Stolfi on 2020-12-03, reducing the number of neurons to 10%.
    #
    # Only the neurons whose index is a multiple of 10 are retained. A
    # synapse is kept only if its post-synaptic neuron is retained. Its
    # pre-synaptic neuron is translated through a renumbering table; that
    # table is expected to send each non-retained neuron to the nearest
    # retained neuron below it, cycling through the neuron group.
    #
    # The user must set, with "-v", the variable {renfname}: the name of
    # the file holding the table that maps old neuron indices to new ones.

    # Negative means "no error so far"; otherwise it is the exit status.
    abort = -1;

    # The renumbering table is mandatory:
    if (renfname == "") { arg_error("must define {renfname}"); }

    nne_old = 77169  # Expected number of neurons in input network.
    nne_new = 7717   # Expected number of neurons in subsampled network.
    nng = 8          # Number of neuron groups.

    # Number {nne_per_ng_new[ing]} of neurons in each group {ing} of the
    # reduced network; {split} indexes from 1, so shift down to {0..nng-1}:
    split("2069 583 2192 548 485 106 1440 294", nne_per_ng_new)
    ashift(nng, nne_per_ng_new)

    # Load the reassignment of old neurons to new neurons
    # {ine_map[0..nne_old-1]}, starting from an empty array:
    split("", ine_map)
    read_ine_map(nne_old, nne_new, renfname, ine_map)

    # Preamble of the output: group count, then the 1-based first and
    # last neuron index of each group in the reduced network.
    printf "  N_layers: %d\n", nng
    ine_start = 0
    ing = 0
    while (ing < nng)
      { ine_lim = ine_start + nne_per_ng_new[ing]
        # {ine_lim} is one past the last 0-based index of the group,
        # which is also the 1-based index of its last neuron.
        printf "  %d %d\n", ine_start + 1, ine_lim
        ine_start = ine_lim
        ing++
      }
    printf "\n"

    # Counters updated while sampling the synapses:
    nse_old = 0;      # Synapses read from the old network.
    nse_new = 0;      # Synapses written to the new network.
    ine_max_old = -1  # Largest neuron index in old network.
    ine_max_new = -1  # Largest neuron index in new network.
  }
  
# Once an error has been flagged, stop at the next input record:
abort >= 0 { exit(abort); }

# Discard the input lines that are not synapse records: any line that
# mentions "N_layers", any line holding just two integers, and blank
# lines. (The BEGIN block writes its own "N_layers" preamble.)
$0 ~ /N_layers/ { next }

$0 ~ /^ *[0-9]+ +[0-9]+ *$/ { next }

$0 ~ /^ *$/ { next }

/^ *[0-9]+[,] *[0-9]+[,] *[-.0-9]+[,] *[.0-9]+ *$/ \
  {
    # Synapse record "pre, post, weight, delay", with 1-based neuron
    # indices. Writes the record to standard output, renumbered, if its
    # post-synaptic neuron is retained in the subsampled network.
    nse_old++;

    # Trim the line and turn the comma separators into single blanks;
    # assigning to $0 makes awk re-split it into $1..$4.
    gsub(/^ +/, "", $0);
    gsub(/ +$/, "", $0);
    gsub(/ *[,] */, " ", $0);
    ine_pre_old = $1 - 1;  # 0-based pre-synaptic neuron index.
    ine_pos_old = $2 - 1;  # 0-based post-synaptic neuron index.
    wt = $3 + 0.0; # Weight (pA).
    dt = $4 + 0.0; # Delay (ms)

    # Both indices must lie in {0..nne_old-1}. The message reports the
    # 0-based index that failed the check.
    if ((ine_pre_old < 0) || (ine_pre_old >= nne_old))
      { data_error(("bad ine_pre = \"" ine_pre_old "\""), $0); }
    if ((ine_pos_old < 0) || (ine_pos_old >= nne_old))
      { data_error(("bad ine_pos = \"" ine_pos_old "\""), $0); }
    
    # Remember largest index seen:
    if (ine_pre_old > ine_max_old) { ine_max_old = ine_pre_old; }
    if (ine_pos_old > ine_max_old) { ine_max_old = ine_pos_old; }

    # Decide if synapse stays (post-synaptic index is a multiple of 10):
    if ((ine_pos_old % 10) == 0)
      { # Convert to subsampled neuron indices:
        ine_pre_new = ine_map[ine_pre_old]
        ine_pos_new = ine_map[ine_pos_old]
        # Write it out, with indices converted back to 1-based:
        printf "%d, %d, %.4f, %.5f\n", ine_pre_new + 1, ine_pos_new + 1, wt, dt
        nse_new++
        # Remember largest index seen:
        if (ine_pre_new > ine_max_new) { ine_max_new = ine_pre_new; }
        if (ine_pos_new > ine_max_new) { ine_max_new = ine_pos_new; }

      }
    next
  }

# Catch-all: any record not consumed by an earlier rule is a fatal
# format error. Sets {abort} like the other error paths do, so that the
# END block exits at once instead of printing the synapse counts.
// \
  { printf "** line %d: bad format\n  [[%s]]\n", FNR, $0 > "/dev/stderr";
    abort = 1;
    exit(1);
  }

END \
  {
    # After a flagged error, just propagate the status; otherwise report
    # how many synapses were read and how many were written.
    if (abort < 0)
      { printf "synapses: old %d  new %d\n", nse_old, nse_new > "/dev/stderr" }
    else
      { exit(abort); }
  }

function ashift(n,arr, i) 
  { # Renumbers the elements of {arr} from indices {1..n} to indices
    # {0..n-1}: each element moves down one slot, in increasing index
    # order, and the now-duplicated slot {n} is removed.
    i = 1;
    while (i <= n)
      { arr[i - 1] = arr[i];
        i++;
      }
    delete arr[n];
  }

function read_ine_map(nne_old,nne_new,fname,tbl,   ine_old,ine_new,nlin,lin,ntbl,fld,nfld,status) 
  {
    # Reads from file {fname} the table that maps old neuron indices to
    # new ones, and stores it in {tbl[0..nne_old-1]}.
    #
    # Blank lines and lines whose first non-blank character is "#" are
    # skipped. Every other line must hold two fields, "{old} {new}",
    # optionally followed by a field that starts with "#". The old
    # indices must appear in the order 0, 1, 2, ..., and each new index
    # must lie in {0..nne_new-1}. The table must have exactly {nne_old}
    # entries. Any violation is reported and terminates the program.

    nlin = 0; # Number of lines read (including blanks and comments).
    ntbl = 0; # Number of table entries read.
    while ((status = (getline lin < fname)) > 0) { 
      nlin++;
      if (! match(lin, /^[ \011]*([#]|$)/))
        { nfld = split(lin, fld, " ");
          # A third field that starts a "#" comment is ignored:
          if ((nfld >= 3) && (fld[3] ~ /^[#]/)) { nfld = 2; }
          if (nfld != 2) { tbl_error(fname, nlin, ("bad table entry = \"" lin "\"")); }
          if (ntbl >= nne_old)
            { tbl_error(fname, nlin, ("too many entries: \"" lin "\"")); }
          # Entries must come in consecutive order, starting from 0:
          ine_old = fld[1] + 0;
          if (ine_old != ntbl) 
            { tbl_error(fname, nlin, ("invalid old neuron index: \"" lin "\"")); }
          ine_new = fld[2] + 0;
          if ((ine_new < 0) || (ine_new >= nne_new)) 
            { tbl_error(fname, nlin, ("invalid new neuron index: \"" lin "\"")); }
          tbl[ine_old] = ine_new;
          ntbl++;
        }
    }
    # {getline} returns -1 when the file cannot be opened or read; only
    # in that case does {ERRNO} describe a failure of this read.
    if (status < 0) { tbl_error(fname, nlin, ERRNO); }
    close (fname);
    if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
    # A short table would leave some old neurons without a mapping:
    if (ntbl != nne_old)
      { tbl_error(fname, nlin, ("too few entries: expected " nne_old ", found " ntbl)); }
  }

function tbl_error(f,n,msg)
  { # Reports problem {msg} found at line {n} of table file {f} on the
    # standard error stream, flags the error in {abort}, and exits
    # with status 1.
    abort = 1;
    printf("%s:%d: %s\n", f, n, msg) > "/dev/stderr";
    exit(1)
  }

function data_error(msg,lin)
  { # Reports problem {msg} at the current input file and record number
    # on the standard error stream, followed by the offending line {lin}
    # if it is not empty. Flags the error in {abort} and exits with
    # status 1.
    abort = 1;
    printf("%s:%d: ** %s\n", FILENAME, FNR, msg) > "/dev/stderr";
    if (lin == "") { exit(1) }
    printf("  [[%s]]\n", lin) > "/dev/stderr";
    exit(1)
  }

function arg_error(msg)
  { # Reports the command-line or setup problem {msg} on the standard
    # error stream, flags the error in {abort}, and exits with status 1.
    abort = 1;
    printf("** %s\n", msg) > "/dev/stderr";
    exit(1)
  }