#define PROG_NAME "frb_correlations"
#define PROG_DESC "analyze correlations between Fourier coefs of a set of signals"
#define PROG_VERS "1.0"

/* Copyright  2004 by the State University of Campinas (UNICAMP). */
/* See the copyright, authorship, and warranty notice at end of file. */
/* Last edited on 2006-04-15 11:14:21 by stolfi */

/* Created aug/2004 by Jorge Stolfi, UNICAMP. */
/* Based on {BITCorrelation.m3} of dec/1999 by 
  Helena C. G. Leito and Jorge Stolfi, UNICAMP. */

/* Last edited on 2006-03-19 12:11:36 by stolfi */

#define PROG_HELP \
  "  " ${PROG_NAME} " \\\n"
  "    -outFile FILE \\\n"
  "    -input NAME \\\n"
  "    -inName NAME \\\n"
  "    -curvePrefix NAME [ -curveDir DIR ] \\\n"
  "    -outName  NAME \n"
  "    -nSamples int  \\\n"
  "    -output  NAME \\\n"
  "    -maxShift NUM \\\n"
  "    -nSteps NUMBER [ -maxCands NUMBER ]\n"
  "    -band BAND [ -extension EXT ] \\\n"
  "    [ -log ] -name NAME \\\n"
  "    -ixMin INT -ixMax INT  \\\n"
  "    -iyMin INT -iyMax INT  \\\n"
  "    [ -avgFile FILE ] \\\n"
  "    -nFiles INT  \\\n"
  "    -nFreqs INT  \\\n"
  "    FILE1 FILE2 ... FILEn\n"
  "    -tagchar *\n"

#define _GNU_SOURCE

#include <frb_types.h>
#include <frb_signal.h>
#include <frb_segment.h>
#include <frb_candidate.h>
#include <frb_curve.h>
#include <frb_match.h>
#include <frb_fourier.h>
#include <frb_analyze.h>

#include <argparser.h>
#include <affirm.h>
#include <r3.h>

#include <assert.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <values.h>

int main(int argc, char**argv)
  {

    /* Correlation analysis: */

    /* Read strings and accumulate products: */
    frb_signal_t *rchs = frb_read_n_signals
      ( o->nFiles, o->nSamples, o->inFile, log );

    if (Text.Empty(o->avgFile))
      { fprintf(stderr, "assuming zero averages.\n");
        for (i = 0; i < n; i++) { m[i] = 0.0; }
      }
    else
      { fprintf(stderr, o->avgFile & " ...");
        FILE *rd = open_read(o->avgFile, TRUE);
        frb_signal_read_data_t chd = frb_signal_read(rd, /* headerOnly */ FALSE);
        frb_signal_t *c = &(chd.c);
        affirm(c->nel >= o->iMax , "bug");
        for (i = 0; i < n; i++) { m[i] = c->el[i]; }
      }
      fprintf(stderr, "\n");
    }

    fprntf(stderr, "\n\ndone.\n");
    return 0;
  }

void frb_analyze_correlations
  (
    frb_signal_t *sx,   /* Signals to use as the {X} axis. */
    frb_signal_t *avgx, /* {avgx->el[i]} is mean value of {sx[k].el[i]} */
    int ixMin,          /* Minimum index {i} for {sx} signals. */
    int ixMax,          /* Maximum index {i} for {sx} signals. */
    frb_signal_t *sy,   /* Signals to use as the {Y} axis. */
    frb_signal_t *avgy, /* {avgy->el[j]} is mean value of {sy[k].el[j]}. */
    int iyMin,               /* Minimum index {j} for {sy} signals. */
    int iyMax,               /* Maximum index {j} for {sy} signals. */
    int nf                   /* Number of signals. */
  )
  {
    
    
    affirm((avgx->nel == ns) || (avgx->nel == 0), "bad avgx signal size");
    affirm((avgy->nel == ns) || (avgy->nel == 0), "bad avgy signal size");

    affirm((0 <= ixMin) && (ixMin <= ixMax), "bad ixMin, ixMax");
    affirm((0 <= ixMin) && (ixMin <= ixMax), "bad ixMin, ixMax");

    int nx = ixMax - ixMin + 1;
    int ny = iyMax - iyMin + 1;
    double cov[nx*ny];
    double varx[nx], vary[ny];
    double xk[nx], yk[nx];

    /* Initialize variance vectors and covariance matrix. */
    for (i = 0; i < nx; i++) { varx[i] = 0; }
    for (j = 0; j < ny; j++) { vary[j] = 0; }
    for (i = 0; i < nx; i++)
      for (j = 0; j < ny; j++)
        { cov[i*nx + j] = 0.0; }

   /* Accumulate products: */
   int nok = 0;
   for (k = 0; k < nf; k++)
     { frb_signal_t *sxk = &(sx[k]);
       frb_signal_t *syk = &(sy[k]);
       if ((sxk->nel > 0) && (syk->nel > 0))
         { /* Extract the deviations and accumulate the variance sums: */
           ExtractValues(sxk, avgx, ixMin, ixMax, xk);
           for (i = 0; i < nx; i++) { double xki = xk[i]; varx[i] += xki*xki; }

           ExtractValues(syk, avgy, iyMin, iyMax, yk);
           for (j = 0; j < ny; j++) { double ykj = yk[j]; vary[j] += ykj*ykj; }

           /* accumulate the covariance sums: */
           for (i = 0; i < nx; i++)
             { double xki = xk[i];
               for (j = 0; j < ny; j++)
                 { double ykj = yk[j];
                   cov[i*nx + j]+= xki*ykj;
                 }
             }
           nok++;
         }
      }

    /* Compute variances and covariances: */
    for (i = 0; i < nx; i++) { varx[i] /= nok; }
    for (j = 0; j < ny; j++) { vary[j] /= nok; }
    for (i = 0; i <= n-1; i++)
      for (j = 0; j <= n-1; j++)
        { cov[i*nx + j] /= nok; }

    /* Print results: */
    frb_print_covariances(cov, varx, ixMin, ixMax, vary, iyMin, iyMax);
    frb_print_correlations(cov, varx, ixMin, ixMax, vary, iyMin, iyMax);
  }

void frb_extract_elements
  ( frb_signal_t *s;
    frb_signal_t *avg;
    int iMin,
    int iMax,
    double *v
  )
  {
    affirm((0 <= iMin) && (iMin <= iMax), "bad iMin, iMax");
    affirm(s->nel > iMax, "bad s signal size");
    affirm((avg->nel == 0) || (avg->nel > iMax), "bad avg signal size");

    int ns = iMax - iMin + 1;
    for (i = 0; i < ns; i++)
      { double ai = (avg->nel == 0 ? 0 : avg->el[iMin + i]);
        v[i] = s->el[iMin + i] - ai;
      }
  }

void frb_print_covariances
  ( double *cov,    /* Matrix of covariances. */
    double *varx,   /* Variances of {X} variables. */
    int ixMin,      /* Actual initial {X} index. */
    int ixMax,      /* Actual final {X} index. */
    double *vary,   /* Variances of {Y} variables. */
    int iyMin,      /* Actual initial {Y} index. */
    int iyMax       /* Actual final {Y} index. */
  )
  {
    affirm((0 <= ixMin) && (ixMin <= ixMax), "bad ixMin, ixMax");
    affirm((0 <= ixMin) && (ixMin <= ixMax), "bad ixMin, ixMax");

    int nx = ixMax - ixMin + 1;
    int ny = iyMax - iyMin + 1;

    /* Choose column width, precision, and element format: */
    int width, prec;
    char *fmt;
    frb_choose_element_format(cov, varx, nx, vary, ny, &width, &prec, &fmt);

    frb_print_headers("Covariance matrix", iyMin, iyMax, width, TRUE);

    /* Print Y variances: */
    fprintf(stdout, "%3s ", "");
    fprintf(stdout, "%*s", width, "");
    for (j = 0; j < ny; j++)
      { fprintf(stdout, " ");
        fprintf(stdout, fmt, vary[j]);
      }
    fprintf(stdout, "\n\n");

    /* Print matrix: */
    for (i = 0; i < nx; i++)
      { double vi = ;
        fprintf(stdout, fmt, width, varx[i]);
        fprintf(stdout, " ");
        for (j = 0; j < ny; j++)
          { fprintf(stdout, " ");
            fprintf(stdout, fmt, cov[i*nx + j]);
          }
        fprintf(stdout, "\n");
      }
    free(fmt);
  }

#define FRB_CORR_PREC (3)
#define FRB_CORR_WIDTH (FRB_CORR_PREC + 3)

void frb_print_correlations
  ( double *cov,    /* Matrix of covariances. */
    double *varx,   /* Variances of {X} variables. */
    int ixMin,      /* Actual initial {X} index. */
    int ixMax,      /* Actual final {X} index. */
    double *vary,   /* Variances of {Y} variables. */
    int iyMin,      /* Actual initial {Y} index. */
    int iyMax       /* Actual final {Y} index. */
  )
  {
    affirm((0 <= ixMin) && (ixMin <= ixMax), "bad ixMin, ixMax");
    affirm((0 <= ixMin) && (ixMin <= ixMax), "bad ixMin, ixMax");

    int nx = ixMax - ixMin + 1;
    int ny = iyMax - iyMin + 1;

    /* Build element format: */
    char *fmt;
    asprintf(&fmt, "%%%d.%df", FRB_CORR_WIDTH, FRB_CORR_PREC);

    frb_print_headers("Correlation matrix", iyMin, iyMax, FRB_CORR_WIDTH, FALSE);

    /* Print matrix: */
    for (i = 0; i <= n-1; i++)
      { fprintf(stdout, "%3d ", i + iMin);
        for (j = 0; j <= n-1; j++)
          { double rij = cov[i*nx + j]/sqrt(varx[i]*vary[j] + 1.0d-100);
            fprintf(stdout, " ");
            fprintf(stdout, fmt, rij);
          }
        fprintf(stdout, "\n");
      }
    free(fmt);
  }

void frb_print_headers(char *title, int iMin, int iMax, int width, bool_t vars)
  {
    fprintf(stdout, "\n\n%s\n\n", title);

    /* Print column headers */
    fprintf(stdout, "%3s ", "");
    if (vars) { fprintf(stdout, "%*s", width, ""); }
    for (j = iMin; j <= iMax; j++)
      { fprintf(stdout, " ");
        fprintf(stdout, "%*d", width, j);
      }
    fprintf(stdout, "\n");

    /* Print dashes */
    fprintf(stdout, "--- ");
    for (j = iMin; j <= iMax; j++)
      { fprintf(stdout, " ");
        fprintf(stdout, "%*.*s", width, width, "-------------------");
      }
    fprintf(stdout, "\n");
  }

#define FRB_SIG_DIGITS (4)
#define FRB_MIN_WIDTH (3)
#define FRB_MAX_WIDTH (SIG_DIGITS+7)   /* Allows sign, period, {d}, and exponent */
#define FRB_MAX_PREC (MAX_WIDTH - 3)   /* Allows sign, period, and one integer digit */

/* These values must be consistent with the above: */
#define FRB_MAX_FIX_VAL (9999999999.0) /* = 10^MAX_WIDTH - 1 */
#define FRB_MIN_FIX_VAL (0.0001000)    /* = 10^-(MAX_PREC - SIG_DIGITS) */
#define FRB_MIN_ROUND (0.000000005)    /* = 0.5 * 10^-MAX_PREC */
#define FRB_MAX_ROUND (0.5)

void frb_choose_element_format
  (
    double *cov,
    double *varx,
    int nx,
    double *vary,
    int ny,
    int *widthp,
    int *precp,
    char **fmtp
  )
  {
    double maxVal = 0.0;
    char *style;
    int width, prec;

    /* Find maximum abs entry of variances and covariances: */
    for (i = 0; i <= nx; i++) { maxVal = frb_dmax(maxVal, fabs(varx[i])); }
    for (j = 0; j <= ny; j++) { maxVal = frb_dmax(maxVal, fabs(vary[j])); }
    for (i = 0; i <= nx; i++)
      for (j = 0; j <= ny; j++)
        { maxVal = frb_dmax(maxVal, fabs(cov[i*nx+j])); }

    if (maxVal == 0.0)
      { style = "f"; width = FRB_MIN_WIDTH; prec = 0; }
    else if (maxVal < FRB_MIN_FIX_VAL OR maxVal + FRB_MAX_ROUND > FRB_MAX_FIX_VAL)
      { style = "e"; width = FRB_SIG_DIGITS + 7; prec = FRB_SIG_DIGITS - 1; }
    else
      { style = "f";
        prec = FRB_MAX_PREC; width = 3 + prec;
        while (maxVal/10.0 + FRB_MIN_ROUND > FRB_MIN_FIX_VAL)
          { maxVal = maxVal/10.0;

            if (prec >= FRB_SIG_DIGITS)
              { prec = prec - 1; width = width - 1; }
            else if (prec > 0)
              { prec = prec - 1; }
            else
              { width = width + 1; }
          }
      }

    char *fmt = NULL;
    asprintf(&fmt, "%%%d.%d%s", width, prec, style);
    (*widthp) = width; (*precp) = prec; (*fmtp) = fmt;
  }

frb_options_t *frb_get_options(int argc, char **argv)
  {
    argparser_t *pp = argparser_new(stderr, argc, argv);
    argparser_set_help(pp, PROG_NAME " version " PROG_VERS ", usage:\n" PROG_HELP);
    argparser_set_info(pp, PROG_INFO);
    argparser_process_help_info_options(pp);
    
    frb_options_t *o = malloc(sizeof(frb_options_t));

    argparser_get_keyword(pp, "-tag");
    o->tag = argparser_get_next(pp);

    argparser_get_keyword(pp, "-nFreqs");
    o->nFreqs = argparser_get_next_int(pp, 1);

    argparser_get_keyword(pp, "-nFiles");
    o->nFiles = argparser_get_next_int(pp, 1);

    argparser_get_keyword(pp, "-input");
    o->input = argparser_get_next(pp);

    argparser_get_keyword(pp, "-curvePrefix");
    o->curvePrefix = argparser_get_next(pp);

    if (argparser_keyword_present("-curveDir"))
      { o->curveDir = argparser_get_next(pp); }
    else
      { o->curveDir = "."; }

    argparser_get_keyword(pp, "-band");
    o->band = argparser_get_next_int(pp, );

    if (argparser_keyword_present("-extension"))
      { o->extension = argparser_get_next(pp); }
    else
      { o->extension = ".flc"; }

    argparser_get_keyword(pp, "-output");
    o->output = argparser_get_next(pp);

    /* Matching parameters: */

    argparser_get_keyword(pp, "-maxShift");
    o->maxShift = pp.getNextLongReal();

    /* Segment extraction parameters: */

    argparser_get_keyword(pp, "-nSteps");
    (o->nSamples-1)= argparser_get_next_int(pp, 2,4096);

    if (argparser_keyword_present("-maxCands"))
      { o->maxCands = argparser_get_next_int(pp, 1); }
    else
      { o->maxCands = LAST(int); }

    argparser_get_keyword(pp, "-inName");
    o->inName = argparser_get_next(pp);

    argparser_get_keyword(pp, "-outName");
    o->outName = argparser_get_next(pp);

    argparser_get_keyword(pp, "-name");
    o->name = argparser_get_next(pp);

    argparser_get_keyword(pp, "-outFile");
    o->outFile = argparser_get_next(pp);

    argparser_get_keyword(pp, "-nSamples");
    o->nSamples = argparser_get_next_int(pp, 1);

    o->log = argparser_keyword_present("-log");

    argparser_get_keyword(pp, "-ixMin");
    o->ixMin = argparser_get_next_int(pp, FIRST(INT),LAST(INT));

    argparser_get_keyword(pp, "-ixMax");
    o->ixMax = argparser_get_next_int(pp, o->iMin,o->iMin+30);

    argparser_get_keyword(pp, "-iyyMin");
    o->iyMin = argparser_get_next_int(pp, FIRST(INT),LAST(INT));

    argparser_get_keyword(pp, "-iMax");
    o->iyMax = argparser_get_next_int(pp, o->iMin,o->iMin+30);

    if (argparser_keyword_present("-avgFile"))
      { o->avgFile = argparser_get_next(pp); }
    else
      { o->avgFile = ""; }

    argparser_skip_parsed();

    int nFiles = argc - pp.next;
    if (nFiles < 2) { argparser_error("needs at least two files"); }
    o->inFile = malloc(nFiles * sizeof(char *));
    for (i = 0; i < nFiles; i++)
      { char *f = argparser_get_next(pp);
        o->inFile[i] = f;
        if ((strlen(f) > 1) && (f[0] == '-'))
          { argparser_error(txtcat ("bad file name :", f)); }
      }

    argparser_finish();
    return o;
  }

frb_signal_t *frb_read_n_signals
  ( int nFiles,
    int nSamples,
    char **file,
    bool_t log
  )
  {
    int i;
    frb_signal_t *chs = malloc(nFiles * sizeof(frb_signal_t));
    for (i = 0; i < nFiles; i++)
      { fprintf(stderr, "%s ...", file[i]);

        FILE * rd = open_read(file[i], TRUE);
        frb_signal_read_data_t rp = frb_signal_read(rd, /* headerOnly */ FALSE);
        frb_signal_t *p = &(cd->chain);

        fclose(rd);
        affirm(p->nel == nSamples, "bug");

        if (log) { frb_to_log(rp.unit/2.0, p); }

        chs[i] = *p;
      }

    return chs;
  }
