#define PROG_NAME "spegram" #define PROG_DESC "compute the energy spectrogram of a signal" #define PROG_VERS "1.0" /* Copyright © 2006 by the State University of Campinas (UNICAMP). */ /* See the copyright, authorship, and warranty notice at end of file. */ /* Last edited on 2024-12-21 11:58:31 by stolfi */ #define PROG_HELP \ PROG_NAME "\\\n" \ " -input [ ascii | au ] \\\n" \ " -window [ hann | rect ] {WINDOW_SIZE} [ -stride {WINDOW_STRIDE} ] \\\n" \ " -output [ ascii | pgm ] [ -range {VMAX} {VMIN} ] \\\n" \ " < {INPUT_FILE} \\\n" \ " > {OUTPUT_FILE} \\\n" \ " " argparser_help_info_HELP #define PROG_INFO \ "NAME\n" \ " " PROG_NAME " - " PROG_DESC "\n" \ "\n" \ "SYNOPSIS\n" \ " " PROG_HELP "\n" \ "\n" \ "DESCRIPTION\n" \ " The program reads a sampled signal (e.g. an audio file), and outputs" \ " its energy spectrogram.\n" \ "\n" \ " The input signal is read from standard input and is converted from" \ " its external format to a sequence of double-precision floating-point" \ " samples. This sequence is processed in overlapping batches (frames)" \ " of {WINDOW_SIZE} consecutive samples, where successive batches are" \ " displaced by {WINDOW_STRIDE} samples. The program multiplies each" \ " frame by a user-specified windowing function, computes its discrete" \ " Fourier transform (DFT), and writes its power spectrum to the output file.\n" \ "\n" \ "OPTIONS\n" \ " -input [ ascii | au ]\n" \ " Specifies the format of the input file. The value \"ascii\" means" \ " that the input is a text file (see below for details). The value" \ " \"au\" means that the input is in Sun's AU audio format (usually" \ " identified by the \".au\" filename extension). Only a few variants" \ " of the AU format are presently recognized, including 16-bit signed" \ " integer linear PCM and 32-bit floating-point linear PCM.\n" \ " -window [ hann | rect ] {WINDOW_SIZE} \n" \ " Specifies the type of the apodizing function used to avoid Gibbs" \ " ringing, and the number {WINDOW_SIZE} of samples per frame. Presently" \ " recognizes the types \"hann\" (Hann shifted cosine windowing funcion) and" \ " \"rect\" (rectangular windowing function, i.e. no apodizing). Values" \ " of {WINDOW_SIZE} between 1024 and 4096 are usually adequate for human" \ " speech and music (at 44100 Hz sampling rate).\n" \ " -stride {WINDOW_STRIDE}\n" \ " Specifies the number of samples between successive" \ " frames. Usually this value is positive and less" \ " than {WINDOW_SIZE}. The default is {WINDOW_SIZE/8}.\n" \ "\n" \ " -output [ ascii | pgm ]\n" \ " Specifies the format of the output file. The \"ascii\" option" \ " produces a plain text file in a format suitable for {gnuplot}. The" \ " \"pgm\" option produces a grayscale image in the PGM(5) format." \ "\n" \ argparser_help_info_HELP_INFO "\n" \ "\n" \ "ASCII INPUT FORMAT\n" \ " The plain text (\"ascii\") input format begins with three lines" \ " containing \"channels = {NUM_CHANNELS}\", \"samples = {NUM_SAMPLES}\", and" \ " \"frequency = {SAMPLING_FREQ}\". The sampling frequency should be" \ " in Hertz. Then follow {NUM_SAMPLES} lines, each" \ " containing the sampling time and {NUM_CHANNELS} sample values, one" \ " per channel. The values should be floating-point numbers, separated" \ " by one or more spaces." \ "\n" \ "ASCII OUTPUT FORMAT\n" \ " The plain text (\"ascii\") output format begins with four lines" \ " containing \"tstep = {TIME_STEP}\", \"fstep = {FREQ_STEP}\", " \ " \"freqs = {NUM_FREQUENCIES}\", and \"times = {NUM_TIMES}\". Then" \ " follow {NUM_TIMES} blocks of spectrogram data, one block for each" \ " frame in the spectrogram, separated by blank lines. Each block" \ " consists of {NUM_FREQUENCIES} data lines. Each data line has" \ " three floating-point values: the time at the center of the" \ " frame, the frequency (a multiple of {FREQ_STEP}), and the" \ " spectrogram's value (signal power in that frame and frequency).\n" \ "\n" \ "SEE ALSO\n" \ " pgminvert(1).\n" \ "\n" \ "AUTHOR\n" \ " Created oct/2006 by Jorge Stolfi, State University" \ " of Campinas (UNICAMP), as {egrama.c}.\n" \ " Revised and renamed {spegram.c} on oct/2006 by Jorge Stolfi, UNICAMP." #define stringify(x) strngf(x) #define strngf(x) #x #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* COMMAND LINE ARGUMENTS */ /* Input file type: */ typedef enum { IFM_AU, /* Sun AU format. */ IFM_ASCII /* Plain ASCII format. */ } Input_Fmt_t; /* Window function type: */ typedef enum { WIN_HANN, /* Hann window. */ WIN_RECT /* Rectangular window. */ } Window_Type_t; /* Output file type: */ typedef enum { OFM_PGM, /* PGM output. */ OFM_ASCII /* Plain ASCII output, suitable for {gnuplot}. */ } Output_Fmt_t; /* Command line options: */ typedef struct options_t { Input_Fmt_t ifmt; /* Format of input file. */ Window_Type_t wtype; /* Window function type. */ uint32_t wsize; /* Window width. */ uint32_t stride; /* Samples to skip between successive windows. */ double vmin; /* Map this spectrogram value to 0.0 output. */ double vmax; /* Map this spectrogram value to 1.0 output. */ Output_Fmt_t ofmt; /* Format of output file. */ } options_t; /* PROTOTYPES */ jsaudio_t read_signal(FILE *rd, Input_Fmt_t ifmt); jsaudio_t read_signal_ascii(FILE *rd); void write_output_header(FILE *wr, Output_Fmt_t ofmt, uint32_t ne, uint32_t nf, double tstep, double fstep); void write_frame(FILE *wr, uint32_t nfrq, double pwr[], double t, double fstep, Output_Fmt_t ofmt, double vmin, double vmax); void write_output_trailer(FILE *wr, Output_Fmt_t ofmt); void compute_frame ( jsaudio_t *s, uint32_t skip, Window_Type_t wtype, uint32_t nw, fftw_complex *in, fftw_complex *out, fftw_plan *plan, double pwr[] ); void update_egram_range(uint32_t nfrq, double pwr[], double *emin, double *emax); double safelog(double v, double vmin); options_t* get_options(int32_t argc, char **argv); int32_t main(int32_t argc, char **argv); /* IMPLEMENTATIONS */ int32_t main(int32_t argc, char **argv) { options_t *o = get_options(argc, argv); jsaudio_t si = read_signal(stdin, o->ifmt); fprintf(stderr, "input channels = %d\n", si.nc); fprintf(stderr, "input samples %d\n", si.ns); fprintf(stderr, "sampling frequency = %.4f Hz\n", si.fsmp); /* Sampling step: */ double tstep = 1.0/si.fsmp; fprintf(stderr, "sampling step = %.6f sec\n", tstep); /* Get number {nw} of time steps in window: */ uint32_t nw = o->wsize; fprintf(stderr, "window size = %d samples (%.6f sec)\n", nw, ((double)nw)/si.fsmp); /* Get number {stride} os samples between spectrograms: */ uint32_t stride = o->stride; fprintf(stderr, "frame spacing = %d samples (%.6f sec)\n", stride, ((double)stride)/si.fsmp); /* Get number {nf} of frames in spectrogram: */ uint32_t nf = (si.ns - nw)/stride + 1; fprintf(stderr, "number of frames = %d\n", nf); /* Get number {nfrq} of values (freqs) in each frame of the spectrogram: */ uint32_t nfrq = nw/2 + 1; fprintf(stderr, "number of distinct frequencies = %d\n", nfrq); /* Frequency step: */ double fstep = si.fsmp/nw; fprintf(stderr, "frequency step = %.3f Hz\n", fstep); /* Allocate working storage for Fast Fourier Transform (FFT): */ fftw_complex *in, *out; in = fftw_malloc(nw*sizeof(fftw_complex)); out = fftw_malloc(nw*sizeof(fftw_complex)); /* Precompute parameters for the FFT: */ fftw_plan plan; plan = fftw_plan_dft_1d((int32_t)nw, in, out, FFTW_FORWARD, FFTW_ESTIMATE); /* Output header: */ write_output_header(stdout, o->ofmt, nfrq, nf, tstep, fstep); /* Compute frames and output them: */ double pwr[nfrq]; /* Power spectrum of slice. */ double emin = +INFINITY; double emax = -INFINITY; for (uint32_t ifrm = 0; ifrm < nf; ifrm++) { /* Compute index {skip} of first sample in frame: */ uint32_t skip = ifrm*stride; /* Compute time at center of window: */ double t = (skip + 0.5*nw)/si.fsmp; /* Compute the power spectrum: */ compute_frame(&si, skip, o->wtype, nw, in, out, &plan, pwr); update_egram_range(nfrq, pwr, &emin, &emax); /* Write it out: */ write_frame(stdout, nfrq, pwr, t, fstep, o->ofmt, o->vmin, o->vmax); } /* Show egram range: */ fprintf(stderr, "spectrogram value range = [ %14.6e _ %14.6e ]\n", emin, emax); /* Output header: */ write_output_trailer(stdout, o->ofmt); fprintf(stderr, "done.\n"); return 0; } jsaudio_t read_signal(FILE *rd, Input_Fmt_t ifmt) { fprintf(stderr, "Reading input file"); if (ifmt == IFM_AU) { fprintf(stderr, " (Sun \".au\" format)\n"); return jsaudio_au_read_file(rd); } else if (ifmt == IFM_ASCII) { fprintf(stderr, " (plain ASCII format)\n"); return read_signal_ascii(rd); } else { fprintf(stderr, "\n"); fprintf(stderr, "unspecified format, aborted\n"); exit(1); } } jsaudio_t read_signal_ascii(FILE *rd) { uint32_t nc = nget_uint32(rd, "channels", 10); fget_eol(rd); uint32_t ns = nget_uint32(rd, "samples", 10); fget_eol(rd); double fsmp = nget_double(rd, "frequency"); fget_eol(rd); jsaudio_t si = jsaudio_allocate_sound(nc, ns); int32_t r = getc(rd); uint32_t k = 0; /* Sample index */ while (TRUE) { /* Skip spaces and TABs: */ while ((r == '\011') || (r == ' ')) { r = getc(rd); } /* If end-of-file, we should be done: */ if (r == EOF) { break; } if (r == '#') { /* Comment line, ignore: */ while ((r != EOF) && (r != '\n')) { r = getc(rd); } } else { /* Data line, read time {t} and {nc} channel samples: */ ungetc(r, rd); (void)fget_double(rd); /* Sampling time. */ for (uint32_t c = 0; c < nc; c++) { si.sv[c][k] = fget_double(rd); } k++; fget_eol(rd); } r = getc(rd); } assert(k == ns); si.ns = ns; si.fsmp = fsmp; return si; } /* Max pixel value for PGM output: */ #define PGM_MAXVAL 65535 void write_output_header(FILE *wr, Output_Fmt_t ofmt, uint32_t nfrq, uint32_t nf, double tstep, double fstep) { if (ofmt == OFM_PGM) { fprintf(wr, "P2\n"); /* PGM ASCII format. */ fprintf(wr, "%d %d\n", nfrq, nf); /* Width and height of image. */ fprintf(wr, "%d\n", PGM_MAXVAL); } else if (ofmt == OFM_ASCII) { fprintf(wr, "# tstep = %24.16e\n", tstep); fprintf(wr, "# fstep = %24.16e\n", fstep); fprintf(wr, "# freqs = %d\n", nfrq); fprintf(wr, "# times = %d\n", nf); } else { fprintf(stderr, "unspecified format, aborted\n"); exit(1); } } double safelog(double v, double vmin) { return log(hypot(v, vmin)); } void write_frame(FILE *wr, uint32_t nfrq, double pwr[], double t, double fstep, Output_Fmt_t ofmt, double vmin, double vmax) { if (ofmt == OFM_PGM) { /* Make sure that {vmin} is positive, to avoid {log} errors: */ if (vmin <= 0) { vmin = 1.0e-100; } /* Ensure {vmax > vmin}, to avoid divide by zero: */ if (vmax <= vmin) { vmax = (1 + 1.0e-14)*vmin; } double umin = log(vmin); double umax = log(vmax); for (uint32_t k = 0; k < nfrq; k++) { if (k > 0) { fprintf(wr, "%c", (k % 10 == 0 ? '\n' : ' ')); } double pk = fmin(vmax, fmax(vmin, pwr[k])); double u = log(pk); double v = (u - umin)/(umax - umin); assert(isfinite(v)); if (v < 0.0) { v = 0.0; } if (v > 1.0) { v = 1.0; } uint32_t vi = (uint32_t)(v*PGM_MAXVAL + 0.5); assert((vi >= 0) && (vi <= PGM_MAXVAL)); fprintf(wr, "%d", vi); } fprintf(wr, "\n"); } else if (ofmt == OFM_ASCII) { for (uint32_t k = 0; k < nfrq; k++) { double v = (pwr[k] - vmin)/(vmax - vmin); fprintf(wr, "%24.16e %24.16e %24.16e\n", t, k*fstep, v); } fprintf(wr, "\n"); } else { fprintf(stderr, "unspecified format, aborted\n"); exit(1); } } void write_output_trailer(FILE *wr, Output_Fmt_t ofmt) { if (ofmt == OFM_PGM) { /* Nothing to do. */ } else if (ofmt == OFM_ASCII) { /* Nothing to do. */ } else { fprintf(stderr, "unspecified format, aborted\n"); exit(1); } fflush(wr); } void compute_frame ( jsaudio_t *si, uint32_t skip, Window_Type_t wtype, uint32_t nw, fftw_complex *in, fftw_complex *out, fftw_plan *plan, double pwr[] ) { uint32_t nfrq = nw/2 + 1; /* Number of distinct frequencies. */ /* Power spectrum normalization factor: */ double norm = 1.0/nw; /* Extract samples {0..nw-1} of frame: */ assert(skip + nw <= si->ns); for (uint32_t k = 0; k < nw; k++) { in[k][0] = si->sv[0][skip+k]; in[k][1] = 0.0; } /* Apply window function: */ if (wtype == WIN_RECT) { /* Nothing to do. */ } else if (wtype == WIN_HANN) { /* Multiply {tr} by the Hann window: */ for (uint32_t k = 0; k < nw; k++) { double x = M_PI*(2*((double)k)/((double)nw) - 1.0); double w = 0.5*(1.0 + cos(x)); in[k][0] *= w; in[k][1] *= w; } } /* Compute discrete transform: */ fftw_execute(*plan); /* Save power spectrum in {pwr}: */ for (uint32_t f = 0; f < nfrq; f++) { double rep = out[f][0], imp = out[f][1]; double sum = rep*rep + imp*imp; if ((f > 0) && (f < nw - f)) { /* Term with frequency {-f}: */ uint32_t g = nw - f; double rem = out[g][0], imm = out[g][1]; sum += rem*rem + imm*imm; } pwr[f] = sum * norm; } } void update_egram_range(uint32_t nfrq, double pwr[], double *emin, double *emax) { uint32_t f; for (f = 0; f < nfrq; f++) { double v = pwr[f]; if (v < (*emin)) { (*emin) = v; } if (v > (*emax)) { (*emax) = v; } } } #define MAX_WSIZE 9192 options_t* get_options(int32_t argc, char **argv) { argparser_t *pp = argparser_new(stderr, argc, argv); argparser_set_help(pp, PROG_NAME " version " PROG_VERS ", usage:\n" PROG_HELP); argparser_set_info(pp, PROG_INFO); argparser_process_help_info_options(pp); options_t *o = (options_t *)malloc(sizeof(options_t)); argparser_get_keyword(pp, "-input"); if (argparser_keyword_present_next(pp, "ascii")) { o->ifmt = IFM_ASCII; } else if (argparser_keyword_present_next(pp, "au")) { o->ifmt = IFM_AU; } else { argparser_error(pp, "missing or invalid input format"); } argparser_get_keyword(pp, "-window"); if (argparser_keyword_present_next(pp, "hann")) { o->wtype = WIN_HANN; } else if (argparser_keyword_present_next(pp, "rect")) { o->wtype = WIN_RECT; } else { argparser_error(pp, "missing or invalid window function"); } o->wsize = (uint32_t)argparser_get_next_int(pp, 1, MAX_WSIZE); if (argparser_keyword_present(pp, "-stride")) { o->stride = (uint32_t)argparser_get_next_int(pp, 0, MAXINT); } else { o->stride = o->wsize/8; } argparser_get_keyword(pp, "-output"); if (argparser_keyword_present_next(pp, "pgm")) { o->ofmt = OFM_PGM; } else if (argparser_keyword_present_next(pp, "ascii")) { o->ofmt = OFM_ASCII; } else { argparser_error(pp, "missing or invalid output format"); } if (argparser_keyword_present(pp, "-range")) { o->vmin = argparser_get_next_double(pp, 0.0, DBL_MAX); o->vmax = argparser_get_next_double(pp, o->vmin, DBL_MAX); } else { o->vmin = 0.0; o->vmax = 1.0; } argparser_finish(pp); return o; }