# Last edited on 2023-11-29 11:18:31 by stolfi

OBTAINING THE DATA FILES

  Copied the Malu EEG data from Claudia Vargas's external hard drive
  folder "EEG_PREDICAO_LPB_MALU/*"
  
    raw-eeg/{PERSON}{SESSION}.raw -- EEG full recording.
    raw-emg/{PERSON}{SESSION}.acq -- EMG full recording.
    
  {PERSON} = volunteer type: "s" for controls, "p" for patients.
      
  {SESSION} = session number ("01","02"...)
      
  Note that "s05" and "p05" are unrelated volunteers and sessions.
  
  There was an extra EEG folder "p05_lesaoD_canhoto".

    copy_malu_files_from_ufrj_hd.sh 

>>>JUNK

  Auxiliary files

    DataDescription.txt -- description of file contents (electrodes, markers).
    Touca128.pdf -- electrode names and positions.
    Net_Station_File_Formats.pdf -- description of ".raw" format.
  
  Counting lines
  
    # For each subject, convert the raw session file to text and report its
    # size in bytes, the number of lines that start with a sign or digit
    # (presumably the data frames), and the "nt = ..." line of the result.
    subjects=( 13 14 )

    # Private scratch file rather than a fixed, predictable name in /tmp.
    tmpfile="$(mktemp)"

    for subject in "${subjects[@]}" ; do
      file="S${subject}.raw"
      nmeeg_convert_raw \
          -unit 1.00 \
          -sourceFile "${file}" \
          -subject "${subject}" \
        < "raw/${file}" \
        > "${tmpfile}"
      nb="$(wc -c < "${tmpfile}")"
      nt1="$(grep -c -E -e '^[ ]*[-+0-9]' "${tmpfile}")"
      nt2="$(grep -E -e '^nt[ ]*[=]' "${tmpfile}")"
      # The file name is an argument, not part of the format string, so a "%"
      # in it cannot be misread as a conversion.
      printf "%s : %s bytes  %s frames  (%s)\n" "${file}" "${nb}" "${nt1}" "${nt2}"
    done

    rm -f -- "${tmpfile}"
    
      S13.raw : 1805955471 bytes  ?? frames  (nt = 811665)
        sample data type = 4
        recording date = 2012-06-25
        recording time = 15:50:23.000
        sampling rate = 500 Hz
        number of channels = 129
        board gain = 1
        conversion bits = 0
        amplifier range = 0 uV
        number of data frames = 811665
      
      S14.raw : 1791185921 bytes  ?? frames  (nt = 805027)
        sample data type = 4
        recording date = 2012-06-25
        recording time = 15:51:54.000
        sampling rate = 500 Hz
        number of channels = 129
        board gain = 1
        conversion bits = 0
        amplifier range = 0 uV
        number of data frames = 805027
        number of event channels = 10


EXTRACTING THE RUNS

  Each line of the files "s{SS}_r{BBB}{RR}.txt" has 139 numbers:
   
    * measurements of 128 electrodes "C001" to "C128" (microvolts?)
    
    * a ``reference electrode'' "CZ" (apparently always zero)
    
    * 10 pulse channels ("HB" "HI32" "HN4" "HS" "Q128" "QB" "QN2" "QS" "epoc" "fix1")
    that are either 0 or something positive.
  
  Each session file contains a sequence of experimental ``blocks''.
  
  Each block consists of 32 ``runs''.
  
  Each run lasts ~6 seconds (~3000 frames) and is marked by a
  ``start-of-fixation'' pulse in channel 139 = "fix1" and then a
  ``start-of-stimulus'' pulse in channel "HB", "HS", "QB", or "QS".
  
  Extracting the runs as separate files and plotting them:
  
    # One timestamp tags both logs of this pass, so that each grep below
    # inspects the log that was just written.
    today="$(yyyy-mm-dd-hhmmss)"
    extract_runs.sh > "raw-runs/extract_runs-${today}.log" 2>&1
    echo 'done extracting.'
    # List log lines with two adjacent "!" or "*" characters
    # (presumably the warning/error markers of the scripts).
    grep -E -e '[!*][!*]' "raw-runs/extract_runs-${today}.log"
    plot_all_runs.sh \
        NOSHOW "${today}" raw-runs 0 130 260 500 \
      > "raw-runs/plot_runs-${today}.log" 2>&1
    echo 'done plotting.'
    grep -E -e '[!*][!*]' "raw-runs/plot_runs-${today}.log"
    
FILTERING THE RUNS

  Running a bandpass filter over the extracted runs.
  The filtered runs are flt-runs-${tag}/*.txt and the removed noise is in nse-runs-${tag}/*.txt
  
  My chosen filter band
  
    filter_runs_B.sh
    
  Tried twice, with "-trend 2 0" (directories flt-runs-B2,nse-runs-B2) 
  and "-trend 3 0" (directories flt-runs-B,nse-runs-B).  The latter 
  seemed to be better.
  
PLOTTING SPECTRA OF SOME FILTERED RUNS

  Plotting the power spectra of selected runs:
  
    # Runs singled out for spectral inspection.
    notable_runs=(
      s013_r00121 s013_r00209 s013_r00229 s013_r00401 s013_r00805
      s014_r00203 s014_r00425
    )
    for run in "${notable_runs[@]}" ; do
      # First the filtered signal, then the noise that the filter removed.
      plot_run_spectrum.sh SHOW "flt-runs-B/${run}" 0 127 50 500
      plot_run_spectrum.sh SHOW "nse-runs-B/${run}" 0 127 250 500
    done

STATISTICS OF ELECTRODES IN PHASES

  Tabulating the range of electrodes within each phase:
  
    # Write one ".stat" file next to each filtered run file.
    for runfile in flt-runs-B/s???_r?????.txt ; do
      statfile="${runfile%%.*}.stat"
      printf '%s -> %s\n' "${runfile}" "${statfile}"
      cat "${runfile}" | get_electrode_stats.gawk > "${statfile}"
    done

IDENTIFYING THE RUNS

  Extracting the run types:
  
    # Collect the lines starting with "type =" from every filtered run into
    # run-types.txt, rewritten as "{run name} {type}".
    # -H forces the "file:" prefix that the sed expression relies on, even
    # when the glob matches a single file.
    ( cd flt-runs-B && grep -E -H -e '^type *[=]' s???_r?????.txt ) \
      | sed -e 's/[.]txt:type *= */ /g' \
      > run-types.txt

LOCATING BLINKS

  Creating a file blinks/blinks-by-hand.txt with the time ranges (seconds) of blinks.
  
    # Create the template for hand editing: one run name per line, sorted.
    # NOTE: this overwrites blinks/blinks-by-hand.txt, including any intervals
    # already entered by hand.
    ( cd flt-runs-B && ls s???_r?????.txt ) \
      | sed -e 's:[.]txt::g' \
      | sort \
      > blinks/blinks-by-hand.txt
    
  Manually editing the file, visually extracting the blink intervals with 0.1 sec precision.
  Considered only blinks that are completely inside the run's time window 
  (fixation and stimulus phases, plus ~0.5sec buffer on each end).
  Typically each blink lasts 0.45 seconds, almost never more than 0.6 sec.
  
    # Produce the cleaned blink list from the hand-edited one; "trun" is
    # passed as 7.0 (presumably the run duration in seconds).
    cleanup_blinks.gawk -v trun=7.0 blinks/blinks-by-hand.txt \
      > blinks/blinks-clean.txt
  
  Plotting a histogram of the blink locations:
  
    # One plot of blink locations per subject, all read from the cleaned list.
    bfile="blinks/blinks-clean.txt"
    for sid in 013 014 ; do
      plot_blink_locations.sh SHOW "${sid}" "${bfile}"
    done
    
  Locating runs without weirdness flags and without 
  blinks in the stimulus phase or in the end of the fixation phase:
    
    # NOTE(review): this reads the hand-edited list, not blinks/blinks-clean.txt
    # -- confirm that is the intended input.
    # stini/stfin are passed as 2.5 and 6.5 (presumably the time window, in
    # seconds, that must be free of blinks).
    list_blink_free_runs.gawk -v stini=2.5 -v stfin=6.5 \
        blinks/blinks-by-hand.txt \
      > blinks/blink-safe-runs.txt
      
      107 safe runs in 512 runs (20.90%)
      subject 013: 91 safe runs in 256 runs (35.55%)
      subject 014: 16 safe runs in 256 runs (6.25%)

EXTRACTING ALL BLINKS OF EACH SUBJECT

  To model the blinks, we extract all blinks from all good runs of each subject and
  concatenate them into a single file "flt-runs-B/s${sid}_blinks.txt":
  
    # Vertical range passed to the channel plots below.
    vmax=160

    bfile="blinks/blinks-clean.txt"
    for sid in 013 014 ; do
      xfile="flt-runs-B/s${sid}_blinks.txt"
      # Keep this subject's entries whose third field (nfl, presumably the
      # flag count) is 0 and whose interval lies inside [0, trun], then cut
      # the corresponding segments out of the filtered runs.
      grep -E -e '^[ ]*'"${sid}" "${bfile}" \
        | gawk \
            -v trun=7.0 \
            ' // {
                sid = $1; rid = $2; nfl = $3;
                tini = 0+$5; tfin = 0+$6;
                if ((nfl == 0) && (tini >= 0) && (tfin <= trun)) { print sid, rid, tini, tfin; }
              }
            ' \
        | extract_run_segments.gawk -v dir="flt-runs-B" \
        > .xfr
      # Count the lines without "=" (presumably the data frames) and put that
      # count in an "nt = ..." line ahead of the extracted segments.
      nt="$(grep -c -v -e '[=]' .xfr)"
      { printf "nt = %d\n" "${nt}" ; cat .xfr ; } > "${xfile}"
    done

    for sid in 013 014 ; do
      xfile="flt-runs-B/s${sid}_blinks.txt"
      nmeeg_plot_channels.sh SHOW "${xfile%%.*}" 0 "${vmax}" 0 99999  0 1000  1400 500
    done
    
EXTRACTING ALL NON-BLINK IMPORTANT SEGMENTS

  To analyze the non-blink sections, we extract all the blink-free important segments
  and concatenate them into a single file "flt-runs-B/s${sid}_nonblinks.txt":
  
    # Vertical range for the plots below. This section did not set it itself;
    # keep any value already in the shell, else use the 160 set elsewhere in
    # these notes.
    vmax="${vmax:-160}"

    bfile="blinks/blink-safe-runs.txt"
    for sid in 013 014 ; do
      xfile="flt-runs-B/s${sid}_nonblinks.txt"
      # For every blink-safe run of this subject, request the fixed time
      # window 2.5 .. 6.5 and cut that segment out of the filtered run.
      grep -E -e '^[ ]*'"${sid}" "${bfile}" \
        | gawk \
            ' // {
                sid = $1; rid = $2;
                tini = 2.5; tfin = 6.5;
                print sid, rid, tini, tfin;
              }
            ' \
        | extract_run_segments.gawk -v dir="flt-runs-B" \
        > .xfr
      # Count the lines without "=" (presumably the data frames) and put that
      # count in an "nt = ..." line ahead of the extracted segments.
      nt="$(grep -c -v -e '[=]' .xfr)"
      { printf "nt = %d\n" "${nt}" ; cat .xfr ; } > "${xfile}"
    done

    for sid in 013 014 ; do
      xfile="flt-runs-B/s${sid}_nonblinks.txt"
      nmeeg_plot_channels.sh SHOW "${xfile%%.*}" 0 "${vmax}" 0 9999  0 2000  1400 500
    done
    
IDENTIFYING PRINCIPAL COMPONENTS OF BLINKS
    
    # Pass 1: compute up to 5 components from each subject's concatenated
    # blink file; outputs use the prefix "flt-runs-B/s{sid}_blinks".
    for sid in 013 014 ; do
      opref="flt-runs-B/s${sid}_blinks"
      xfile="${opref}.txt"
      printf '%s\n' "computing components"
      correl_args=( -outPrefix "${opref}" -zeroMean T -maxComps 5 )
      nmeeg_correl "${correl_args[@]}" < "${xfile}"
    done

    # Pass 2: animate every "_P???_eig.txt" component file that was produced.
    btype=2
    for sid in 013 014 ; do
      opref="flt-runs-B/s${sid}_blinks"
      printf '%s\n' "plotting components"
      for cfile in "${opref}"_P???_eig.txt ; do
        cpref="${cfile%.*}_b${btype}"
        nmeeg_animate "${cpref}" 0 1 1 "${btype}" 560 640 < "${cfile}"
      done
    done

IDENTIFYING PRINCIPAL COMPONENTS OF BLINK-FREE SEGMENTS
    
    # Pass 1: compute up to 5 components from each subject's concatenated
    # blink-free file; unlike the blink case, -zeroMean is F here.
    for sid in 013 014 ; do
      opref="flt-runs-B/s${sid}_nonblinks"
      xfile="${opref}.txt"
      printf '%s\n' "computing components"
      correl_args=( -outPrefix "${opref}" -zeroMean F -maxComps 5 )
      nmeeg_correl "${correl_args[@]}" < "${xfile}"
    done

    # Pass 2: animate every "_P???_eig.txt" component file that was produced.
    btype=2
    for sid in 013 014 ; do
      opref="flt-runs-B/s${sid}_nonblinks"
      printf '%s\n' "plotting components"
      for cfile in "${opref}"_P???_eig.txt ; do
        cpref="${cfile%.*}_b${btype}"
        nmeeg_animate "${cpref}" 0 1 1 "${btype}" 560 640 < "${cfile}"
      done
    done

SEPARATING THE BLINKS AND THE 10 HZ OSCILLATION

  Decomposing the filtered runs into a combination of the blink principal component (BL0),
  the general 10 Hz oscillation component (H10), and a residue (RES) that is orthogonal
  to both patterns:

    vmax=160
    # Each entry is "{subject}:{run}".
    subruns=( 013:00229 014:00425 )
    for sr in "${subruns[@]}" ; do
      subjid="${sr%%:*}"
      runid="${sr##*:}"
      ppref="flt-runs-B/s${subjid}"
      opref="${ppref}_r${runid}"
      # Patterns are the first (P000) components of the blink and non-blink
      # analyses; the residue goes to "{opref}_RES.txt" via stdout.
      nmeeg_comp_analysis \
        -pattern BL0 "${ppref}_blinks_P000_eig.txt" \
        -pattern H10 "${ppref}_nonblinks_P000_eig.txt" \
        -normalize \
        -writeComp BL0 "${opref}_BL0.txt" = BL0 \
        -writeComp H10 "${opref}_H10.txt" = H10 \
        -writeComp BLH "${opref}_BLH.txt" = BL0 H10 \
        -delete BL0 H10 \
        < "${opref}.txt" \
        > "${opref}_RES.txt"
      # The three component files share one vertical range; the residue is
      # plotted with a range of 50 instead.
      for comp in BL0 H10 BLH ; do
        nmeeg_plot_channels.sh SHOW "${opref}_${comp}" 0 "${vmax}" 0 9999  0 0  1400 500
      done
      nmeeg_plot_channels.sh SHOW "${opref}_RES" 0 50 0 9999  0 0  1400 500
    done

EXPORTING SOME TO DROPBOX

  Copying selected files to Dropbox for easy access:
  
    # Copy the data file, plot and power-spectrum plot of every notable run
    # (array "notable_runs" must already be set in the shell) from the raw,
    # filtered and noise directories to the same-named Dropbox directories.
    projdir="."
    dropdir="${HOME}/Dropbox/eeg_sinal_ghislain/2013-11-15-stolfi"
    printf '%s\n' "${notable_runs[*]}"
    for run in "${notable_runs[@]}" ; do
      for sub in raw-runs flt-runs-B nse-runs-B ; do
        cp -av "${projdir}/${sub}/${run}"{.txt,.png,_pwr.png} "${dropdir}/${sub}/"
      done
    done
  
>>>> TO FIX AND DO >>>>

  Tabulating the range of electrodes Fp1 and Fp2 between trigger pulses 
  1 and 4, and flagging those that vary too much:
  
    # Rebuild the table from scratch: one line per filtered run, holding the
    # run file name followed by whatever get_electrode_ranges.gawk prints.
    # ix1/ix2/ixt/dvmax are passed as 4, 12, 21 and 150 (presumably the two
    # electrode columns, the trigger column and the variation limit).
    vfile="flt-runs-B/Fp-variation.txt"
    rm -f "${vfile}"
    for runfile in flt-runs-B/s???_r?????.txt ; do
      {
        printf "%s " "${runfile}"
        cat "${runfile}" \
          | get_electrode_ranges.gawk -v ix1=4 -v ix2=12 -v ixt=21 -v dvmax=150
      } >> "${vfile}"
    done
  
  

SAMPLE RUNS FOR TESTING

  Identified two runs from each subject, one "Bio" and one "nonBio", both apparently
  free from blinks in the inter-pulse region.

    # Copy the chosen sample runs (one "Bio" and one "nonBio" per subject)
    # into the test-data directory of the viewer program.
    datadir="${HOME}/programs/c/NEUROMAT/neuromat_show_eeg/tests/data"
    sample_runs=(
      s001_r005  # nonBio
      s001_r039  # Bio
      s002_r016  # Bio
      s002_r043  # nonBio
      s003_r009  # Bio
      s003_r030  # nonBio
      s004_r030  # nonBio
      s004_r036  # Bio
      s005_r014  # Bio
      s005_r044  # nonBio
      s006_r024  # Bio
      s006_r042  # nonBio
      s007_r028  # nonBio
      s007_r046  # Bio
      s008_r015  # Bio
      s008_r049  # nonBio
    )
    for run in "${sample_runs[@]}" ; do
      cp -av "raw-runs/${run}.txt" "${datadir}"
    done