# Last edited on 2023-11-29 11:18:31 by stolfi OBTAINING THE DATA FILES Copied the Malu EEG data from Claudia Vargas's external hard drive folder "EEG_PREDICAO_LPB_MALU/*" raw-eeg/{PERSON}{SESSION}.raw -- EEG full recording. raw-emg/{PERSON}{SESSION}.acq -- EMG full recording. {PERSON} = volunteer type: "s" for controls, "p" for patients. {SESSION} = session number ("01","02"...) Note that "s05" and "p05" are unrelated volunteers and sessions. There was an extra EEG folder "p05_lesaoD_canhoto". copy_malu_files_from_ufrj_hd.sh >>>JUNK Auxiliary files DataDescription.txt -- description of file contents (electrodes, markers). Touca128.pdf -- electrode names and positions. Net_Station_File_Formats.pdf -- description of ".raw" format. Counting lines subjects=( 13 14 ) for subject in ${subjects[@]} ; do file="S${subject}.raw" cat raw/${file} \ | nmeeg_convert_raw \ -unit 1.00 \ -sourceFile ${file} \ -subject ${subject} \ > /tmp/.eeg.txt nb="`cat /tmp/.eeg.txt | wc -c`" nt1="`egrep -e '^[ ]*[-+0-9]' /tmp/.eeg.txt | wc -l`" nt2="`egrep -e '^nt[ ]*[=]' /tmp/.eeg.txt`" printf "${file} : %s bytes %s frames (%s)\n" "${nb}" "${nt1}" "${nt2}" done S13.raw : 1805955471 bytes ?? frames (nt = 811665) sample data type = 4 recording date = 2012-06-25 recording time = 15:50:23.000 sampling rate = 500 Hz number of channels = 129 board gain = 1 conversion bits = 0 amplifier range = 0 uV number of data frames = 811665 S14.raw : 1791185921 bytes ?? frames (nt = 805027) sample data type = 4 recording date = 2012-06-25 recording time = 15:51:54.000 sampling rate = 500 Hz number of channels = 129 board gain = 1 conversion bits = 0 amplifier range = 0 uV number of data frames = 805027 number of event channels = 10 EXTRACTING THE RUNS Each line of the files "s{SS}_r{BBB}{RR}.txt" has 139 numbers: * measurements of 128 electrodes "C001" to "C128" (microvolts?) 
* a ``reference electrode'' "CZ" (apparently always zero) * 10 pulse channels ("HB" "HI32" "HN4" "HS" "Q128" "QB" "QN2" "QS" "epoc" "fix1") that are either 0 or something positive. Each session file contains a sequence of experimental ``blocks''. Each block consists of 32 ``runs''. Each run lasts ~6 seconds (~3000 frames) and is marked by a ``start-of-fixation'' pulse in channel 139 = "fix1" and then a ``start-of-stimulus'' pulse in channel "HB", "HS", "QB", or "QS". Extracting the runs as separate files and plotting them: today="`yyyy-mm-dd-hhmmss`" extract_runs.sh > raw-runs/extract_runs-${today}.log 2>&1 echo 'done extracting.' egrep -e '[!*][!*]' raw-runs/extract_runs-${today}.log plot_all_runs.sh \ NOSHOW ${today} raw-runs 0 130 260 500 \ > raw-runs/plot_runs-${today}.log 2>&1 echo 'done plotting.' egrep -e '[!*][!*]' raw-runs/plot_runs-${today}.log FILTERING THE RUNS Running a bandpass filter over the extracted runs. The filtered runs are flt-runs-${tag}/*.txt and the removed noise is in nse-runs-${tag}/*.txt My chosen filter band filter_runs_B.sh Tried twice, with "-trend 2 0" (directories flt-runs-B2,nse-runs-B2) and "-trend 3 0" (directories flt-runs-B,nse-runs-B). The latter seemed to be better. 
PLOTTING SPECTRA OF SOME FILTERED RUNS Plotting the power spectra of selected runs: notable_runs=( s013_r00121 s013_r00209 s013_r00229 s013_r00401 s013_r00805 s014_r00203 s014_r00425 ) for run in ${notable_runs[@]} ; do \ plot_run_spectrum.sh SHOW flt-runs-B/${run} 0 127 50 500 plot_run_spectrum.sh SHOW nse-runs-B/${run} 0 127 250 500 done STATISTICS OF ELECTRODES IN PHASES Tabulating the range of electrodes within each phase: for f in flt-runs-B/s???_r?????.txt ; do stfile="${f%%.*}.stat" echo "${f} -> ${stfile}" cat ${f} \ | get_electrode_stats.gawk \ > ${stfile} done IDENTIFYING THE RUNS Extracting the run types: (cd flt-runs-B && egrep -e '^type *[=]' s???_r?????.txt ) \ | sed -e 's/[.]txt:type *= */ /g' \ > run-types.txt LOCATING BLINKS Creating a file blinks/blinks-by-hand.txt with the time ranges (seconds) of blinks. ( cd flt-runs-B && ls s???_r?????.txt | sed -e 's:[.]txt::g' | sort ) > blinks/blinks-by-hand.txt Editing the file manually, extracting the intervals visually with 0.1 sec precision. Considered only blinks that are completely inside the run's time window (fixation and stimulus phases, plus ~0.5sec buffer on each end). Typically each blink lasts 0.45 seconds, almost never more than 0.6 sec. 
cleanup_blinks.gawk \ -v trun=7.0 \ blinks/blinks-by-hand.txt \ > blinks/blinks-clean.txt Plotting a histogram of the blink locations: bfile="blinks/blinks-clean.txt" for sid in 013 014 ; do plot_blink_locations.sh SHOW ${sid} blinks/blinks-clean.txt done Locating runs without weirdness flags and without blinks in the stimulus phase or at the end of the fixation phase: list_blink_free_runs.gawk \ -v stini=2.5 \ -v stfin=6.5 \ blinks/blinks-by-hand.txt \ > blinks/blink-safe-runs.txt 107 safe runs in 512 runs (20.90%) subject 013: 91 safe runs in 256 runs (35.55%) subject 014: 16 safe runs in 256 runs (6.25%) EXTRACTING ALL BLINKS OF EACH SUBJECT To model the blinks, we extract all blinks from all good runs of each subject and concatenate them into a single file "flt-runs-B/s${sid}_blinks.txt": vmax=160 bfile="blinks/blinks-clean.txt" for sid in 013 014 ; do xfile="flt-runs-B/s${sid}_blinks.txt" cat ${bfile} \ | egrep -e '^[ ]*'"${sid}" \ | gawk \ -v trun=7.0 \ ' // { sid = $1; rid = $2; nfl = $3; tini = 0+$5; tfin = 0+$6; if ((nfl == 0) && (tini >= 0) && (tfin <= trun)) { print sid, rid, tini, tfin; } } ' \ | extract_run_segments.gawk -v dir="flt-runs-B" \ > .xfr nt=`cat .xfr | egrep -v -e '[=]' | wc -l` printf "nt = %d\n" "${nt}" > ${xfile} cat .xfr >> ${xfile} done for sid in 013 014 ; do xfile="flt-runs-B/s${sid}_blinks.txt" nmeeg_plot_channels.sh SHOW ${xfile%%.*} 0 ${vmax} 0 99999 0 1000 1400 500 done EXTRACTING ALL NON-BLINK IMPORTANT SEGMENTS To analyze the non-blink sections, we extract all the blink-free important segments and concatenate them into a single file "flt-runs-B/s${sid}_nonblinks.txt": bfile="blinks/blink-safe-runs.txt" for sid in 013 014 ; do xfile="flt-runs-B/s${sid}_nonblinks.txt" cat ${bfile} \ | egrep -e '^[ ]*'"${sid}" \ | gawk \ ' // { sid = $1; rid = $2; tini = 2.5; tfin = 6.5; print sid, rid, tini, tfin; } ' \ | extract_run_segments.gawk -v dir="flt-runs-B" \ > .xfr nt=`cat .xfr | egrep -v -e '[=]' | wc -l` printf "nt = %d\n" "${nt}" > 
${xfile} cat .xfr >> ${xfile} done for sid in 013 014 ; do xfile="flt-runs-B/s${sid}_nonblinks.txt" nmeeg_plot_channels.sh SHOW ${xfile%%.*} 0 ${vmax} 0 9999 0 2000 1400 500 done IDENTIFYING PRINCIPAL COMPONENTS OF BLINKS for sid in 013 014 ; do xfile="flt-runs-B/s${sid}_blinks.txt" opref="flt-runs-B/s${sid}_blinks" echo "computing components" nmeeg_correl \ -outPrefix ${opref} \ -zeroMean T \ -maxComps 5 \ < ${xfile} done btype=2 for sid in 013 014 ; do opref="flt-runs-B/s${sid}_blinks" echo "plotting components" for cfile in ${opref}_P???_eig.txt ; do cpref="${cfile%.*}_b${btype}" nmeeg_animate ${cpref} 0 1 1 ${btype} 560 640 < ${cfile} done done IDENTIFYING PRINCIPAL COMPONENTS OF BLINK-FREE SEGMENTS for sid in 013 014 ; do xfile="flt-runs-B/s${sid}_nonblinks.txt" opref="flt-runs-B/s${sid}_nonblinks" echo "computing components" nmeeg_correl \ -outPrefix ${opref} \ -zeroMean F \ -maxComps 5 \ < ${xfile} done btype=2 for sid in 013 014 ; do opref="flt-runs-B/s${sid}_nonblinks" echo "plotting components" for cfile in ${opref}_P???_eig.txt ; do cpref="${cfile%.*}_b${btype}" nmeeg_animate ${cpref} 0 1 1 ${btype} 560 640 < ${cfile} done done SEPARATING THE BLINKS AND THE 10 HZ OSCILLATION Decomposing the filtered runs into a combination of the blink principal component (BL0), the general 10 Hz oscillation component (H10), and a residue (RES) that is orthogonal to both patterns: vmax=160 subruns=( 013:00229 014:00425) for sr in ${subruns[@]} ; do subjid="${sr%%:*}" runid="${sr##*:}" ppref=flt-runs-B/s${subjid} opref=flt-runs-B/s${subjid}_r${runid} nmeeg_comp_analysis \ -pattern BL0 ${ppref}_blinks_P000_eig.txt \ -pattern H10 ${ppref}_nonblinks_P000_eig.txt \ -normalize \ -writeComp BL0 ${opref}_BL0.txt = BL0 \ -writeComp H10 ${opref}_H10.txt = H10 \ -writeComp BLH ${opref}_BLH.txt = BL0 H10 \ -delete BL0 H10 \ < ${opref}.txt \ > ${opref}_RES.txt nmeeg_plot_channels.sh SHOW ${opref}_BL0 0 ${vmax} 0 9999 0 0 1400 500 nmeeg_plot_channels.sh SHOW ${opref}_H10 0 ${vmax} 0 
9999 0 0 1400 500 nmeeg_plot_channels.sh SHOW ${opref}_BLH 0 ${vmax} 0 9999 0 0 1400 500 nmeeg_plot_channels.sh SHOW ${opref}_RES 0 50 0 9999 0 0 1400 500 done EXPORTING SOME TO DROPBOX Moving selected files to Dropbox for easy access: projdir="." dropdir="${HOME}/Dropbox/eeg_sinal_ghislain/2013-11-15-stolfi" echo "${notable_runs[@]}" for run in ${notable_runs[@]} ; do cp -av ${projdir}/raw-runs/${run}{.txt,.png,_pwr.png} ${dropdir}/raw-runs/ cp -av ${projdir}/flt-runs-B/${run}{.txt,.png,_pwr.png} ${dropdir}/flt-runs-B/ cp -av ${projdir}/nse-runs-B/${run}{.txt,.png,_pwr.png} ${dropdir}/nse-runs-B/ done >>>> TO FIX AND DO >>>> Tabulating the range of electrodes Fp1 and Fp2 between trigger pulses 1 and 4, and flagging those that vary too much: vfile="flt-runs-B/Fp-variation.txt" rm -f ${vfile} for f in flt-runs-B/s???_r?????.txt ; do printf "%s " "${f}" >> ${vfile} cat ${f} \ | get_electrode_ranges.gawk -v ix1=4 -v ix2=12 -v ixt=21 -v dvmax=150 \ >> ${vfile} done SAMPLE RUNS FOR TESTING Identified two runs from each subject, one "Bio" and one "nonBio", both apparently free from blinks in the inter-pulse region. datadir=${HOME}/programs/c/NEUROMAT/neuromat_show_eeg/tests/data cp -av raw-runs/s001_r005.txt ${datadir} # nonBio cp -av raw-runs/s001_r039.txt ${datadir} # Bio cp -av raw-runs/s002_r016.txt ${datadir} # Bio cp -av raw-runs/s002_r043.txt ${datadir} # nonBio cp -av raw-runs/s003_r009.txt ${datadir} # Bio cp -av raw-runs/s003_r030.txt ${datadir} # nonBio cp -av raw-runs/s004_r030.txt ${datadir} # nonBio cp -av raw-runs/s004_r036.txt ${datadir} # Bio cp -av raw-runs/s005_r014.txt ${datadir} # Bio cp -av raw-runs/s005_r044.txt ${datadir} # nonBio cp -av raw-runs/s006_r024.txt ${datadir} # Bio cp -av raw-runs/s006_r042.txt ${datadir} # nonBio cp -av raw-runs/s007_r028.txt ${datadir} # nonBio cp -av raw-runs/s007_r046.txt ${datadir} # Bio cp -av raw-runs/s008_r015.txt ${datadir} # Bio cp -av raw-runs/s008_r049.txt ${datadir} # nonBio