Obsolete stuff
Last edited on 1999-07-28 01:54:26 by stolfi

ANOTHER SET OF KEYS FOR THE SCATTER PLOTS

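  Let's make another dictionary with words whose frequencies differ
  substantially between clusters.  The commands below do this in three
  steps: compute the word frequency differences for every pair of the
  big sections; combine the squared differences of each word over all
  pairs and keep the 50 words with the largest resulting score; and
  extract those words as the new key dictionary: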

    # Section tags whose word frequency tables are compared pairwise.
    set bigsecs = ( bio.1 cos.1 hea.1 heb.1 pha.2 str.2 zod.1 )

    # For each etag and each unordered pair of distinct sections, run
    # compute-freq-diffs on the two .frq tables and store its output,
    # sorted by decreasing numeric first field, in a .dfr file named
    # after the pair.
    foreach etag ( RAW EQV )
      set tmpsecs = ( ${bigsecs} )
      foreach sa ( ${bigsecs} )
        # Drop the current section from tmpsecs, so the inner loop sees
        # only the later sections and each pair is visited exactly once.
        shift tmpsecs
        foreach sb ( ${tmpsecs} )
          echo "${etag}: ${sa} - ${sb}"
          compute-freq-diffs ${etag}/wfreqs/subsecs/{${sa},${sb}}.frq \
            | sort -k1,1gr \
            > ${etag}/wfreqs/subsecs/${sa}-${sb}.dfr
        end
      end
    end

    # For each etag, score every word by combining its squared frequency
    # differences over all section pairs, and keep the 50 best scores
    # in plots/maxd/keys.dsq.  Expects bigsecs to be set and the .dfr
    # files of all section pairs to exist already.
    foreach etag ( RAW EQV )
      # The inner loop appends to the temp file, so it must start out
      # absent.  The -f keeps rm quiet when there is nothing to remove.
      /bin/rm -f /tmp/temp-${etag}.dsq
      /bin/rm -rf ${etag}/plots/maxd
      mkdir -p ${etag}/plots/maxd

      set tmpsecs = ( ${bigsecs} )
      set npairs = 0
      foreach sa ( ${bigsecs} )
        # Drop the current section from tmpsecs, so the inner loop sees
        # only the later sections and each pair is visited exactly once.
        shift tmpsecs
        foreach sb ( ${tmpsecs} )
          @ npairs = ${npairs} + 1
          echo "${etag}: ${sa} - ${sb}"
          # Skip words that contain a question mark or asterisk, and
          # words that are a single character optionally followed by a
          # tilde.  Emit the squared difference scaled by a million as
          # an integer, then drop the entries that came out as zero.
          gawk \
              ' (($2 \!~ /[?*]/) &&($2 \!~ /^.[~]?$/)){ \
                  dd = 1000*$1; dd = dd*dd; printf "%7d %s\n", dd, $2;}' \
              < ${etag}/wfreqs/subsecs/${sa}-${sb}.dfr \
            | grep -v ' 0 ' \
            >> /tmp/temp-${etag}.dsq
        end
      end
      # Bring the entries of each word together for combine-counts, which
      # presumably adds up the counts of equal words - TODO confirm.
      # Then undo the scaling, dividing by the number of pairs under the
      # square root, and keep the 50 largest scores.
      sort -k2,2 /tmp/temp-${etag}.dsq \
        | combine-counts \
        | gawk \
            -v np=${npairs} \
            '/./{ printf "%8.5f %s\n", sqrt($1/np)/1000, $2;}' \
        | sort -k1,1gr \
        | head -n 50 \
        > ${etag}/plots/maxd/keys.dsq
    end

    # Turn each ranked key list into a plain sorted dictionary and show it.
    foreach etag ( RAW EQV )
      # Keep only the second field of every non-empty line of keys.dsq.
      gawk '/./{print $2;}' \
          < ${etag}/plots/maxd/keys.dsq \
        | sort \
        > ${etag}/plots/maxd/keys.dic
      # Display the dictionary.  The list tool is not defined here,
      # presumably it pipes the file through the given filter - TODO confirm.
      list -filter 'fmt -w60' ${etag}/plots/maxd/keys.dic
    end

    --- RAW/plots/maxd/keys.dic ------------------------
    aiin al ar chckhy chdy chedy cheey cheol chey chol chor
    chy cthy daiin dain dal dar dol dy lchedy okaiin okal okar
    okedy okeey okeol ol or otaiin otedy oteey oteody otey
    qokaiin qokain qokal qokar qokedy qokeedy qokeey qokeol
    qoky qol sh shedy shey sho shol shor shy
    ----------------------------------------------------
    --- EQV/plots/maxd/keys.dic ------------------------
    chctho~ chdo~ chectho~ chedo~ cheedo~ cheeo~ cheol~ cheor~
    cheo~ cheto~ choin~ chol~ chor~ cho~ ch~ ctheo~ cthol~
    cthor~ ctho~ doin~ dol~ dor~ do~ lchedo~ oin~ ol~ or~
    otchdo~ otchedo~ otchol~ otchor~ otcho~ otedo~ oteedo~
    oteeo~ oteodo~ oteol~ oteor~ oteos~ oteo~ otod~ otoin~
    otolo~ otol~ otor~ oto~ sol~ tcho~ tedo~ tor~
    ----------------------------------------------------