#! /bin/csh -f
# Last edited on 2001-04-08 21:43:49 by stolfi

set usage = "$0 < INNAME.sep > OUTNAME.dic"

# Converts a lexicon from the Sao Carlos septuple format (INNAME.sep)
# to the new format (OUTNAME.dic).
#
# The input is read from standard input, one entry per line, and is
# assumed to be in the format
#   (PAL,ORD,CATCODE,ATTRS,CANON,XX,YY)
# The output is written to standard output in the format
#   CATEG(PAL,CANON)
# where CATEG is the CATCODE field copied unchanged.
#
# Single-letter nouns and abbreviations are removed; concretely, an
# entry is dropped when PAL has length 1 and CATCODE matches
# /^[ac][1-4][n-r]/.  Lines that are empty after the parentheses
# have been deleted are skipped.
#
# Processing notes:
#   * "tr" deletes every "(" and ")" in the input, not only the outer
#     pair, so that the fields can be split on commas alone.
#   * Entries with fewer than five fields are still printed, with the
#     missing fields left empty.
#   * "tr" reads the standard input of the script directly; no leading
#     "cat" is needed.
#   * Each line of the gawk program ends with a backslash because csh
#     requires a newline inside a quoted string to be escaped.

tr -d '()' \
  | gawk -v FS=',' \
      ' /./{ \
          if (($3 ~ /^[ac][1-4][n-r]/) && (length($1) == 1)) { next; } \
          printf "%s(%s,%s)\n",$3,$1,$5; \
        } \
      '