#csh

#---------------------------------------------------------------------
# N-way cross-validation script
#---------------------------------------------------------------------
#
# invocation:
#   csh xval.sh filestem N [options for c4.5 and c4.5rules] [suffix]
#
# filestem.data (and filestem.test, if present) are pooled, passed
# through xval-prep, and cut into N blocks.  For each block in turn,
# c4.5 and c4.5rules are run with that block as XDF.test and the
# remaining examples as XDF.data.
#
# individual results from each block are left in
#     filestem.[rt]o*[suffix],
# averages over all blocks in
#     filestem.[rt]res[suffix]
#
# N must be at least 2 and no larger than the number of examples.
# The script exits with status 1 on a usage error.
#
# Temporary files named XDF.* are created in the current directory and
# removed at the end of the run.
#---------------------------------------------------------------------

if ( $#argv < 2 ) then
    echo "usage: csh xval.sh filestem N [options for c4.5 and c4.5rules] [suffix]"
    exit 1
endif

# A single block would leave nothing to train on.
if ( $2 < 2 ) then
    echo "N must be at least 2"
    exit 1
endif

#   sort the options into result suffix and control options for the programs
#   Note: for options with values, there must be no space between the option
#   name and value; e.g. "-v1", not "-v 1"

set treeopts = ()
set ruleopts = ()
set suffix = ""

foreach i ( $argv[3-] )
    switch ( $i )

    # "+xyz" names the result suffix; if given more than once the last wins
    case "+*":
        set suffix = $i
        breaksw

    # options passed to both c4.5 and c4.5rules
    case "-v*":
    case "-c*":
        set treeopts = ($treeopts $i)
        set ruleopts = ($ruleopts $i)
        breaksw

    # options passed to c4.5 only
    case "-p":
    case "-t*":
    case "-w*":
    case "-i*":
    case "-g":
    case "-s":
    case "-m*":
        set treeopts = ($treeopts $i)
        breaksw

    # options passed to c4.5rules only
    case "-r*":
    case "-F*":
    case "-a":
        set ruleopts = ($ruleopts $i)
        breaksw

    default:
        echo "unrecognised or inappropriate option" $i
        exit 1
    endsw
end

#   prepare the data for cross-validation

# A missing filestem.test only draws a complaint from cat; the data
# file alone is then used.
cat $1.data $1.test | xval-prep $2 >XDF.data
cp /dev/null XDF.test

# Drop any link left behind by an interrupted run so that ln cannot fail.
rm -f XDF.names
ln $1.names XDF.names

# Discard per-block results of an earlier run.  On a first run the
# pattern matches nothing, which csh would otherwise report as
# "No match"; nonomatch passes the pattern through and rm -f ignores it.
# Note that the "*" also matches results whose suffix merely ends in
# $suffix (every suffix, when $suffix is empty).
set nonomatch
rm -f $1.[rt]o[0-9]*$suffix
unset nonomatch

set examples = `wc -l < XDF.data`
if ( $examples < $2 ) then
    echo "cannot make $2 blocks from $examples examples"
    rm -f XDF.data XDF.test XDF.names
    exit 1
endif

# The first $large blocks hold one example more than the others, so
# that the block sizes add up to exactly $examples.
set large = `expr $examples % $2`
set segsize = `expr \( $examples / $2 \) + 1`

#   perform the cross-validation trials

set i = 0
while ( $i < $2 )
    if ( $i == $large ) set segsize = `expr $examples / $2`
    set trainsize = `expr $examples - $segsize`

    # Rotate: the previous test block goes to the front, the first
    # $trainsize lines become the training set and the remaining
    # $segsize lines the new test block.  An explicit two-way cut is
    # used because "split -K" emits as many K-line pieces as needed,
    # and a third piece (possible when N is 2 and the number of
    # examples is odd) would be silently lost.
    cat XDF.test XDF.data >XDF.all
    awk "NR <= $trainsize" XDF.all >XDF.data
    awk "NR > $trainsize" XDF.all >XDF.test

    c4.5 -f XDF -u $treeopts >$1.to$i$suffix
    c4.5rules -f XDF -u $ruleopts >$1.ro$i$suffix

    @ i++
end

#   remove the temporary files and summarize results

rm -f XDF.*

# Only lines containing "<<" are passed on to the averaging program
# (presumably the per-trial summary lines -- confirm against c4.5 output).
cat $1.to[0-9]*$suffix | grep "<<" | average >$1.tres$suffix
cat $1.ro[0-9]*$suffix | grep "<<" | average >$1.rres$suffix