#csh

#---------------------------------------------------------------------
# N-way cross-validation script
#---------------------------------------------------------------------
#
# invocation:
#   csh xval.sh filestem N [options for c4.5 and c4.5rules] [suffix]
#
# arguments:
#   filestem   stem of the input files; filestem.data, filestem.test and
#              filestem.names are read
#   N          number of cross-validation blocks (must be at least 2)
#   options    passed on to c4.5 and/or c4.5rules (see the switch below)
#   suffix     any argument starting with "+"; appended to every result
#              file name
#
# individual results from each block are left in
#   filestem.[rt]o*[suffix],
# averages over all blocks in
#   filestem.[rt]res[suffix]
#
# Working files XDF.data, XDF.test, XDF.names, xaa and xab are created in
# the current directory and any existing files of those names are
# overwritten.  The helper programs xval-prep and average must be on the
# search path, as must c4.5 and c4.5rules.
#
# exit status: 0 on completion, 1 on a usage or argument error.
#---------------------------------------------------------------------

# the file stem and the block count are both mandatory

if ( $#argv < 2 ) then
    echo "usage: csh xval.sh filestem N [options for c4.5 and c4.5rules] [suffix]"
    exit 1
endif

# fewer than two blocks would ask split for a zero-line training set

if ( $2 < 2 ) then
    echo "number of blocks must be at least 2, got" $2
    exit 1
endif

# sort the options into result suffix and control options for the programs
# Note: for options with values, there must be no space between the option
# name and value; e.g. "-v1", not "-v 1"

set treeopts =
set ruleopts =
set suffix =

foreach opt ( $argv[3-] )
    switch ( $opt )

    # "+something" names the suffix for the result files
    case "+*":
        set suffix = $opt
        breaksw

    # options handed to both c4.5 and c4.5rules
    case "-v*":
    case "-c*":
        set treeopts = ($treeopts $opt)
        set ruleopts = ($ruleopts $opt)
        breaksw

    # options handed to c4.5 only
    case "-p":
    case "-t*":
    case "-w*":
    case "-i*":
    case "-g":
    case "-s":
    case "-m*":
        set treeopts = ($treeopts $opt)
        breaksw

    # options handed to c4.5rules only
    case "-r*":
    case "-F*":
    case "-a":
        set ruleopts = ($ruleopts $opt)
        breaksw

    default:
        echo "unrecognised or inappropriate option" $opt
        # a bare "exit" would return the status of the echo, i.e. success
        exit 1
    endsw
end

# clear out leftovers: working files from an interrupted run (a stale
# XDF.names would make the ln below fail) and per-block results from an
# earlier run with the same suffix.  nonomatch stops csh from raising
# "No match" when there is nothing to remove.

set nonomatch
rm -f XDF.* ${1}.[rt]o[0-9]*${suffix}
unset nonomatch

# prepare the data for cross-validation: all cases from the .data and .test
# files are pooled and passed through xval-prep; the test set starts empty

cat ${1}.data ${1}.test | xval-prep $2 >XDF.data
cp /dev/null XDF.test
ln ${1}.names XDF.names

set examples = `wc -l < XDF.data`

# with fewer cases than blocks some block would be empty, split would not
# produce xab, and the previous test block would be reused silently

if ( $examples < $2 ) then
    echo "too few cases ($examples) for $2 blocks"
    rm -f XDF.*
    exit 1
endif

# the first (examples % N) blocks hold one case more than the rest

set large = `expr $examples % $2`
set segsize = `expr \( $examples / $2 \) + 1`

# perform the cross-validation trials

set block = 0
while ( $block < $2 )

    # from this block on, use the smaller block size
    if ( $block == $large ) set segsize = `expr $examples / $2`

    # rotate: the previous test block goes to the front of the training
    # data and the last segsize cases become the new test block
    set trainsize = `expr $examples - $segsize`
    cat XDF.test XDF.data | split -l $trainsize
    mv xaa XDF.data
    mv xab XDF.test

    c4.5 -f XDF -u $treeopts >${1}.to${block}${suffix}
    c4.5rules -f XDF -u $ruleopts >${1}.ro${block}${suffix}

    @ block++
end

# remove the temporary files and summarize results: the lines containing
# "<<" from every per-block output are fed to average

rm -f XDF.*
cat ${1}.to[0-9]*${suffix} | grep "<<" | average >${1}.tres${suffix}
cat ${1}.ro[0-9]*${suffix} | grep "<<" | average >${1}.rres${suffix}