| 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Statistical routines for C4.5 */ |
---|
| 4 | /* ----------------------------- */ |
---|
| 5 | /* */ |
---|
| 6 | /*************************************************************************/ |
---|
| 7 | |
---|
| 8 | |
---|
| 9 | #include "defns.i" |
---|
| 10 | #include "types.i" |
---|
| 11 | #include "extern.i" |
---|
| 12 | |
---|
| 13 | |
---|
| 14 | /*************************************************************************/ |
---|
| 15 | /* */ |
---|
| 16 | /* Compute the additional errors if the error rate increases to the */ |
---|
| 17 | /* upper limit of the confidence level. The coefficient is the */ |
---|
| 18 | /* square of the number of standard deviations corresponding to the */ |
---|
| 19 | /* selected confidence level. (Taken from Documenta Geigy Scientific */ |
---|
| 20 | /* Tables (Sixth Edition), p185 (with modifications).) */ |
---|
| 21 | /* */ |
---|
| 22 | /*************************************************************************/ |
---|
| 23 | |
---|
| 24 | |
---|
/*  Lookup tables used by AddErrs: Val holds confidence levels in	 */
/*  ascending order and Dev holds the entry paired with each level	 */
/*  (per the header comment, a number of standard deviations).		 */
/*  AddErrs interpolates linearly between adjacent entries.		 */

float Val[] = {   0, 0.001, 0.005, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00 };

float Dev[] = { 4.0,  3.09,  2.58, 2.33, 1.65, 1.28, 0.84, 0.25, 0.00 };
---|
| 27 | |
---|
| 28 | |
---|
| 29 | float AddErrs(N, e) |
---|
| 30 | /* ------- */ |
---|
| 31 | ItemCount N, e; |
---|
| 32 | { |
---|
| 33 | static float Coeff=0; |
---|
| 34 | float Val0, Pr; |
---|
| 35 | |
---|
| 36 | if ( ! Coeff ) |
---|
| 37 | { |
---|
| 38 | /* Compute and retain the coefficient value, interpolating from |
---|
| 39 | the values in Val and Dev */ |
---|
| 40 | |
---|
| 41 | int i; |
---|
| 42 | |
---|
| 43 | i = 0; |
---|
| 44 | while ( CF > Val[i] ) i++; |
---|
| 45 | |
---|
| 46 | Coeff = Dev[i-1] + |
---|
| 47 | (Dev[i] - Dev[i-1]) * (CF - Val[i-1]) /(Val[i] - Val[i-1]); |
---|
| 48 | Coeff = Coeff * Coeff; |
---|
| 49 | } |
---|
| 50 | |
---|
| 51 | if ( e < 1E-6 ) |
---|
| 52 | { |
---|
| 53 | return N * (1 - exp(log(CF) / N)); |
---|
| 54 | } |
---|
| 55 | else |
---|
| 56 | if ( e < 0.9999 ) |
---|
| 57 | { |
---|
| 58 | Val0 = N * (1 - exp(log(CF) / N)); |
---|
| 59 | return Val0 + e * (AddErrs(N, 1.0) - Val0); |
---|
| 60 | } |
---|
| 61 | else |
---|
| 62 | if ( e + 0.5 >= N ) |
---|
| 63 | { |
---|
| 64 | return 0.67 * (N - e); |
---|
| 65 | } |
---|
| 66 | else |
---|
| 67 | { |
---|
| 68 | Pr = (e + 0.5 + Coeff/2 |
---|
| 69 | + sqrt(Coeff * ((e + 0.5) * (1 - (e + 0.5)/N) + Coeff/4)) ) |
---|
| 70 | / (N + Coeff); |
---|
| 71 | return (N * Pr - e); |
---|
| 72 | } |
---|
| 73 | } |
---|