1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Statistical routines for C4.5 */ |
---|
4 | /* ----------------------------- */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | |
---|
9 | #include "defns.i" |
---|
10 | #include "types.i" |
---|
11 | #include "extern.i" |
---|
12 | |
---|
13 | |
---|
14 | /*************************************************************************/ |
---|
15 | /* */ |
---|
16 | /* Compute the additional errors if the error rate increases to the */ |
---|
17 | /* upper limit of the confidence level. The coefficient is the */ |
---|
18 | /* square of the number of standard deviations corresponding to the */ |
---|
19 | /* selected confidence level. (Taken from Documenta Geigy Scientific */ |
---|
20 | /* Tables (Sixth Edition), p185 (with modifications).) */ |
---|
21 | /* */ |
---|
22 | /*************************************************************************/ |
---|
23 | |
---|
24 | |
---|
/*  Lookup table used by AddErrs to turn the confidence level CF into a
    number of standard deviations.  The two arrays are parallel: Val[i]
    is a confidence level and Dev[i] the deviation tabulated for it.
    Val increases while Dev decreases.  */

float Val[] = {
    0,      /* pairs with Dev 4.0  */
    0.001,  /* pairs with Dev 3.09 */
    0.005,  /* pairs with Dev 2.58 */
    0.01,   /* pairs with Dev 2.33 */
    0.05,   /* pairs with Dev 1.65 */
    0.10,   /* pairs with Dev 1.28 */
    0.20,   /* pairs with Dev 0.84 */
    0.40,   /* pairs with Dev 0.25 */
    1.00    /* pairs with Dev 0.00 */
};

float Dev[] = {
    4.0,
    3.09,
    2.58,
    2.33,
    1.65,
    1.28,
    0.84,
    0.25,
    0.00
};
---|
27 | |
---|
28 | |
---|
29 | float AddErrs(N, e) |
---|
30 | /* ------- */ |
---|
31 | ItemCount N, e; |
---|
32 | { |
---|
33 | static float Coeff=0; |
---|
34 | float Val0, Pr; |
---|
35 | |
---|
36 | if ( ! Coeff ) |
---|
37 | { |
---|
38 | /* Compute and retain the coefficient value, interpolating from |
---|
39 | the values in Val and Dev */ |
---|
40 | |
---|
41 | int i; |
---|
42 | |
---|
43 | i = 0; |
---|
44 | while ( CF > Val[i] ) i++; |
---|
45 | |
---|
46 | Coeff = Dev[i-1] + |
---|
47 | (Dev[i] - Dev[i-1]) * (CF - Val[i-1]) /(Val[i] - Val[i-1]); |
---|
48 | Coeff = Coeff * Coeff; |
---|
49 | } |
---|
50 | |
---|
51 | if ( e < 1E-6 ) |
---|
52 | { |
---|
53 | return N * (1 - exp(log(CF) / N)); |
---|
54 | } |
---|
55 | else |
---|
56 | if ( e < 0.9999 ) |
---|
57 | { |
---|
58 | Val0 = N * (1 - exp(log(CF) / N)); |
---|
59 | return Val0 + e * (AddErrs(N, 1.0) - Val0); |
---|
60 | } |
---|
61 | else |
---|
62 | if ( e + 0.5 >= N ) |
---|
63 | { |
---|
64 | return 0.67 * (N - e); |
---|
65 | } |
---|
66 | else |
---|
67 | { |
---|
68 | Pr = (e + 0.5 + Coeff/2 |
---|
69 | + sqrt(Coeff * ((e + 0.5) * (1 - (e + 0.5)/N) + Coeff/4)) ) |
---|
70 | / (N + Coeff); |
---|
71 | return (N * Pr - e); |
---|
72 | } |
---|
73 | } |
---|