1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Evaluation of a test on a discrete valued attribute */ |
---|
4 | /* --------------------------------------------------- */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | |
---|
9 | #include "buildex.i" |
---|
10 | |
---|
11 | |
---|
12 | /*************************************************************************/ |
---|
13 | /* */ |
---|
14 | /* Set Info[] and Gain[] for discrete partition of items Fp to Lp */ |
---|
15 | /* */ |
---|
16 | /*************************************************************************/ |
---|
17 | |
---|
18 | |
---|
19 | EvalDiscreteAtt(Att, Fp, Lp, Items) |
---|
20 | /* --------------- */ |
---|
21 | Attribute Att; |
---|
22 | ItemNo Fp, Lp; |
---|
23 | ItemCount Items; |
---|
24 | { |
---|
25 | ItemCount KnownItems; |
---|
26 | float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo(); |
---|
27 | |
---|
28 | ComputeFrequencies(Att, Fp, Lp); |
---|
29 | |
---|
30 | KnownItems = Items - ValFreq[0]; |
---|
31 | |
---|
32 | /* Special case when no known values of the attribute */ |
---|
33 | |
---|
34 | if ( Items <= ValFreq[0] ) |
---|
35 | { |
---|
36 | Verbosity(2) printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
37 | |
---|
38 | Gain[Att] = -Epsilon; |
---|
39 | Info[Att] = 0.0; |
---|
40 | return; |
---|
41 | } |
---|
42 | |
---|
43 | Gain[Att] = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]), |
---|
44 | UnknownRate[Att], MaxAttVal[Att], KnownItems); |
---|
45 | Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items; |
---|
46 | |
---|
47 | Verbosity(2) |
---|
48 | { |
---|
49 | printf("\tAtt %s", AttName[Att]); |
---|
50 | Verbosity(3) PrintDistribution(Att, MaxAttVal[Att], true); |
---|
51 | printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]); |
---|
52 | } |
---|
53 | |
---|
54 | } |
---|
55 | |
---|
56 | |
---|
57 | |
---|
58 | /*************************************************************************/ |
---|
59 | /* */ |
---|
60 | /* Compute frequency tables Freq[][] and ValFreq[] for attribute */ |
---|
61 | /* Att from items Fp to Lp, and set the UnknownRate for Att */ |
---|
62 | /* */ |
---|
63 | /*************************************************************************/ |
---|
64 | |
---|
65 | |
---|
66 | ComputeFrequencies(Att, Fp, Lp) |
---|
67 | /* ------------------ */ |
---|
68 | Attribute Att; |
---|
69 | ItemNo Fp, Lp; |
---|
70 | { |
---|
71 | Description Case; |
---|
72 | ClassNo c; |
---|
73 | DiscrValue v; |
---|
74 | ItemCount CountItems(); |
---|
75 | ItemNo p; |
---|
76 | |
---|
77 | ResetFreq(MaxAttVal[Att]); |
---|
78 | |
---|
79 | /* Determine the frequency of each class amongst cases |
---|
80 | with each possible value for the given attribute */ |
---|
81 | |
---|
82 | ForEach(p, Fp, Lp) |
---|
83 | { |
---|
84 | Case = Item[p]; |
---|
85 | Freq[ DVal(Case,Att) ][ Class(Case) ] += Weight[p]; |
---|
86 | } |
---|
87 | |
---|
88 | /* Determine the frequency of each possible value for the |
---|
89 | given attribute */ |
---|
90 | |
---|
91 | ForEach(v, 0, MaxAttVal[Att]) |
---|
92 | { |
---|
93 | ForEach(c, 0, MaxClass) |
---|
94 | { |
---|
95 | ValFreq[v] += Freq[v][c]; |
---|
96 | } |
---|
97 | } |
---|
98 | |
---|
99 | /* Set the rate of unknown values of the attribute */ |
---|
100 | |
---|
101 | UnknownRate[Att] = ValFreq[0] / CountItems(Fp, Lp); |
---|
102 | } |
---|
103 | |
---|
104 | |
---|
105 | |
---|
106 | /*************************************************************************/ |
---|
107 | /* */ |
---|
108 | /* Return the base info for items with known values of a discrete */ |
---|
109 | /* attribute, using the frequency table Freq[][] */ |
---|
110 | /* */ |
---|
111 | /*************************************************************************/ |
---|
112 | |
---|
113 | |
---|
114 | float DiscrKnownBaseInfo(KnownItems, MaxVal) |
---|
115 | /* ------------------ */ |
---|
116 | DiscrValue MaxVal; |
---|
117 | ItemCount KnownItems; |
---|
118 | { |
---|
119 | ClassNo c; |
---|
120 | ItemCount ClassCount; |
---|
121 | double Sum=0; |
---|
122 | DiscrValue v; |
---|
123 | |
---|
124 | ForEach(c, 0, MaxClass) |
---|
125 | { |
---|
126 | ClassCount = 0; |
---|
127 | ForEach(v, 1, MaxVal) |
---|
128 | { |
---|
129 | ClassCount += Freq[v][c]; |
---|
130 | } |
---|
131 | Sum += ClassCount * Log(ClassCount); |
---|
132 | } |
---|
133 | |
---|
134 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
135 | } |
---|
136 | |
---|
137 | |
---|
138 | |
---|
139 | /*************************************************************************/ |
---|
140 | /* */ |
---|
141 | /* Construct and return a node for a test on a discrete attribute */ |
---|
142 | /* */ |
---|
143 | /*************************************************************************/ |
---|
144 | |
---|
145 | |
---|
146 | DiscreteTest(Node, Att) |
---|
147 | /* ---------- */ |
---|
148 | Tree Node; |
---|
149 | Attribute Att; |
---|
150 | { |
---|
151 | ItemCount CountItems(); |
---|
152 | |
---|
153 | Sprout(Node, MaxAttVal[Att]); |
---|
154 | |
---|
155 | Node->NodeType = BrDiscr; |
---|
156 | Node->Tested = Att; |
---|
157 | Node->Errors = 0; |
---|
158 | } |
---|