1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Evaluation of a test on a discrete valued attribute */ |
---|
4 | /* --------------------------------------------------- */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | #include "buildex.i" |
---|
9 | |
---|
10 | /*************************************************************************/ |
---|
11 | /* */ |
---|
12 | /* Set Info[] and Gain[] for discrete partition of items Fp to Lp */ |
---|
13 | /* */ |
---|
14 | /*************************************************************************/ |
---|
15 | |
---|
16 | EvalDiscreteAtt(Att, Fp, Lp, Items) |
---|
17 | /* --------------- */ |
---|
18 | Attribute Att;ItemNo Fp, Lp;ItemCount Items; { |
---|
19 | ItemCount KnownItems; |
---|
20 | float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo(); |
---|
21 | |
---|
22 | ComputeFrequencies(Att, Fp, Lp); |
---|
23 | |
---|
24 | KnownItems = Items - ValFreq[0]; |
---|
25 | |
---|
26 | /* Special case when no known values of the attribute */ |
---|
27 | |
---|
28 | if (Items <= ValFreq[0]) { |
---|
29 | Verbosity(2) |
---|
30 | printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
31 | |
---|
32 | Gain[Att] = -Epsilon; |
---|
33 | Info[Att] = 0.0; |
---|
34 | return; |
---|
35 | } |
---|
36 | |
---|
37 | Gain[Att] = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]), |
---|
38 | UnknownRate[Att], MaxAttVal[Att], KnownItems); |
---|
39 | Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items; |
---|
40 | |
---|
41 | Verbosity(2) { |
---|
42 | printf("\tAtt %s", AttName[Att]); |
---|
43 | Verbosity(3) |
---|
44 | PrintDistribution(Att, MaxAttVal[Att], true); |
---|
45 | printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]); |
---|
46 | } |
---|
47 | |
---|
48 | } |
---|
49 | |
---|
50 | EvalDiscreteAtt_Discr(Att, Fp, Lp, Items, Freq_discr, ValFreq_discr, UnknownRate_discr, the_gain, the_info) |
---|
51 | /* --------------- */ |
---|
52 | Attribute Att;ItemNo Fp, Lp;ItemCount Items; ItemCount** Freq_discr; ItemCount* ValFreq_discr; float* UnknownRate_discr; |
---|
53 | float* the_gain; float* the_info;{ |
---|
54 | ItemCount KnownItems; |
---|
55 | float DiscrKnownBaseInfo_Discr(), ComputeGain_Discr(), TotalInfo(); |
---|
56 | |
---|
57 | ComputeFrequencies_Discr(Att, Fp, Lp, Freq_discr, ValFreq_discr, UnknownRate_discr); |
---|
58 | |
---|
59 | KnownItems = Items - ValFreq_discr[0]; |
---|
60 | |
---|
61 | /* Special case when no known values of the attribute */ |
---|
62 | |
---|
63 | if (Items <= ValFreq_discr[0]) { |
---|
64 | Verbosity(2) |
---|
65 | printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
66 | |
---|
67 | *the_gain = -Epsilon; |
---|
68 | *the_info = 0.0; |
---|
69 | return; |
---|
70 | } |
---|
71 | |
---|
72 | *the_gain = ComputeGain_Discr(DiscrKnownBaseInfo_Discr(KnownItems, MaxAttVal[Att], Freq_discr), |
---|
73 | UnknownRate_discr[Att], MaxAttVal[Att], KnownItems, Freq_discr, ValFreq_discr); |
---|
74 | *the_info = TotalInfo(ValFreq_discr, 0, MaxAttVal[Att]) / Items; |
---|
75 | |
---|
76 | Verbosity(2) { |
---|
77 | printf("\tAtt %s", AttName[Att]); |
---|
78 | Verbosity(3) |
---|
79 | PrintDistribution_Discr(Att, MaxAttVal[Att], true, Freq_discr); |
---|
80 | printf("\tinf %.3f, gain %.3f\n", *the_info, *the_gain); |
---|
81 | } |
---|
82 | |
---|
83 | } |
---|
84 | /*************************************************************************/ |
---|
85 | /* */ |
---|
86 | /* Compute frequency tables Freq[][] and ValFreq[] for attribute */ |
---|
87 | /* Att from items Fp to Lp, and set the UnknownRate for Att */ |
---|
88 | /* */ |
---|
89 | /*************************************************************************/ |
---|
90 | |
---|
91 | ComputeFrequencies(Att, Fp, Lp) |
---|
92 | /* ------------------ */ |
---|
93 | Attribute Att;ItemNo Fp, Lp; { |
---|
94 | Description Case; |
---|
95 | ClassNo c; |
---|
96 | DiscrValue v; |
---|
97 | ItemCount CountItems(); |
---|
98 | ItemNo p; |
---|
99 | |
---|
100 | ResetFreq(MaxAttVal[Att], Freq, ValFreq); |
---|
101 | |
---|
102 | /* Determine the frequency of each class amongst cases |
---|
103 | with each possible value for the given attribute */ |
---|
104 | |
---|
105 | ForEach(p, Fp, Lp) { |
---|
106 | Case = Item[p]; |
---|
107 | Freq[DVal(Case,Att)][Class(Case)] += Weight[p]; |
---|
108 | } |
---|
109 | |
---|
110 | /* Determine the frequency of each possible value for the |
---|
111 | given attribute */ |
---|
112 | |
---|
113 | ForEach(v, 0, MaxAttVal[Att]) { |
---|
114 | ForEach(c, 0, MaxClass) { |
---|
115 | ValFreq[v] += Freq[v][c]; |
---|
116 | } |
---|
117 | } |
---|
118 | |
---|
119 | /* Set the rate of unknown values of the attribute */ |
---|
120 | |
---|
121 | UnknownRate[Att] = ValFreq[0] / CountItems(Fp, Lp); |
---|
122 | } |
---|
123 | |
---|
124 | ComputeFrequencies_Discr(Att, Fp, Lp, Freq_discr, ValFreq_discr, UnknownRate_discr) |
---|
125 | /* ------------------ */ |
---|
126 | Attribute Att;ItemNo Fp, Lp; ItemCount** Freq_discr; ItemCount* ValFreq_discr; float* UnknownRate_discr;{ |
---|
127 | Description Case; |
---|
128 | ClassNo c; |
---|
129 | DiscrValue v; |
---|
130 | ItemCount CountItems(); |
---|
131 | ItemNo p; |
---|
132 | |
---|
133 | ResetFreq_discr(MaxAttVal[Att], Freq_discr, ValFreq_discr); |
---|
134 | |
---|
135 | /* Determine the frequency of each class amongst cases |
---|
136 | with each possible value for the given attribute */ |
---|
137 | |
---|
138 | ForEach(p, Fp, Lp) { |
---|
139 | Case = Item[p]; |
---|
140 | Freq_discr[DVal(Case,Att)][Class(Case)] += Weight[p]; |
---|
141 | } |
---|
142 | |
---|
143 | /* Determine the frequency of each possible value for the |
---|
144 | given attribute */ |
---|
145 | |
---|
146 | ForEach(v, 0, MaxAttVal[Att]) { |
---|
147 | ForEach(c, 0, MaxClass) { |
---|
148 | ValFreq_discr[v] += Freq_discr[v][c]; |
---|
149 | } |
---|
150 | } |
---|
151 | |
---|
152 | /* Set the rate of unknown values of the attribute */ |
---|
153 | |
---|
154 | UnknownRate_discr[Att] = ValFreq_discr[0] / CountItems(Fp, Lp); |
---|
155 | } |
---|
156 | /*************************************************************************/ |
---|
157 | /* */ |
---|
158 | /* Return the base info for items with known values of a discrete */ |
---|
159 | /* attribute, using the frequency table Freq[][] */ |
---|
160 | /* */ |
---|
161 | /*************************************************************************/ |
---|
162 | |
---|
163 | float DiscrKnownBaseInfo(KnownItems, MaxVal) |
---|
164 | /* ------------------ */ |
---|
165 | DiscrValue MaxVal;ItemCount KnownItems; { |
---|
166 | ClassNo c; |
---|
167 | ItemCount ClassCount; |
---|
168 | double Sum = 0; |
---|
169 | DiscrValue v; |
---|
170 | |
---|
171 | ForEach(c, 0, MaxClass) { |
---|
172 | ClassCount = 0; |
---|
173 | ForEach(v, 1, MaxVal) { |
---|
174 | ClassCount += Freq[v][c]; |
---|
175 | } |
---|
176 | Sum += ClassCount * Log(ClassCount); |
---|
177 | } |
---|
178 | |
---|
179 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
180 | } |
---|
181 | |
---|
182 | float DiscrKnownBaseInfo_Discr(KnownItems, MaxVal, Freq_discr) |
---|
183 | /* ------------------ */ |
---|
184 | DiscrValue MaxVal;ItemCount KnownItems; ItemCount** Freq_discr;{ |
---|
185 | ClassNo c; |
---|
186 | ItemCount ClassCount; |
---|
187 | double Sum = 0; |
---|
188 | DiscrValue v; |
---|
189 | |
---|
190 | ForEach(c, 0, MaxClass) { |
---|
191 | ClassCount = 0; |
---|
192 | ForEach(v, 1, MaxVal) { |
---|
193 | ClassCount += Freq_discr[v][c]; |
---|
194 | } |
---|
195 | Sum += ClassCount * Log(ClassCount); |
---|
196 | } |
---|
197 | |
---|
198 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
199 | } |
---|
200 | |
---|
201 | /*************************************************************************/ |
---|
202 | /* */ |
---|
203 | /* Construct and return a node for a test on a discrete attribute */ |
---|
204 | /* */ |
---|
205 | /*************************************************************************/ |
---|
206 | |
---|
207 | DiscreteTest(Node, Att) |
---|
208 | /* ---------- */ |
---|
209 | Tree Node;Attribute Att; { |
---|
210 | ItemCount CountItems(); |
---|
211 | |
---|
212 | Sprout(Node, MaxAttVal[Att]); |
---|
213 | |
---|
214 | Node->NodeType = BrDiscr; |
---|
215 | Node->Tested = Att; |
---|
216 | Node->Errors = 0; |
---|
217 | } |
---|