1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Evaluation of a test on a discrete valued attribute */ |
---|
4 | /* --------------------------------------------------- */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | #include "buildex.i" |
---|
9 | |
---|
10 | /*************************************************************************/ |
---|
11 | /* */ |
---|
12 | /* Set Info[] and Gain[] for discrete partition of items Fp to Lp */ |
---|
13 | /* */ |
---|
14 | /*************************************************************************/ |
---|
15 | |
---|
16 | EvalDiscreteAtt(Att, Fp, Lp, Items) |
---|
17 | /* --------------- */ |
---|
18 | Attribute Att;ItemNo Fp, Lp;ItemCount Items; { |
---|
19 | ItemCount KnownItems; |
---|
20 | float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo(); |
---|
21 | |
---|
22 | ComputeFrequencies(Att, Fp, Lp); |
---|
23 | |
---|
24 | KnownItems = Items - ValFreq[0]; |
---|
25 | |
---|
26 | /* Special case when no known values of the attribute */ |
---|
27 | |
---|
28 | if (Items <= ValFreq[0]) { |
---|
29 | Verbosity(2) |
---|
30 | printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
31 | |
---|
32 | Gain[Att] = -Epsilon; |
---|
33 | Info[Att] = 0.0; |
---|
34 | return; |
---|
35 | } |
---|
36 | |
---|
37 | Gain[Att] = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]), |
---|
38 | UnknownRate[Att], MaxAttVal[Att], KnownItems); |
---|
39 | Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items; |
---|
40 | |
---|
41 | Verbosity(2) { |
---|
42 | printf("\tAtt %s", AttName[Att]); |
---|
43 | Verbosity(3) |
---|
44 | PrintDistribution(Att, MaxAttVal[Att], true); |
---|
45 | printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]); |
---|
46 | } |
---|
47 | |
---|
48 | } |
---|
49 | |
---|
50 | EvalDiscreteAtt_Discr(Att, Fp, Lp, Items, Freq_discr, ValFreq_discr, UnknownRate_discr, the_gain, the_info) |
---|
51 | /* --------------- */ |
---|
52 | Attribute Att;ItemNo Fp, Lp;ItemCount Items; ItemCount** Freq_discr; ItemCount* ValFreq_discr; float* UnknownRate_discr; |
---|
53 | float* the_gain; float* the_info;{ |
---|
54 | ItemCount KnownItems; |
---|
55 | float DiscrKnownBaseInfo_Discr(), ComputeGain_Discr(), TotalInfo(); |
---|
56 | |
---|
57 | ComputeFrequencies_Discr(Att, Fp, Lp, Freq_discr, ValFreq_discr, UnknownRate_discr); |
---|
58 | |
---|
59 | KnownItems = Items - ValFreq_discr[0]; |
---|
60 | |
---|
61 | /* Special case when no known values of the attribute */ |
---|
62 | |
---|
63 | //printf("nr threads executing: %d\n", omp_get_num_threads()); |
---|
64 | |
---|
65 | if (Items <= ValFreq_discr[0]) { |
---|
66 | Verbosity(2) |
---|
67 | printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
68 | |
---|
69 | //*the_gain = -Epsilon; |
---|
70 | Gain[Att] = -Epsilon; |
---|
71 | //*the_info = 0.0; |
---|
72 | Info[Att] = 0.0; |
---|
73 | return; |
---|
74 | } |
---|
75 | |
---|
76 | //*the_gain = ComputeGain_Discr(DiscrKnownBaseInfo_Discr(KnownItems, MaxAttVal[Att], Freq_discr), |
---|
77 | //UnknownRate_discr[Att], MaxAttVal[Att], KnownItems, Freq_discr, ValFreq_discr); |
---|
78 | Gain[Att] = ComputeGain_Discr(DiscrKnownBaseInfo_Discr(KnownItems, MaxAttVal[Att], Freq_discr), |
---|
79 | UnknownRate_discr[Att], MaxAttVal[Att], KnownItems, Freq_discr, ValFreq_discr); |
---|
80 | //*the_info = TotalInfo(ValFreq_discr, 0, MaxAttVal[Att]) / Items; |
---|
81 | Info[Att] = TotalInfo(ValFreq_discr, 0, MaxAttVal[Att]) / Items; |
---|
82 | |
---|
83 | Verbosity(2) { |
---|
84 | printf("\tAtt %s", AttName[Att]); |
---|
85 | Verbosity(3) |
---|
86 | PrintDistribution_Discr(Att, MaxAttVal[Att], true, Freq_discr); |
---|
87 | //printf("\tinf %.3f, gain %.3f\n", *the_info, *the_gain); |
---|
88 | printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]); |
---|
89 | } |
---|
90 | |
---|
91 | } |
---|
92 | /*************************************************************************/ |
---|
93 | /* */ |
---|
94 | /* Compute frequency tables Freq[][] and ValFreq[] for attribute */ |
---|
95 | /* Att from items Fp to Lp, and set the UnknownRate for Att */ |
---|
96 | /* */ |
---|
97 | /*************************************************************************/ |
---|
98 | |
---|
99 | ComputeFrequencies(Att, Fp, Lp) |
---|
100 | /* ------------------ */ |
---|
101 | Attribute Att;ItemNo Fp, Lp; { |
---|
102 | Description Case; |
---|
103 | ClassNo c; |
---|
104 | DiscrValue v; |
---|
105 | ItemCount CountItems(); |
---|
106 | ItemNo p; |
---|
107 | |
---|
108 | ResetFreq(MaxAttVal[Att], Freq, ValFreq); |
---|
109 | |
---|
110 | /* Determine the frequency of each class amongst cases |
---|
111 | with each possible value for the given attribute */ |
---|
112 | |
---|
113 | ForEach(p, Fp, Lp) { |
---|
114 | Case = Item[p]; |
---|
115 | Freq[DVal(Case,Att)][Class(Case)] += Weight[p]; |
---|
116 | } |
---|
117 | |
---|
118 | /* Determine the frequency of each possible value for the |
---|
119 | given attribute */ |
---|
120 | |
---|
121 | ForEach(v, 0, MaxAttVal[Att]) { |
---|
122 | ForEach(c, 0, MaxClass) { |
---|
123 | ValFreq[v] += Freq[v][c]; |
---|
124 | } |
---|
125 | } |
---|
126 | |
---|
127 | /* Set the rate of unknown values of the attribute */ |
---|
128 | |
---|
129 | UnknownRate[Att] = ValFreq[0] / CountItems(Fp, Lp); |
---|
130 | } |
---|
131 | |
---|
132 | ComputeFrequencies_Discr(Att, Fp, Lp, Freq_discr, ValFreq_discr, UnknownRate_discr) |
---|
133 | /* ------------------ */ |
---|
134 | Attribute Att;ItemNo Fp, Lp; ItemCount** Freq_discr; ItemCount* ValFreq_discr; float* UnknownRate_discr;{ |
---|
135 | Description Case; |
---|
136 | ClassNo c; |
---|
137 | DiscrValue v; |
---|
138 | ItemCount CountItems(); |
---|
139 | ItemNo p; |
---|
140 | |
---|
141 | ResetFreq_discr(MaxAttVal[Att], Freq_discr, ValFreq_discr); |
---|
142 | |
---|
143 | /* Determine the frequency of each class amongst cases |
---|
144 | with each possible value for the given attribute */ |
---|
145 | |
---|
146 | ForEach(p, Fp, Lp) { |
---|
147 | Case = Item[p]; |
---|
148 | |
---|
149 | Freq_discr[DVal(Case,Att)][Class(Case)] += Weight[p]; |
---|
150 | } |
---|
151 | |
---|
152 | /* Determine the frequency of each possible value for the |
---|
153 | given attribute */ |
---|
154 | |
---|
155 | ForEach(v, 0, MaxAttVal[Att]) { |
---|
156 | ForEach(c, 0, MaxClass) { |
---|
157 | ValFreq_discr[v] += Freq_discr[v][c]; |
---|
158 | } |
---|
159 | } |
---|
160 | |
---|
161 | /* Set the rate of unknown values of the attribute */ |
---|
162 | |
---|
163 | UnknownRate_discr[Att] = ValFreq_discr[0] / CountItems(Fp, Lp); |
---|
164 | } |
---|
165 | /*************************************************************************/ |
---|
166 | /* */ |
---|
167 | /* Return the base info for items with known values of a discrete */ |
---|
168 | /* attribute, using the frequency table Freq[][] */ |
---|
169 | /* */ |
---|
170 | /*************************************************************************/ |
---|
171 | |
---|
172 | float DiscrKnownBaseInfo(KnownItems, MaxVal) |
---|
173 | /* ------------------ */ |
---|
174 | DiscrValue MaxVal;ItemCount KnownItems; { |
---|
175 | ClassNo c; |
---|
176 | ItemCount ClassCount; |
---|
177 | double Sum = 0; |
---|
178 | DiscrValue v; |
---|
179 | |
---|
180 | ForEach(c, 0, MaxClass) { |
---|
181 | ClassCount = 0; |
---|
182 | ForEach(v, 1, MaxVal) { |
---|
183 | ClassCount += Freq[v][c]; |
---|
184 | } |
---|
185 | Sum += ClassCount * Log(ClassCount); |
---|
186 | } |
---|
187 | |
---|
188 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
189 | } |
---|
190 | |
---|
191 | float DiscrKnownBaseInfo_Discr(KnownItems, MaxVal, Freq_discr) |
---|
192 | /* ------------------ */ |
---|
193 | DiscrValue MaxVal;ItemCount KnownItems; ItemCount** Freq_discr;{ |
---|
194 | ClassNo c; |
---|
195 | ItemCount ClassCount; |
---|
196 | double Sum = 0; |
---|
197 | DiscrValue v; |
---|
198 | |
---|
199 | ForEach(c, 0, MaxClass) { |
---|
200 | ClassCount = 0; |
---|
201 | ForEach(v, 1, MaxVal) { |
---|
202 | ClassCount += Freq_discr[v][c]; |
---|
203 | } |
---|
204 | Sum += ClassCount * Log(ClassCount); |
---|
205 | } |
---|
206 | |
---|
207 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
208 | } |
---|
209 | |
---|
210 | /*************************************************************************/ |
---|
211 | /* */ |
---|
212 | /* Construct and return a node for a test on a discrete attribute */ |
---|
213 | /* */ |
---|
214 | /*************************************************************************/ |
---|
215 | |
---|
216 | DiscreteTest(Node, Att) |
---|
217 | /* ---------- */ |
---|
218 | Tree Node;Attribute Att; { |
---|
219 | ItemCount CountItems(); |
---|
220 | |
---|
221 | Sprout(Node, MaxAttVal[Att]); |
---|
222 | |
---|
223 | Node->NodeType = BrDiscr; |
---|
224 | Node->Tested = Att; |
---|
225 | Node->Errors = 0; |
---|
226 | } |
---|