[26] | 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Evaluation of a test on a discrete valued attribute */ |
---|
| 4 | /* --------------------------------------------------- */ |
---|
| 5 | /* */ |
---|
| 6 | /*************************************************************************/ |
---|
| 7 | |
---|
| 8 | #include "buildex.i" |
---|
| 9 | |
---|
| 10 | /*************************************************************************/ |
---|
| 11 | /* */ |
---|
| 12 | /* Set Info[] and Gain[] for discrete partition of items Fp to Lp */ |
---|
| 13 | /* */ |
---|
| 14 | /*************************************************************************/ |
---|
| 15 | |
---|
[68] | 16 | EvalDiscreteAtt(Att, Fp, Lp, Items) |
---|
| 17 | /* --------------- */ |
---|
| 18 | Attribute Att;ItemNo Fp, Lp;ItemCount Items; { |
---|
| 19 | ItemCount KnownItems; |
---|
| 20 | float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo(); |
---|
[26] | 21 | |
---|
[68] | 22 | ComputeFrequencies(Att, Fp, Lp); |
---|
[26] | 23 | |
---|
[68] | 24 | KnownItems = Items - ValFreq[0]; |
---|
[26] | 25 | |
---|
[68] | 26 | /* Special case when no known values of the attribute */ |
---|
[26] | 27 | |
---|
[68] | 28 | if (Items <= ValFreq[0]) { |
---|
| 29 | Verbosity(2) |
---|
| 30 | printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
[26] | 31 | |
---|
[68] | 32 | Gain[Att] = -Epsilon; |
---|
| 33 | Info[Att] = 0.0; |
---|
| 34 | return; |
---|
| 35 | } |
---|
[26] | 36 | |
---|
[68] | 37 | Gain[Att] = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]), |
---|
| 38 | UnknownRate[Att], MaxAttVal[Att], KnownItems); |
---|
| 39 | Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items; |
---|
[26] | 40 | |
---|
[68] | 41 | Verbosity(2) { |
---|
| 42 | printf("\tAtt %s", AttName[Att]); |
---|
| 43 | Verbosity(3) |
---|
| 44 | PrintDistribution(Att, MaxAttVal[Att], true); |
---|
| 45 | printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]); |
---|
| 46 | } |
---|
[26] | 47 | |
---|
[68] | 48 | } |
---|
[26] | 49 | |
---|
[103] | 50 | EvalDiscreteAtt_Discr(Att, Fp, Lp, Items, Freq_discr, ValFreq_discr, UnknownRate_discr, the_gain, the_info) |
---|
[68] | 51 | /* --------------- */ |
---|
[103] | 52 | Attribute Att;ItemNo Fp, Lp;ItemCount Items; ItemCount** Freq_discr; ItemCount* ValFreq_discr; float* UnknownRate_discr; |
---|
| 53 | float* the_gain; float* the_info;{ |
---|
[68] | 54 | ItemCount KnownItems; |
---|
| 55 | float DiscrKnownBaseInfo_Discr(), ComputeGain_Discr(), TotalInfo(); |
---|
[26] | 56 | |
---|
[84] | 57 | ComputeFrequencies_Discr(Att, Fp, Lp, Freq_discr, ValFreq_discr, UnknownRate_discr); |
---|
[26] | 58 | |
---|
[84] | 59 | KnownItems = Items - ValFreq_discr[0]; |
---|
[26] | 60 | |
---|
[68] | 61 | /* Special case when no known values of the attribute */ |
---|
| 62 | |
---|
[117] | 63 | //printf("nr threads executing: %d\n", omp_get_num_threads()); |
---|
| 64 | |
---|
[84] | 65 | if (Items <= ValFreq_discr[0]) { |
---|
[68] | 66 | Verbosity(2) |
---|
| 67 | printf("\tAtt %s: no known values\n", AttName[Att]); |
---|
| 68 | |
---|
[117] | 69 | //*the_gain = -Epsilon; |
---|
| 70 | Gain[Att] = -Epsilon; |
---|
| 71 | //*the_info = 0.0; |
---|
| 72 | Info[Att] = 0.0; |
---|
[68] | 73 | return; |
---|
| 74 | } |
---|
| 75 | |
---|
[117] | 76 | //*the_gain = ComputeGain_Discr(DiscrKnownBaseInfo_Discr(KnownItems, MaxAttVal[Att], Freq_discr), |
---|
| 77 | //UnknownRate_discr[Att], MaxAttVal[Att], KnownItems, Freq_discr, ValFreq_discr); |
---|
| 78 | Gain[Att] = ComputeGain_Discr(DiscrKnownBaseInfo_Discr(KnownItems, MaxAttVal[Att], Freq_discr), |
---|
| 79 | UnknownRate_discr[Att], MaxAttVal[Att], KnownItems, Freq_discr, ValFreq_discr); |
---|
| 80 | //*the_info = TotalInfo(ValFreq_discr, 0, MaxAttVal[Att]) / Items; |
---|
| 81 | Info[Att] = TotalInfo(ValFreq_discr, 0, MaxAttVal[Att]) / Items; |
---|
[68] | 82 | |
---|
| 83 | Verbosity(2) { |
---|
| 84 | printf("\tAtt %s", AttName[Att]); |
---|
| 85 | Verbosity(3) |
---|
[84] | 86 | PrintDistribution_Discr(Att, MaxAttVal[Att], true, Freq_discr); |
---|
[117] | 87 | //printf("\tinf %.3f, gain %.3f\n", *the_info, *the_gain); |
---|
| 88 | printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]); |
---|
[68] | 89 | } |
---|
| 90 | |
---|
| 91 | } |
---|
[26] | 92 | /*************************************************************************/ |
---|
| 93 | /* */ |
---|
| 94 | /* Compute frequency tables Freq[][] and ValFreq[] for attribute */ |
---|
| 95 | /* Att from items Fp to Lp, and set the UnknownRate for Att */ |
---|
| 96 | /* */ |
---|
| 97 | /*************************************************************************/ |
---|
| 98 | |
---|
[68] | 99 | ComputeFrequencies(Att, Fp, Lp) |
---|
| 100 | /* ------------------ */ |
---|
| 101 | Attribute Att;ItemNo Fp, Lp; { |
---|
| 102 | Description Case; |
---|
| 103 | ClassNo c; |
---|
| 104 | DiscrValue v; |
---|
| 105 | ItemCount CountItems(); |
---|
| 106 | ItemNo p; |
---|
[26] | 107 | |
---|
[68] | 108 | ResetFreq(MaxAttVal[Att], Freq, ValFreq); |
---|
[26] | 109 | |
---|
[68] | 110 | /* Determine the frequency of each class amongst cases |
---|
| 111 | with each possible value for the given attribute */ |
---|
[26] | 112 | |
---|
[68] | 113 | ForEach(p, Fp, Lp) { |
---|
| 114 | Case = Item[p]; |
---|
| 115 | Freq[DVal(Case,Att)][Class(Case)] += Weight[p]; |
---|
| 116 | } |
---|
[26] | 117 | |
---|
[68] | 118 | /* Determine the frequency of each possible value for the |
---|
| 119 | given attribute */ |
---|
[26] | 120 | |
---|
[68] | 121 | ForEach(v, 0, MaxAttVal[Att]) { |
---|
| 122 | ForEach(c, 0, MaxClass) { |
---|
| 123 | ValFreq[v] += Freq[v][c]; |
---|
| 124 | } |
---|
[26] | 125 | } |
---|
| 126 | |
---|
[68] | 127 | /* Set the rate of unknown values of the attribute */ |
---|
| 128 | |
---|
| 129 | UnknownRate[Att] = ValFreq[0] / CountItems(Fp, Lp); |
---|
[26] | 130 | } |
---|
| 131 | |
---|
[84] | 132 | ComputeFrequencies_Discr(Att, Fp, Lp, Freq_discr, ValFreq_discr, UnknownRate_discr) |
---|
[68] | 133 | /* ------------------ */ |
---|
[84] | 134 | Attribute Att;ItemNo Fp, Lp; ItemCount** Freq_discr; ItemCount* ValFreq_discr; float* UnknownRate_discr;{ |
---|
[68] | 135 | Description Case; |
---|
| 136 | ClassNo c; |
---|
| 137 | DiscrValue v; |
---|
| 138 | ItemCount CountItems(); |
---|
| 139 | ItemNo p; |
---|
[26] | 140 | |
---|
[84] | 141 | ResetFreq_discr(MaxAttVal[Att], Freq_discr, ValFreq_discr); |
---|
[26] | 142 | |
---|
[68] | 143 | /* Determine the frequency of each class amongst cases |
---|
| 144 | with each possible value for the given attribute */ |
---|
| 145 | |
---|
| 146 | ForEach(p, Fp, Lp) { |
---|
| 147 | Case = Item[p]; |
---|
[117] | 148 | |
---|
[84] | 149 | Freq_discr[DVal(Case,Att)][Class(Case)] += Weight[p]; |
---|
[68] | 150 | } |
---|
| 151 | |
---|
| 152 | /* Determine the frequency of each possible value for the |
---|
| 153 | given attribute */ |
---|
| 154 | |
---|
| 155 | ForEach(v, 0, MaxAttVal[Att]) { |
---|
| 156 | ForEach(c, 0, MaxClass) { |
---|
[84] | 157 | ValFreq_discr[v] += Freq_discr[v][c]; |
---|
[68] | 158 | } |
---|
| 159 | } |
---|
| 160 | |
---|
| 161 | /* Set the rate of unknown values of the attribute */ |
---|
| 162 | |
---|
[84] | 163 | UnknownRate_discr[Att] = ValFreq_discr[0] / CountItems(Fp, Lp); |
---|
[68] | 164 | } |
---|
[26] | 165 | /*************************************************************************/ |
---|
| 166 | /* */ |
---|
| 167 | /* Return the base info for items with known values of a discrete */ |
---|
| 168 | /* attribute, using the frequency table Freq[][] */ |
---|
| 169 | /* */ |
---|
| 170 | /*************************************************************************/ |
---|
| 171 | |
---|
| 172 | float DiscrKnownBaseInfo(KnownItems, MaxVal) |
---|
[68] | 173 | /* ------------------ */ |
---|
| 174 | DiscrValue MaxVal;ItemCount KnownItems; { |
---|
| 175 | ClassNo c; |
---|
| 176 | ItemCount ClassCount; |
---|
| 177 | double Sum = 0; |
---|
| 178 | DiscrValue v; |
---|
[26] | 179 | |
---|
[68] | 180 | ForEach(c, 0, MaxClass) { |
---|
| 181 | ClassCount = 0; |
---|
| 182 | ForEach(v, 1, MaxVal) { |
---|
| 183 | ClassCount += Freq[v][c]; |
---|
| 184 | } |
---|
| 185 | Sum += ClassCount * Log(ClassCount); |
---|
[26] | 186 | } |
---|
| 187 | |
---|
[68] | 188 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
[26] | 189 | } |
---|
| 190 | |
---|
[84] | 191 | float DiscrKnownBaseInfo_Discr(KnownItems, MaxVal, Freq_discr) |
---|
[68] | 192 | /* ------------------ */ |
---|
[84] | 193 | DiscrValue MaxVal;ItemCount KnownItems; ItemCount** Freq_discr;{ |
---|
[68] | 194 | ClassNo c; |
---|
| 195 | ItemCount ClassCount; |
---|
| 196 | double Sum = 0; |
---|
| 197 | DiscrValue v; |
---|
[26] | 198 | |
---|
[68] | 199 | ForEach(c, 0, MaxClass) { |
---|
| 200 | ClassCount = 0; |
---|
| 201 | ForEach(v, 1, MaxVal) { |
---|
[84] | 202 | ClassCount += Freq_discr[v][c]; |
---|
[68] | 203 | } |
---|
| 204 | Sum += ClassCount * Log(ClassCount); |
---|
| 205 | } |
---|
[26] | 206 | |
---|
[68] | 207 | return (KnownItems * Log(KnownItems) - Sum) / KnownItems; |
---|
| 208 | } |
---|
| 209 | |
---|
[26] | 210 | /*************************************************************************/ |
---|
| 211 | /* */ |
---|
| 212 | /* Construct and return a node for a test on a discrete attribute */ |
---|
| 213 | /* */ |
---|
| 214 | /*************************************************************************/ |
---|
| 215 | |
---|
[68] | 216 | DiscreteTest(Node, Att) |
---|
| 217 | /* ---------- */ |
---|
| 218 | Tree Node;Attribute Att; { |
---|
| 219 | ItemCount CountItems(); |
---|
[26] | 220 | |
---|
[68] | 221 | Sprout(Node, MaxAttVal[Att]); |
---|
[26] | 222 | |
---|
[68] | 223 | Node->NodeType = BrDiscr; |
---|
| 224 | Node->Tested = Att; |
---|
| 225 | Node->Errors = 0; |
---|
[26] | 226 | } |
---|