Changeset 69
- Timestamp: Jan 7, 2010, 9:02:17 AM (14 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
proiecte/Parallel-DT/R8/Src/info.c
r26 r69 5 5 /* */ 6 6 /*************************************************************************/ 7 8 7 9 8 #include "buildex.i" … … 28 27 /*************************************************************************/ 29 28 30 31 29 float Worth(ThisInfo, ThisGain, MinGain) 32 /* ----- */ 33 float ThisInfo, ThisGain, MinGain; 34 { 35 if ( GAINRATIO ) 36 { 37 if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon ) 38 { 39 return ThisGain / ThisInfo; 40 } 41 else 42 { 43 return -Epsilon; 44 } 45 } 46 else 47 { 48 return ( ThisInfo > 0 && ThisGain > -Epsilon ? ThisGain : -Epsilon ); 49 } 50 } 51 52 30 /* ----- */ 31 float ThisInfo, ThisGain, MinGain; { 32 if (GAINRATIO) { 33 if (ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon) { 34 return ThisGain / ThisInfo; 35 } else { 36 return -Epsilon; 37 } 38 } else { 39 return (ThisInfo > 0 && ThisGain > -Epsilon ? ThisGain : -Epsilon); 40 } 41 } 53 42 54 43 /*************************************************************************/ … … 58 47 /*************************************************************************/ 59 48 60 61 ResetFreq(MaxVal) 62 /* --------- */ 63 DiscrValue MaxVal; 64 { 65 DiscrValue v; 66 ClassNo c; 67 68 ForEach(v, 0, MaxVal) 69 { 70 ForEach(c, 0, MaxClass) 71 { 72 Freq[v][c] = 0; 73 } 74 ValFreq[v] = 0; 75 } 76 } 77 78 49 ResetFreq(MaxVal, Freq, ValFreq) 50 /* --------- */ 51 DiscrValue MaxVal; ItemCount** Freq; ItemCount* ValFreq; { 52 DiscrValue v; 53 ClassNo c; 54 55 #pragma omp parallel for private(v) 56 ForEach(v, 0, MaxVal) { 57 ForEach(c, 0, MaxClass) { 58 Freq[v][c] = 0; 59 } 60 ValFreq[v] = 0; 61 } 62 } 79 63 80 64 /*************************************************************************/ … … 97 81 /*************************************************************************/ 98 82 99 100 83 float ComputeGain(BaseInfo, UnknFrac, MaxVal, TotalItems) 101 /* ----------- */ 102 float BaseInfo, UnknFrac; 103 DiscrValue MaxVal; 104 ItemCount TotalItems; 105 { 106 DiscrValue v; 107 float 
ThisInfo=0.0, ThisGain, TotalInfo(); 108 short ReasonableSubsets=0; 109 110 /* Check whether all values are unknown or the same */ 111 112 if ( ! TotalItems ) return -Epsilon; 113 114 /* There must be at least two subsets with MINOBJS items */ 115 116 ForEach(v, 1, MaxVal) 117 { 118 if ( ValFreq[v] >= MINOBJS ) ReasonableSubsets++; 119 } 120 if ( ReasonableSubsets < 2 ) return -Epsilon; 121 122 /* Compute total info after split, by summing the 123 info of each of the subsets formed by the test */ 124 125 ForEach(v, 1, MaxVal) 126 { 127 ThisInfo += TotalInfo(Freq[v], 0, MaxClass); 128 } 129 130 /* Set the gain in information for all items, adjusted for unknowns */ 131 132 ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems); 133 134 Verbosity(5) 135 printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n", 136 TotalItems + ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain); 137 138 return ThisGain; 139 } 140 141 84 /* ----------- */ 85 float BaseInfo, UnknFrac;DiscrValue MaxVal;ItemCount TotalItems; { 86 DiscrValue v; 87 float ThisInfo = 0.0, ThisGain, TotalInfo(); 88 short ReasonableSubsets = 0; 89 90 /* Check whether all values are unknown or the same */ 91 92 if (!TotalItems) 93 return -Epsilon; 94 95 /* There must be at least two subsets with MINOBJS items */ 96 97 ForEach(v, 1, MaxVal) { 98 if (ValFreq[v] >= MINOBJS) 99 ReasonableSubsets++; 100 } 101 if (ReasonableSubsets < 2) 102 return -Epsilon; 103 104 /* Compute total info after split, by summing the 105 info of each of the subsets formed by the test */ 106 107 //#pragma omp parallel for reduction(+:ThisInfo) 108 ForEach(v, 1, MaxVal) { 109 ThisInfo += TotalInfo(Freq[v], 0, MaxClass); 110 } 111 112 /* Set the gain in information for all items, adjusted for unknowns */ 113 114 ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems); 115 116 Verbosity(5) 117 printf( 118 "ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n", 119 TotalItems + 
ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain); 120 121 return ThisGain; 122 } 123 124 float ComputeGain_Discr(BaseInfo, UnknFrac, MaxVal, TotalItems, Freq, ValFreq) 125 /* ----------- */ 126 float BaseInfo, UnknFrac;DiscrValue MaxVal;ItemCount TotalItems; ItemCount** Freq; ItemCount* ValFreq; { 127 DiscrValue v; 128 float ThisInfo = 0.0, ThisGain, TotalInfo(); 129 short ReasonableSubsets = 0; 130 131 /* Check whether all values are unknown or the same */ 132 133 if (!TotalItems) 134 return -Epsilon; 135 136 /* There must be at least two subsets with MINOBJS items */ 137 138 ForEach(v, 1, MaxVal) { 139 if (ValFreq[v] >= MINOBJS) 140 ReasonableSubsets++; 141 } 142 if (ReasonableSubsets < 2) 143 return -Epsilon; 144 145 /* Compute total info after split, by summing the 146 info of each of the subsets formed by the test */ 147 148 ForEach(v, 1, MaxVal) { 149 ThisInfo += TotalInfo(Freq[v], 0, MaxClass); 150 } 151 152 /* Set the gain in information for all items, adjusted for unknowns */ 153 154 ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems); 155 156 Verbosity(5) 157 printf( 158 "ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n", 159 TotalItems + ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain); 160 161 return ThisGain; 162 } 142 163 143 164 /*************************************************************************/ … … 147 168 /*************************************************************************/ 148 169 149 150 170 float TotalInfo(V, MinVal, MaxVal) 151 /* --------- */ 152 ItemCount V[]; 153 DiscrValue MinVal, MaxVal; 154 { 155 DiscrValue v; 156 float Sum=0.0; 157 ItemCount N, TotalItems=0; 158 159 ForEach(v, MinVal, MaxVal) 160 { 161 N = V[v]; 162 163 Sum += N * Log(N); 164 TotalItems += N; 165 } 166 167 return TotalItems * Log(TotalItems) - Sum; 168 } 169 170 171 /* --------- */ 172 ItemCount V[];DiscrValue MinVal, MaxVal; { 173 DiscrValue v; 174 float Sum = 0.0; 175 ItemCount N, TotalItems = 0; 176 177 ForEach(v, 
MinVal, MaxVal) { 178 N = V[v]; 179 180 Sum += N * Log(N); 181 TotalItems += N; 182 } 183 184 return TotalItems * Log(TotalItems) - Sum; 185 } 171 186 172 187 /*************************************************************************/ … … 176 191 /*************************************************************************/ 177 192 178 179 PrintDistribution(Att, MaxVal, ShowNames) 180 /* ----------------- */ 181 Attribute Att; 182 DiscrValue MaxVal; 183 Boolean ShowNames; 184 { 185 DiscrValue v; 186 ClassNo c; 187 String Val; 188 189 printf("\n\t\t\t "); 190 ForEach(c, 0, MaxClass) 191 { 192 printf("%7.6s", ClassName[c]); 193 } 194 printf("\n"); 195 196 ForEach(v, 0, MaxVal) 197 { 198 if ( ShowNames ) 199 { 200 Val = ( !v ? "unknown" : 201 MaxAttVal[Att] ? AttValName[Att][v] : 202 v == 1 ? "below" : "above" ); 203 printf("\t\t[%-7.7s:", Val); 204 } 205 else 206 { 207 printf("\t\t[%-7d:", v); 208 } 209 210 ForEach(c, 0, MaxClass) 211 { 212 printf(" %6.1f", Freq[v][c]); 213 } 214 215 printf("]\n"); 216 } 217 } 193 PrintDistribution(Att, MaxVal, ShowNames) 194 /* ----------------- */ 195 Attribute Att;DiscrValue MaxVal;Boolean ShowNames; { 196 DiscrValue v; 197 ClassNo c; 198 String Val; 199 200 printf("\n\t\t\t "); 201 ForEach(c, 0, MaxClass) { 202 printf("%7.6s", ClassName[c]); 203 } 204 printf("\n"); 205 206 ForEach(v, 0, MaxVal) { 207 if (ShowNames) { 208 Val = (!v ? "unknown" : MaxAttVal[Att] ? AttValName[Att][v] : v 209 == 1 ? 
"below" : "above"); 210 printf("\t\t[%-7.7s:", Val); 211 } else { 212 printf("\t\t[%-7d:", v); 213 } 214 215 ForEach(c, 0, MaxClass) { 216 printf(" %6.1f", Freq[v][c]); 217 } 218 219 printf("]\n"); 220 } 221 } 222 223 PrintDistribution_Discr(Att, MaxVal, ShowNames, Freq) 224 /* ----------------- */ 225 Attribute Att;DiscrValue MaxVal;Boolean ShowNames; ItemCount** Freq;{ 226 DiscrValue v; 227 ClassNo c; 228 String Val; 229 230 printf("\n\t\t\t "); 231 ForEach(c, 0, MaxClass) { 232 printf("%7.6s", ClassName[c]); 233 } 234 printf("\n"); 235 236 ForEach(v, 0, MaxVal) { 237 if (ShowNames) { 238 Val = (!v ? "unknown" : MaxAttVal[Att] ? AttValName[Att][v] : v 239 == 1 ? "below" : "above"); 240 printf("\t\t[%-7.7s:", Val); 241 } else { 242 printf("\t\t[%-7d:", v); 243 } 244 245 ForEach(c, 0, MaxClass) { 246 printf(" %6.1f", Freq[v][c]); 247 } 248 249 printf("]\n"); 250 } 251 }
Note: See TracChangeset
for help on using the changeset viewer.