[26] | 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Evaluation of rulesets */ |
---|
| 4 | /* ------------------------ */ |
---|
| 5 | /* */ |
---|
| 6 | /*************************************************************************/ |
---|
| 7 | |
---|
| 8 | |
---|
| 9 | #include "defns.i" |
---|
| 10 | #include "types.i" |
---|
| 11 | #include "extern.i" |
---|
| 12 | #include "rulex.i" |
---|
| 13 | |
---|
| 14 | |
---|
| 15 | |
---|
| 16 | /*************************************************************************/ |
---|
| 17 | /* */ |
---|
| 18 | /* Evaluate all rulesets */ |
---|
| 19 | /* */ |
---|
| 20 | /*************************************************************************/ |
---|
| 21 | |
---|
| 22 | |
---|
| 23 | EvaluateRulesets(DeleteRules) |
---|
| 24 | /* ---------------- */ |
---|
| 25 | Boolean DeleteRules; |
---|
| 26 | { |
---|
| 27 | short t; |
---|
| 28 | ItemNo *Errors, Interpret(); |
---|
| 29 | float AvSize=0, AvErrs=0; |
---|
| 30 | Boolean Final; |
---|
| 31 | |
---|
| 32 | if ( TRIALS == 1 ) |
---|
| 33 | { |
---|
| 34 | /* Evaluate current ruleset as there is no composite ruleset */ |
---|
| 35 | |
---|
| 36 | Interpret(0, MaxItem, DeleteRules, true, true); |
---|
| 37 | return; |
---|
| 38 | } |
---|
| 39 | |
---|
| 40 | Errors = (ItemNo *) malloc((TRIALS+1) * sizeof(ItemNo)); |
---|
| 41 | |
---|
| 42 | ForEach(t, 0, TRIALS) |
---|
| 43 | { |
---|
| 44 | NRules = PRSet[t].SNRules; |
---|
| 45 | Rule = PRSet[t].SRule; |
---|
| 46 | RuleIndex = PRSet[t].SRuleIndex; |
---|
| 47 | DefaultClass = PRSet[t].SDefaultClass; |
---|
| 48 | |
---|
| 49 | if ( t < TRIALS ) |
---|
| 50 | { |
---|
| 51 | printf("\nRuleset %d:\n", t); |
---|
| 52 | } |
---|
| 53 | else |
---|
| 54 | { |
---|
| 55 | printf("\nComposite ruleset:\n"); |
---|
| 56 | } |
---|
| 57 | |
---|
| 58 | Final = (t == TRIALS); |
---|
| 59 | Errors[t] = Interpret(0, MaxItem, DeleteRules, Final, Final); |
---|
| 60 | |
---|
| 61 | AvSize += NRules; |
---|
| 62 | AvErrs += Errors[t]; |
---|
| 63 | |
---|
| 64 | if ( DeleteRules ) |
---|
| 65 | { |
---|
| 66 | PRSet[t].SNRules = NRules; |
---|
| 67 | } |
---|
| 68 | } |
---|
| 69 | |
---|
| 70 | /* Print report */ |
---|
| 71 | |
---|
| 72 | printf("\n"); |
---|
| 73 | printf("Trial Size Errors\n"); |
---|
| 74 | printf("----- ---- ------\n"); |
---|
| 75 | |
---|
| 76 | ForEach(t, 0, TRIALS) |
---|
| 77 | { |
---|
| 78 | if ( t < TRIALS ) |
---|
| 79 | { |
---|
| 80 | printf("%4d", t); |
---|
| 81 | } |
---|
| 82 | else |
---|
| 83 | { |
---|
| 84 | printf(" **"); |
---|
| 85 | } |
---|
| 86 | printf(" %4d %3d(%4.1f%%)\n", |
---|
| 87 | PRSet[t].SNRules, Errors[t], 100 * Errors[t] / (MaxItem+1.0)); |
---|
| 88 | } |
---|
| 89 | |
---|
| 90 | AvSize /= TRIALS + 1; |
---|
| 91 | AvErrs /= TRIALS + 1; |
---|
| 92 | printf("\t\t\t\tAv size = %.1f, av errors = %.1f (%.1f%%)\n", |
---|
| 93 | AvSize, AvErrs, 100 * AvErrs / (MaxItem+1.0)); |
---|
| 94 | } |
---|
| 95 | |
---|
| 96 | |
---|
| 97 | |
---|
| 98 | /*************************************************************************/ |
---|
| 99 | /* */ |
---|
| 100 | /* Evaluate current ruleset */ |
---|
| 101 | /* */ |
---|
| 102 | /*************************************************************************/ |
---|
| 103 | |
---|
| 104 | |
---|
/*  Strength of the rule most recently selected by BestRuleIndex,	 */
/*  which resets it to 0.0 when no rule qualifies; Interpret reads	 */
/*  it straight after each first-choice lookup				 */

float Confidence;
---|
| 107 | |
---|
| 108 | |
---|
| 109 | ItemNo Interpret(Fp, Lp, DeleteRules, CMInfo, Arrow) |
---|
| 110 | /* --------- */ |
---|
| 111 | ItemNo Fp, Lp; |
---|
| 112 | Boolean DeleteRules, CMInfo, Arrow; |
---|
| 113 | { |
---|
| 114 | ItemNo i, Tested=0, Errors=0, *Better, *Worse, *ConfusionMat; |
---|
| 115 | Boolean FoundRule; |
---|
| 116 | ClassNo AssignedClass, AltClass; |
---|
| 117 | Attribute Att; |
---|
| 118 | RuleNo p, Bestr, ri, ri2, riDrop=0, BestRuleIndex(); |
---|
| 119 | float ErrorRate, BestRuleConfidence; |
---|
| 120 | |
---|
| 121 | if ( CMInfo ) |
---|
| 122 | { |
---|
| 123 | ConfusionMat = (ItemNo *) calloc((MaxClass+1)*(MaxClass+1), sizeof(ItemNo)); |
---|
| 124 | } |
---|
| 125 | |
---|
| 126 | ForEach(ri, 1, NRules) |
---|
| 127 | { |
---|
| 128 | p = RuleIndex[ri]; |
---|
| 129 | Rule[p].Used = Rule[p].Incorrect = 0; |
---|
| 130 | } |
---|
| 131 | |
---|
| 132 | Better = (ItemNo *) calloc(NRules+1, sizeof(ItemNo)); |
---|
| 133 | Worse = (ItemNo *) calloc(NRules+1, sizeof(ItemNo)); |
---|
| 134 | |
---|
| 135 | ForEach(i, Fp, Lp) |
---|
| 136 | { |
---|
| 137 | /* Find first choice for rule for this item */ |
---|
| 138 | |
---|
| 139 | ri = BestRuleIndex(Item[i], 1); |
---|
| 140 | Bestr = ( ri ? RuleIndex[ri] : 0 ); |
---|
| 141 | FoundRule = Bestr > 0; |
---|
| 142 | |
---|
| 143 | if ( FoundRule ) |
---|
| 144 | { |
---|
| 145 | Rule[Bestr].Used++; |
---|
| 146 | AssignedClass = Rule[Bestr].Rhs; |
---|
| 147 | BestRuleConfidence = Confidence; |
---|
| 148 | |
---|
| 149 | /* Now find second choice */ |
---|
| 150 | |
---|
| 151 | ri2 = BestRuleIndex(Item[i], ri+1); |
---|
| 152 | AltClass = ( ri2 ? Rule[RuleIndex[ri2]].Rhs : DefaultClass ); |
---|
| 153 | if ( AltClass != AssignedClass ) |
---|
| 154 | { |
---|
| 155 | if ( AssignedClass == Class(Item[i]) ) |
---|
| 156 | { |
---|
| 157 | Better[ri]++; |
---|
| 158 | } |
---|
| 159 | else |
---|
| 160 | if ( AltClass == Class(Item[i]) ) |
---|
| 161 | { |
---|
| 162 | Worse[ri]++; |
---|
| 163 | } |
---|
| 164 | } |
---|
| 165 | } |
---|
| 166 | else |
---|
| 167 | { |
---|
| 168 | AssignedClass = DefaultClass; |
---|
| 169 | } |
---|
| 170 | |
---|
| 171 | if ( CMInfo ) |
---|
| 172 | { |
---|
| 173 | ConfusionMat[Class(Item[i])*(MaxClass+1)+AssignedClass]++; |
---|
| 174 | } |
---|
| 175 | Tested++; |
---|
| 176 | |
---|
| 177 | if ( AssignedClass != Class(Item[i]) ) |
---|
| 178 | { |
---|
| 179 | Errors++; |
---|
| 180 | if ( FoundRule ) Rule[Bestr].Incorrect++; |
---|
| 181 | |
---|
| 182 | Verbosity(3) |
---|
| 183 | { |
---|
| 184 | printf("\n"); |
---|
| 185 | ForEach(Att, 0, MaxAtt) |
---|
| 186 | { |
---|
| 187 | printf("\t%s: ", AttName[Att]); |
---|
| 188 | if ( MaxAttVal[Att] ) |
---|
| 189 | { |
---|
| 190 | if ( DVal(Item[i],Att) ) |
---|
| 191 | { |
---|
| 192 | printf("%s\n", AttValName[Att][DVal(Item[i],Att)]); |
---|
| 193 | } |
---|
| 194 | else |
---|
| 195 | { |
---|
| 196 | printf("?\n"); |
---|
| 197 | } |
---|
| 198 | } |
---|
| 199 | else |
---|
| 200 | { |
---|
| 201 | if ( CVal(Item[i],Att) != Unknown ) |
---|
| 202 | { |
---|
| 203 | printf("%g\n", CVal(Item[i],Att)); |
---|
| 204 | } |
---|
| 205 | else |
---|
| 206 | { |
---|
| 207 | printf("?\n"); |
---|
| 208 | } |
---|
| 209 | } |
---|
| 210 | } |
---|
| 211 | printf("\t%4d:\tGiven class %s,", i, ClassName[Class(Item[i])]); |
---|
| 212 | if ( FoundRule ) |
---|
| 213 | { |
---|
| 214 | printf(" rule %d [%.1f%%] gives class ", |
---|
| 215 | Bestr, 100 * BestRuleConfidence); |
---|
| 216 | } |
---|
| 217 | else |
---|
| 218 | { |
---|
| 219 | printf(" default class "); |
---|
| 220 | } |
---|
| 221 | printf("%s\n", ClassName[AssignedClass]); |
---|
| 222 | } |
---|
| 223 | } |
---|
| 224 | } |
---|
| 225 | |
---|
| 226 | printf("\nRule Size Error Used Wrong\t Advantage\n"); |
---|
| 227 | printf( "---- ---- ----- ---- -----\t ---------\n"); |
---|
| 228 | ForEach(ri, 1, NRules) |
---|
| 229 | { |
---|
| 230 | p = RuleIndex[ri]; |
---|
| 231 | if ( Rule[p].Used > 0 ) |
---|
| 232 | { |
---|
| 233 | ErrorRate = Rule[p].Incorrect / (float) Rule[p].Used; |
---|
| 234 | |
---|
| 235 | printf("%4d%6d%6.1f%%%6d%7d (%.1f%%)\t%6d (%d|%d) \t%s\n", |
---|
| 236 | p, Rule[p].Size, |
---|
| 237 | 100 * Rule[p].Error, Rule[p].Used, Rule[p].Incorrect, |
---|
| 238 | 100 * ErrorRate, |
---|
| 239 | Better[ri]-Worse[ri], Better[ri], Worse[ri], |
---|
| 240 | ClassName[Rule[p].Rhs]); |
---|
| 241 | |
---|
| 242 | /* See whether this rule should be dropped. Note: can only drop |
---|
| 243 | one rule at a time, because Better and Worse are affected */ |
---|
| 244 | |
---|
| 245 | if ( DeleteRules && ! riDrop && Worse[ri] > Better[ri] ) |
---|
| 246 | { |
---|
| 247 | riDrop = ri; |
---|
| 248 | } |
---|
| 249 | } |
---|
| 250 | } |
---|
| 251 | |
---|
| 252 | cfree(Better); |
---|
| 253 | cfree(Worse); |
---|
| 254 | |
---|
| 255 | if ( riDrop ) |
---|
| 256 | { |
---|
| 257 | printf("\nDrop rule %d\n", RuleIndex[riDrop]); |
---|
| 258 | |
---|
| 259 | ForEach(ri, riDrop+1, NRules) |
---|
| 260 | { |
---|
| 261 | RuleIndex[ri-1] = RuleIndex[ri]; |
---|
| 262 | } |
---|
| 263 | NRules--; |
---|
| 264 | |
---|
| 265 | if ( CMInfo ) free(ConfusionMat); |
---|
| 266 | return Interpret(Fp, Lp, DeleteRules, true, Arrow); |
---|
| 267 | } |
---|
| 268 | else |
---|
| 269 | { |
---|
| 270 | printf("\nTested %d, errors %d (%.1f%%)%s\n", |
---|
| 271 | Tested, Errors, 100 * Errors / (float) Tested, |
---|
| 272 | ( Arrow ? " <<" : "" )); |
---|
| 273 | } |
---|
| 274 | |
---|
| 275 | if ( CMInfo ) |
---|
| 276 | { |
---|
| 277 | PrintConfusionMatrix(ConfusionMat); |
---|
| 278 | free(ConfusionMat); |
---|
| 279 | } |
---|
| 280 | |
---|
| 281 | return Errors; |
---|
| 282 | } |
---|
| 283 | |
---|
| 284 | |
---|
| 285 | |
---|
| 286 | /*************************************************************************/ |
---|
| 287 | /* */ |
---|
| 288 | /* Find the best rule for the given case, leaving probability */ |
---|
| 289 | /* in Confidence */ |
---|
| 290 | /* */ |
---|
| 291 | /*************************************************************************/ |
---|
| 292 | |
---|
| 293 | |
---|
| 294 | RuleNo BestRuleIndex(CaseDesc, Start) |
---|
| 295 | /* --------------- */ |
---|
| 296 | Description CaseDesc; |
---|
| 297 | RuleNo Start; |
---|
| 298 | { |
---|
| 299 | RuleNo r, ri; |
---|
| 300 | float Strength(); |
---|
| 301 | |
---|
| 302 | ForEach(ri, Start, NRules) |
---|
| 303 | { |
---|
| 304 | r = RuleIndex[ri]; |
---|
| 305 | Confidence = Strength(Rule[r], CaseDesc); |
---|
| 306 | |
---|
| 307 | if ( Confidence > 0.1 ) |
---|
| 308 | { |
---|
| 309 | return ri; |
---|
| 310 | } |
---|
| 311 | } |
---|
| 312 | |
---|
| 313 | Confidence = 0.0; |
---|
| 314 | return 0; |
---|
| 315 | } |
---|