- Timestamp:
- Jan 7, 2010, 9:00:08 AM (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
proiecte/Parallel-DT/R8/Src/besttree.c
r26 r63 5 5 /* */ 6 6 /*************************************************************************/ 7 8 7 9 8 #include "defns.i" … … 11 10 #include "extern.i" 12 11 13 14 ItemNo *TargetClassFreq; 15 Tree *Raw; 16 extern Tree *Pruned; 17 18 12 ItemNo *TargetClassFreq; 13 Tree *Raw; 14 extern Tree *Pruned; 19 15 20 16 /*************************************************************************/ … … 24 20 /*************************************************************************/ 25 21 26 27 OneTree() 22 OneTree() 28 23 /* --------- */ 29 24 { 30 Tree FormTree(), CopyTree(); 31 Boolean Prune(); 32 33 InitialiseTreeData(); 34 InitialiseWeights(); 35 36 Raw = (Tree *) calloc(1, sizeof(Tree)); 37 Pruned = (Tree *) calloc(1, sizeof(Tree)); 38 39 AllKnown = true; 40 Raw[0] = FormTree(0, MaxItem); 41 printf("\n"); 42 PrintTree(Raw[0]); 43 44 SaveTree(Raw[0], ".unpruned"); 45 46 Pruned[0] = CopyTree(Raw[0]); 47 if ( Prune(Pruned[0]) ) 48 { 49 printf("\nSimplified "); 50 PrintTree(Pruned[0]); 51 } 52 } 53 54 55 25 Tree FormTree(), CopyTree(); 26 Boolean Prune(); 27 28 InitialiseTreeData(); 29 InitialiseWeights(); 30 31 Raw = (Tree *) calloc(1, sizeof(Tree)); 32 Pruned = (Tree *) calloc(1, sizeof(Tree)); 33 34 AllKnown = true; 35 Raw[0] = FormTree(0, MaxItem); 36 //printf("\n"); 37 //PrintTree(Raw[0]); 38 39 SaveTree(Raw[0], ".unpruned"); 40 41 Pruned[0] = CopyTree(Raw[0]); 42 if (Prune(Pruned[0])) { 43 printf("\nSimplified "); 44 PrintTree(Pruned[0]); 45 } 46 } 47 48 OneTree_Discr() 49 /* --------- */ 50 { 51 Tree FormTree_Discr(), CopyTree(); 52 Boolean Prune(); 53 54 55 InitialiseTreeData(); 56 InitialiseWeights(); 57 58 Raw = (Tree *) calloc(1, sizeof(Tree)); 59 Pruned = (Tree *) calloc(1, sizeof(Tree)); 60 61 62 AllKnown = true; 63 Raw[0] = FormTree_Discr(0, MaxItem); 64 //printf("\n"); 65 //PrintTree(Raw[0]); 66 67 SaveTree(Raw[0], ".unpruned"); 68 69 Pruned[0] = CopyTree(Raw[0]); 70 if (Prune(Pruned[0])) { 71 printf("\nSimplified "); 72 PrintTree(Pruned[0]); 73 } 74 } 56 75 /*************************************************************************/ 57 76 /* */ … … 59 78 /* */ 60 79 /*************************************************************************/ 61 62 80 63 81 short BestTree() 64 82 /* -------- */ 65 83 { 66 Tree CopyTree(), Iterate(); 67 Boolean Prune(); 68 short t, Best=0; 69 70 InitialiseTreeData(); 71 72 TargetClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo)); 73 74 Raw = (Tree *) calloc(TRIALS, sizeof(Tree)); 75 Pruned = (Tree *) calloc(TRIALS, sizeof(Tree)); 76 77 /* If necessary, set initial size of window to 20% (or twice 78 the sqrt, if this is larger) of the number of data items, 79 and the maximum number of items that can be added to the 80 window at each iteration to 20% of the initial window size */ 81 82 if ( ! WINDOW ) 83 { 84 WINDOW = Max(2 * sqrt(MaxItem+1.0), (MaxItem+1) / 5); 85 } 86 87 if ( ! INCREMENT ) 88 { 89 INCREMENT = Max(WINDOW / 5, 1); 90 } 91 92 FormTarget(WINDOW); 93 94 /* Form set of trees by iteration and prune */ 95 96 ForEach(t, 0, TRIALS-1 ) 97 { 98 FormInitialWindow(); 99 100 printf("\n--------\nTrial %d\n--------\n\n", t); 101 102 Raw[t] = Iterate(WINDOW, INCREMENT); 103 printf("\n"); 104 PrintTree(Raw[t]); 105 106 SaveTree(Raw[t], ".unpruned"); 107 108 Pruned[t] = CopyTree(Raw[t]); 109 if ( Prune(Pruned[t]) ) 110 { 111 printf("\nSimplified "); 112 PrintTree(Pruned[t]); 113 } 114 115 if ( Pruned[t]->Errors < Pruned[Best]->Errors ) 116 { 117 Best = t; 118 } 119 } 120 printf("\n--------\n"); 121 122 return Best; 123 } 124 125 84 Tree CopyTree(), Iterate(); 85 Boolean Prune(); 86 short t, Best = 0; 87 88 InitialiseTreeData(); 89 90 TargetClassFreq = (ItemNo *) calloc(MaxClass + 1, sizeof(ItemNo)); 91 92 Raw = (Tree *) calloc(TRIALS, sizeof(Tree)); 93 Pruned = (Tree *) calloc(TRIALS, sizeof(Tree)); 94 95 /* If necessary, set initial size of window to 20% (or twice 96 the sqrt, if this is larger) of the number of data items, 97 and the maximum number of items that can be added to the 98 window at each iteration to 20% of the initial window size */ 99 100 if (!WINDOW) { 101 WINDOW = Max(2 * sqrt(MaxItem+1.0), (MaxItem+1) / 5); 102 } 103 104 if (!INCREMENT) { 105 INCREMENT = Max(WINDOW / 5, 1); 106 } 107 108 FormTarget(WINDOW); 109 110 /* Form set of trees by iteration and prune */ 111 112 ForEach(t, 0, TRIALS-1 ) { 113 FormInitialWindow(); 114 115 printf("\n--------\nTrial %d\n--------\n\n", t); 116 117 Raw[t] = Iterate(WINDOW, INCREMENT); 118 printf("\n"); 119 PrintTree(Raw[t]); 120 121 SaveTree(Raw[t], ".unpruned"); 122 123 Pruned[t] = CopyTree(Raw[t]); 124 if (Prune(Pruned[t])) { 125 printf("\nSimplified "); 126 PrintTree(Pruned[t]); 127 } 128 129 if (Pruned[t]->Errors < Pruned[Best]->Errors) { 130 Best = t; 131 } 132 } 133 printf("\n--------\n"); 134 135 return Best; 136 } 126 137 127 138 /*************************************************************************/ … … 134 145 /*************************************************************************/ 135 146 136 137 FormTarget(Size) 138 /* ----------- */ 139 ItemNo Size; 140 { 141 ItemNo i, *ClassFreq; 142 ClassNo c, Smallest, ClassesLeft=0; 143 144 ClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo)); 145 146 /* Generate the class frequency distribution */ 147 148 ForEach(i, 0, MaxItem) 149 { 150 ClassFreq[ Class(Item[i]) ]++; 151 } 152 153 /* Calculate the no. of classes of which there are items */ 154 155 ForEach(c, 0, MaxClass) 156 { 157 if ( ClassFreq[c] ) 158 { 159 ClassesLeft++; 160 } 161 else 162 { 163 TargetClassFreq[c] = 0; 164 } 165 } 166 167 while ( ClassesLeft ) 168 { 169 /* Find least common class of which there are some items */ 170 171 Smallest = -1; 172 ForEach(c, 0, MaxClass) 173 { 174 if ( ClassFreq[c] && 175 ( Smallest < 0 || ClassFreq[c] < ClassFreq[Smallest] ) ) 176 { 177 Smallest = c; 178 } 179 } 180 181 /* Allocate the no. of items of this class to use in the window */ 182 183 TargetClassFreq[Smallest] = Min(ClassFreq[Smallest], Round(Size/ClassesLeft)); 184 185 ClassFreq[Smallest] = 0; 186 187 Size -= TargetClassFreq[Smallest]; 188 ClassesLeft--; 189 } 190 191 cfree(ClassFreq); 192 } 193 194 147 FormTarget(Size) 148 /* ----------- */ 149 ItemNo Size; { 150 ItemNo i, *ClassFreq; 151 ClassNo c, Smallest, ClassesLeft = 0; 152 153 ClassFreq = (ItemNo *) calloc(MaxClass + 1, sizeof(ItemNo)); 154 155 /* Generate the class frequency distribution */ 156 157 ForEach(i, 0, MaxItem) { 158 ClassFreq[Class(Item[i])]++; 159 } 160 161 /* Calculate the no. of classes of which there are items */ 162 163 ForEach(c, 0, MaxClass) { 164 if (ClassFreq[c]) { 165 ClassesLeft++; 166 } else { 167 TargetClassFreq[c] = 0; 168 } 169 } 170 171 while (ClassesLeft) { 172 /* Find least common class of which there are some items */ 173 174 Smallest = -1; 175 ForEach(c, 0, MaxClass) { 176 if (ClassFreq[c] && (Smallest < 0 || ClassFreq[c] 177 < ClassFreq[Smallest])) { 178 Smallest = c; 179 } 180 } 181 182 /* Allocate the no. of items of this class to use in the window */ 183 184 TargetClassFreq[Smallest] 185 = Min(ClassFreq[Smallest], Round(Size/ClassesLeft)); 186 187 ClassFreq[Smallest] = 0; 188 189 Size -= TargetClassFreq[Smallest]; 190 ClassesLeft--; 191 } 192 193 cfree(ClassFreq); 194 } 195 195 196 196 /*************************************************************************/ … … 202 202 /*************************************************************************/ 203 203 204 205 FormInitialWindow() 204 FormInitialWindow() 206 205 /* ------------------- */ 207 206 { 208 ItemNo i, Start=0, More; 209 ClassNo c; 210 void Swap(); 211 212 Shuffle(); 213 214 ForEach(c, 0, MaxClass) 215 { 216 More = TargetClassFreq[c]; 217 218 for ( i = Start ; More ; i++ ) 219 { 220 if ( Class(Item[i]) == c ) 221 { 222 Swap(Start, i); 223 Start++; 224 More--; 225 } 226 } 227 } 228 } 229 230 207 ItemNo i, Start = 0, More; 208 ClassNo c; 209 void Swap(); 210 211 Shuffle(); 212 213 ForEach(c, 0, MaxClass) { 214 More = TargetClassFreq[c]; 215 216 for (i = Start; More; i++) { 217 if (Class(Item[i]) == c) { 218 Swap(Start, i); 219 Start++; 220 More--; 221 } 222 } 223 } 224 } 231 225 232 226 /*************************************************************************/ … … 236 230 /*************************************************************************/ 237 231 238 239 Shuffle() 232 Shuffle() 240 233 /* ------- */ 241 234 { 242 ItemNo This, Alt, Left; 243 Description Hold; 244 245 This = 0; 246 for( Left = MaxItem+1 ; Left ; ) 247 { 248 Alt = This + (Left--) * Random; 249 Hold = Item[This]; 250 Item[This++] = Item[Alt]; 251 Item[Alt] = Hold; 252 } 253 } 254 255 235 ItemNo This, Alt, Left; 236 Description Hold; 237 238 This = 0; 239 for (Left = MaxItem + 1; Left;) { 240 Alt = This + (Left--) * Random; 241 Hold = Item[This]; 242 Item[This++] = Item[Alt]; 243 Item[Alt] = Hold; 244 } 245 } 256 246 257 247 /*************************************************************************/ … … 272 262 /*************************************************************************/ 273 263 274 275 264 Tree Iterate(Window, IncExceptions) 276 /* ------- */ 277 ItemNo Window, IncExceptions; 278 { 279 Tree Classifier, BestClassifier=Nil, FormTree(); 280 ItemNo i, Errors, TotalErrors, BestTotalErrors=MaxItem+1, 281 Exceptions, Additions; 282 ClassNo Assigned, Category(); 283 short Cycle=0; 284 void Swap(); 285 286 printf("Cycle Tree -----Cases----"); 287 printf(" -----------------Errors-----------------\n"); 288 printf(" size window other"); 289 printf(" window rate other rate total rate\n"); 290 printf("----- ---- ------ ------"); 291 printf(" ------ ---- ------ ---- ------ ----\n"); 292 293 do 294 { 295 /* Build a classifier tree with the first Window items */ 296 297 InitialiseWeights(); 298 AllKnown = true; 299 Classifier = FormTree(0, Window-1); 300 301 /* Error analysis */ 302 303 Errors = Round(Classifier->Errors); 304 305 /* Move all items that are incorrectly classified by the 306 classifier tree to immediately after the items in the 307 current window. */ 308 309 Exceptions = Window; 310 ForEach(i, Window, MaxItem) 311 { 312 Assigned = Category(Item[i], Classifier); 313 if ( Assigned != Class(Item[i]) ) 314 { 315 Swap(Exceptions, i); 316 Exceptions++; 317 } 318 } 319 Exceptions -= Window; 320 TotalErrors = Errors + Exceptions; 321 322 /* Print error analysis */ 323 324 printf("%3d %7d %8d %6d %8d%5.1f%% %6d%5.1f%% %6d%5.1f%%\n", 325 ++Cycle, TreeSize(Classifier), Window, MaxItem-Window+1, 326 Errors, 100*(float)Errors/Window, 327 Exceptions, 100*Exceptions/(MaxItem-Window+1.001), 328 TotalErrors, 100*TotalErrors/(MaxItem+1.0)); 329 330 /* Keep track of the most successful classifier tree so far */ 331 332 if ( ! BestClassifier || TotalErrors < BestTotalErrors ) 333 { 334 if ( BestClassifier ) ReleaseTree(BestClassifier); 335 BestClassifier = Classifier; 336 BestTotalErrors = TotalErrors; 337 } 338 else 339 { 340 ReleaseTree(Classifier); 341 } 342 343 /* Increment window size */ 344 345 Additions = Min(Exceptions, IncExceptions); 346 Window = Min(Window + Max(Additions, Exceptions / 2), MaxItem + 1); 347 } 348 while ( Exceptions ); 349 350 return BestClassifier; 351 } 352 353 265 /* ------- */ 266 ItemNo Window, IncExceptions; { 267 Tree Classifier, BestClassifier = Nil, FormTree(); 268 ItemNo i, Errors, TotalErrors, BestTotalErrors = MaxItem + 1, Exceptions, 269 Additions; 270 ClassNo Assigned, Category(); 271 short Cycle = 0; 272 void Swap(); 273 274 printf("Cycle Tree -----Cases----"); 275 printf(" -----------------Errors-----------------\n"); 276 printf(" size window other"); 277 printf(" window rate other rate total rate\n"); 278 printf("----- ---- ------ ------"); 279 printf(" ------ ---- ------ ---- ------ ----\n"); 280 281 do { 282 /* Build a classifier tree with the first Window items */ 283 284 InitialiseWeights(); 285 AllKnown = true; 286 Classifier = FormTree(0, Window - 1); 287 288 /* Error analysis */ 289 290 Errors = Round(Classifier->Errors); 291 292 /* Move all items that are incorrectly classified by the 293 classifier tree to immediately after the items in the 294 current window. */ 295 296 Exceptions = Window; 297 ForEach(i, Window, MaxItem) { 298 Assigned = Category(Item[i], Classifier); 299 if (Assigned != Class(Item[i])) { 300 Swap(Exceptions, i); 301 Exceptions++; 302 } 303 } 304 Exceptions -= Window; 305 TotalErrors = Errors + Exceptions; 306 307 /* Print error analysis */ 308 309 printf("%3d %7d %8d %6d %8d%5.1f%% %6d%5.1f%% %6d%5.1f%%\n", 310 ++Cycle, TreeSize(Classifier), Window, MaxItem - Window + 1, 311 Errors, 100 * (float) Errors / Window, Exceptions, 100 312 * Exceptions / (MaxItem - Window + 1.001), TotalErrors, 313 100 * TotalErrors / (MaxItem + 1.0)); 314 315 /* Keep track of the most successful classifier tree so far */ 316 317 if (!BestClassifier || TotalErrors < BestTotalErrors) { 318 if (BestClassifier) 319 ReleaseTree(BestClassifier); 320 BestClassifier = Classifier; 321 BestTotalErrors = TotalErrors; 322 } else { 323 ReleaseTree(Classifier); 324 } 325 326 /* Increment window size */ 327 328 Additions = Min(Exceptions, IncExceptions); 329 Window = Min(Window + Max(Additions, Exceptions / 2), MaxItem + 1); 330 } while (Exceptions); 331 332 return BestClassifier; 333 } 354 334 355 335 /*************************************************************************/ … … 359 339 /*************************************************************************/ 360 340 361 362 Evaluate(CMInfo, Saved) 363 /* -------- */ 364 Boolean CMInfo; 365 short Saved; 366 { 367 ClassNo RealClass, PrunedClass, Category(); 368 short t; 369 ItemNo *ConfusionMat, i, RawErrors, PrunedErrors; 370 371 if ( CMInfo ) 372 { 373 ConfusionMat = (ItemNo *) calloc((MaxClass+1)*(MaxClass+1), sizeof(ItemNo)); 374 } 375 376 printf("\n"); 377 378 if ( TRIALS > 1 ) 379 { 380 printf("Trial\t Before Pruning After Pruning\n"); 381 printf("-----\t---------------- ---------------------------\n"); 382 } 383 else 384 { 385 printf("\t Before Pruning After Pruning\n"); 386 printf("\t---------------- ---------------------------\n"); 387 } 388 printf("\tSize Errors Size Errors Estimate\n\n"); 389 390 ForEach(t, 0, TRIALS-1) 391 { 392 RawErrors = PrunedErrors = 0; 393 394 ForEach(i, 0, MaxItem) 395 { 396 RealClass = Class(Item[i]); 397 398 if ( Category(Item[i], Raw[t]) != RealClass ) RawErrors++; 399 400 PrunedClass = Category(Item[i], Pruned[t]); 401 402 if ( PrunedClass != RealClass ) PrunedErrors++; 403 404 if ( CMInfo && t == Saved ) 405 { 406 ConfusionMat[RealClass*(MaxClass+1)+PrunedClass]++; 407 } 408 } 409 410 if ( TRIALS > 1 ) 411 { 412 printf("%4d", t); 413 } 414 415 printf("\t%4d %3d(%4.1f%%) %4d %3d(%4.1f%%) (%4.1f%%)%s\n", 416 TreeSize(Raw[t]), RawErrors, 100.0*RawErrors / (MaxItem+1.0), 417 TreeSize(Pruned[t]), PrunedErrors, 100.0*PrunedErrors / (MaxItem+1.0), 418 100 * Pruned[t]->Errors / Pruned[t]->Items, 419 ( t == Saved ? " <<" : "" )); 420 } 421 422 if ( CMInfo ) 423 { 424 PrintConfusionMatrix(ConfusionMat); 425 free(ConfusionMat); 426 } 427 } 341 Evaluate(CMInfo, Saved) 342 /* -------- */ 343 Boolean CMInfo;short Saved; { 344 ClassNo RealClass, PrunedClass, Category(); 345 short t; 346 ItemNo *ConfusionMat, i, RawErrors, PrunedErrors; 347 348 if (CMInfo) { 349 ConfusionMat = (ItemNo *) calloc((MaxClass + 1) * (MaxClass + 1), 350 sizeof(ItemNo)); 351 } 352 353 printf("\n"); 354 355 if (TRIALS > 1) { 356 printf("Trial\t Before Pruning After Pruning\n"); 357 printf("-----\t---------------- ---------------------------\n"); 358 } else { 359 printf("\t Before Pruning After Pruning\n"); 360 printf("\t---------------- ---------------------------\n"); 361 } 362 printf("\tSize Errors Size Errors Estimate\n\n"); 363 364 ForEach(t, 0, TRIALS-1) { 365 RawErrors = PrunedErrors = 0; 366 367 ForEach(i, 0, MaxItem) { 368 RealClass = Class(Item[i]); 369 370 if (Category(Item[i], Raw[t]) != RealClass) 371 RawErrors++; 372 373 PrunedClass = Category(Item[i], Pruned[t]); 374 375 if (PrunedClass != RealClass) 376 PrunedErrors++; 377 378 if (CMInfo && t == Saved) { 379 ConfusionMat[RealClass * (MaxClass + 1) + PrunedClass]++; 380 } 381 } 382 383 if (TRIALS > 1) { 384 printf("%4d", t); 385 } 386 387 printf("\t%4d %3d(%4.1f%%) %4d %3d(%4.1f%%) (%4.1f%%)%s\n", 388 TreeSize(Raw[t]), RawErrors, 100.0 * RawErrors 389 / (MaxItem + 1.0), TreeSize(Pruned[t]), PrunedErrors, 390 100.0 * PrunedErrors / (MaxItem + 1.0), 100 * Pruned[t]->Errors 391 / Pruned[t]->Items, (t == Saved ? " <<" : "")); 392 } 393 394 if (CMInfo) { 395 PrintConfusionMatrix(ConfusionMat); 396 free(ConfusionMat); 397 } 398 }
Note: See TracChangeset
for help on using the changeset viewer.