[66] | 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Main routine, c4.5 */ |
---|
| 4 | /* ------------------ */ |
---|
| 5 | /* */ |
---|
| 6 | /*************************************************************************/ |
---|
| 7 | |
---|
| 8 | #include "defns.i" |
---|
| 9 | #include "types.i" |
---|
| 10 | |
---|
| 11 | |
---|
| 12 | #include <omp.h> |
---|
| 13 | |
---|
| 14 | |
---|
| 15 | /* External data, described in extern.i */ |
---|
| 16 | |
---|
| 17 | short MaxAtt, MaxClass, MaxDiscrVal = 2; |
---|
| 18 | |
---|
| 19 | ItemNo MaxItem; |
---|
| 20 | |
---|
| 21 | Description *Item; |
---|
| 22 | |
---|
| 23 | DiscrValue *MaxAttVal; |
---|
| 24 | |
---|
| 25 | char *SpecialStatus; |
---|
| 26 | |
---|
| 27 | String *ClassName, *AttName, **AttValName, FileName = "DF"; |
---|
| 28 | |
---|
| 29 | short VERBOSITY = 0, TRIALS = 10; |
---|
| 30 | |
---|
| 31 | Boolean GAINRATIO = true, SUBSET = false, BATCH = true, UNSEENS = false, |
---|
| 32 | PROBTHRESH = false; |
---|
| 33 | |
---|
| 34 | ItemNo MINOBJS = 2, WINDOW = 0, INCREMENT = 0; |
---|
| 35 | |
---|
| 36 | float CF = 0.25; |
---|
| 37 | |
---|
| 38 | Tree *Pruned; |
---|
| 39 | |
---|
| 40 | Boolean AllKnown = true; |
---|
| 41 | |
---|
| 42 | main(Argc, Argv) |
---|
| 43 | /* ---- */ |
---|
| 44 | int Argc;char *Argv[]; { |
---|
| 45 | int o; |
---|
| 46 | extern char *optarg; |
---|
| 47 | extern int optind; |
---|
| 48 | Boolean FirstTime = true; |
---|
| 49 | short Best, BestTree(); |
---|
| 50 | |
---|
| 51 | PrintHeader("decision tree generator"); |
---|
| 52 | |
---|
| 53 | /* |
---|
| 54 | (void) omp_set_dynamic(0); |
---|
| 55 | if (omp_get_dynamic()) {printf("Warning: dynamic adjustment of threads has been set\n");} |
---|
| 56 | (void) omp_set_num_threads(2); |
---|
| 57 | */ |
---|
| 58 | /* Process options */ |
---|
| 59 | |
---|
| 60 | while ((o = getopt(Argc, Argv, "f:bupv:t:w:i:gsm:c:")) != EOF) { |
---|
| 61 | if (FirstTime) { |
---|
| 62 | printf("\n Options:\n"); |
---|
| 63 | FirstTime = false; |
---|
| 64 | } |
---|
| 65 | |
---|
| 66 | switch (o) { |
---|
| 67 | case 'f': |
---|
| 68 | FileName = optarg; |
---|
| 69 | printf("\tFile stem <%s>\n", FileName); |
---|
| 70 | break; |
---|
| 71 | case 'b': |
---|
| 72 | BATCH = true; |
---|
| 73 | printf("\tWindowing disabled (now the default)\n"); |
---|
| 74 | break; |
---|
| 75 | case 'u': |
---|
| 76 | UNSEENS = true; |
---|
| 77 | printf("\tTrees evaluated on unseen cases\n"); |
---|
| 78 | break; |
---|
| 79 | case 'p': |
---|
| 80 | PROBTHRESH = true; |
---|
| 81 | printf("\tProbability thresholds used\n"); |
---|
| 82 | break; |
---|
| 83 | case 'v': |
---|
| 84 | VERBOSITY = atoi(optarg); |
---|
| 85 | printf("\tVerbosity level %d\n", VERBOSITY); |
---|
| 86 | break; |
---|
| 87 | case 't': |
---|
| 88 | TRIALS = atoi(optarg); |
---|
| 89 | printf("\tWindowing enabled with %d trials\n", TRIALS); |
---|
| 90 | Check(TRIALS, 1, 10000) |
---|
| 91 | ; |
---|
| 92 | BATCH = false; |
---|
| 93 | break; |
---|
| 94 | case 'w': |
---|
| 95 | WINDOW = atoi(optarg); |
---|
| 96 | printf("\tInitial window size of %d items\n", WINDOW); |
---|
| 97 | Check(WINDOW, 1, 1000000) |
---|
| 98 | ; |
---|
| 99 | BATCH = false; |
---|
| 100 | break; |
---|
| 101 | case 'i': |
---|
| 102 | INCREMENT = atoi(optarg); |
---|
| 103 | printf("\tMaximum window increment of %d items\n", INCREMENT); |
---|
| 104 | Check(INCREMENT, 1, 1000000) |
---|
| 105 | ; |
---|
| 106 | BATCH = false; |
---|
| 107 | break; |
---|
| 108 | case 'g': |
---|
| 109 | GAINRATIO = false; |
---|
| 110 | printf("\tGain criterion used\n"); |
---|
| 111 | break; |
---|
| 112 | case 's': |
---|
| 113 | SUBSET = true; |
---|
| 114 | printf("\tTests on discrete attribute groups\n"); |
---|
| 115 | break; |
---|
| 116 | case 'm': |
---|
| 117 | MINOBJS = atoi(optarg); |
---|
| 118 | printf("\tSensible test requires 2 branches with >=%d cases\n", |
---|
| 119 | MINOBJS); |
---|
| 120 | Check(MINOBJS, 1, 1000000) |
---|
| 121 | ; |
---|
| 122 | break; |
---|
| 123 | case 'c': |
---|
| 124 | CF = atof(optarg); |
---|
| 125 | printf("\tPruning confidence level %g%%\n", CF); |
---|
| 126 | Check(CF, Epsilon, 100) |
---|
| 127 | ; |
---|
| 128 | CF /= 100; |
---|
| 129 | break; |
---|
| 130 | case '?': |
---|
| 131 | printf("unrecognised option\n"); |
---|
| 132 | exit(1); |
---|
| 133 | } |
---|
| 134 | } |
---|
| 135 | |
---|
| 136 | /* Initialise */ |
---|
| 137 | |
---|
| 138 | GetNames(); |
---|
| 139 | GetData(".data"); |
---|
| 140 | printf("\nRead %d cases (%d attributes) from %s.data\n", MaxItem + 1, |
---|
| 141 | MaxAtt + 1, FileName); |
---|
| 142 | |
---|
| 143 | /* Build decision trees */ |
---|
| 144 | |
---|
| 145 | if (BATCH) { |
---|
| 146 | TRIALS = 1; |
---|
| 147 | OneTree(); |
---|
| 148 | Best = 0; |
---|
| 149 | } else { |
---|
| 150 | Best = BestTree(); |
---|
| 151 | } |
---|
| 152 | |
---|
| 153 | /* Soften thresholds in best tree */ |
---|
| 154 | |
---|
| 155 | if (PROBTHRESH) { |
---|
| 156 | printf("Softening thresholds"); |
---|
| 157 | if (!BATCH) |
---|
| 158 | printf(" for best tree from trial %d", Best); |
---|
| 159 | printf("\n"); |
---|
| 160 | SoftenThresh(Pruned[Best]); |
---|
| 161 | printf("\n"); |
---|
| 162 | PrintTree(Pruned[Best]); |
---|
| 163 | } |
---|
| 164 | |
---|
| 165 | /* Save best tree */ |
---|
| 166 | |
---|
| 167 | if (BATCH || TRIALS == 1) { |
---|
| 168 | printf("\nTree saved\n"); |
---|
| 169 | } else { |
---|
| 170 | printf("\nBest tree from trial %d saved\n", Best); |
---|
| 171 | } |
---|
| 172 | SaveTree(Pruned[Best], ".tree"); |
---|
| 173 | |
---|
| 174 | /* Evaluation */ |
---|
| 175 | |
---|
| 176 | printf("\n\nEvaluation on training data (%d items):\n", MaxItem + 1); |
---|
| 177 | Evaluate(false, Best); |
---|
| 178 | |
---|
| 179 | if (UNSEENS) { |
---|
| 180 | GetData(".test"); |
---|
| 181 | printf("\nEvaluation on test data (%d items):\n", MaxItem + 1); |
---|
| 182 | Evaluate(true, Best); |
---|
| 183 | } |
---|
| 184 | |
---|
| 185 | exit(0); |
---|
| 186 | } |
---|