1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Main routine, c4.5 */ |
---|
4 | /* ------------------ */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | |
---|
9 | #include "defns.i" |
---|
10 | #include "types.i" |
---|
11 | |
---|
12 | |
---|
13 | /* External data, described in extern.i */ |
---|
14 | |
---|
15 | short MaxAtt, MaxClass, MaxDiscrVal = 2; |
---|
16 | |
---|
17 | ItemNo MaxItem; |
---|
18 | |
---|
19 | Description *Item; |
---|
20 | |
---|
21 | DiscrValue *MaxAttVal; |
---|
22 | |
---|
23 | char *SpecialStatus; |
---|
24 | |
---|
25 | String *ClassName, |
---|
26 | *AttName, |
---|
27 | **AttValName, |
---|
28 | FileName = "DF"; |
---|
29 | |
---|
30 | short VERBOSITY = 0, |
---|
31 | TRIALS = 10; |
---|
32 | |
---|
33 | Boolean GAINRATIO = true, |
---|
34 | SUBSET = false, |
---|
35 | BATCH = true, |
---|
36 | UNSEENS = false, |
---|
37 | PROBTHRESH = false; |
---|
38 | |
---|
39 | ItemNo MINOBJS = 2, |
---|
40 | WINDOW = 0, |
---|
41 | INCREMENT = 0; |
---|
42 | |
---|
43 | float CF = 0.25; |
---|
44 | |
---|
45 | Tree *Pruned; |
---|
46 | |
---|
47 | Boolean AllKnown = true; |
---|
48 | |
---|
49 | |
---|
50 | main(Argc, Argv) |
---|
51 | /* ---- */ |
---|
52 | int Argc; |
---|
53 | char *Argv[]; |
---|
54 | { |
---|
55 | int o; |
---|
56 | extern char *optarg; |
---|
57 | extern int optind; |
---|
58 | Boolean FirstTime=true; |
---|
59 | short Best, BestTree(); |
---|
60 | |
---|
61 | PrintHeader("decision tree generator"); |
---|
62 | |
---|
63 | /* Process options */ |
---|
64 | |
---|
65 | while ( (o = getopt(Argc, Argv, "f:bupv:t:w:i:gsm:c:")) != EOF ) |
---|
66 | { |
---|
67 | if ( FirstTime ) |
---|
68 | { |
---|
69 | printf("\n Options:\n"); |
---|
70 | FirstTime = false; |
---|
71 | } |
---|
72 | |
---|
73 | switch (o) |
---|
74 | { |
---|
75 | case 'f': FileName = optarg; |
---|
76 | printf("\tFile stem <%s>\n", FileName); |
---|
77 | break; |
---|
78 | case 'b': BATCH = true; |
---|
79 | printf("\tWindowing disabled (now the default)\n"); |
---|
80 | break; |
---|
81 | case 'u': UNSEENS = true; |
---|
82 | printf("\tTrees evaluated on unseen cases\n"); |
---|
83 | break; |
---|
84 | case 'p': PROBTHRESH = true; |
---|
85 | printf("\tProbability thresholds used\n"); |
---|
86 | break; |
---|
87 | case 'v': VERBOSITY = atoi(optarg); |
---|
88 | printf("\tVerbosity level %d\n", VERBOSITY); |
---|
89 | break; |
---|
90 | case 't': TRIALS = atoi(optarg); |
---|
91 | printf("\tWindowing enabled with %d trials\n", TRIALS); |
---|
92 | Check(TRIALS, 1, 10000); |
---|
93 | BATCH = false; |
---|
94 | break; |
---|
95 | case 'w': WINDOW = atoi(optarg); |
---|
96 | printf("\tInitial window size of %d items\n", WINDOW); |
---|
97 | Check(WINDOW, 1, 1000000); |
---|
98 | BATCH = false; |
---|
99 | break; |
---|
100 | case 'i': INCREMENT = atoi(optarg); |
---|
101 | printf("\tMaximum window increment of %d items\n", |
---|
102 | INCREMENT); |
---|
103 | Check(INCREMENT, 1, 1000000); |
---|
104 | BATCH = false; |
---|
105 | break; |
---|
106 | case 'g': GAINRATIO = false; |
---|
107 | printf("\tGain criterion used\n"); |
---|
108 | break; |
---|
109 | case 's': SUBSET = true; |
---|
110 | printf("\tTests on discrete attribute groups\n"); |
---|
111 | break; |
---|
112 | case 'm': MINOBJS = atoi(optarg); |
---|
113 | printf("\tSensible test requires 2 branches with >=%d cases\n", |
---|
114 | MINOBJS); |
---|
115 | Check(MINOBJS, 1, 1000000); |
---|
116 | break; |
---|
117 | case 'c': CF = atof(optarg); |
---|
118 | printf("\tPruning confidence level %g%%\n", CF); |
---|
119 | Check(CF, Epsilon, 100); |
---|
120 | CF /= 100; |
---|
121 | break; |
---|
122 | case '?': printf("unrecognised option\n"); |
---|
123 | exit(1); |
---|
124 | } |
---|
125 | } |
---|
126 | |
---|
127 | /* Initialise */ |
---|
128 | |
---|
129 | GetNames(); |
---|
130 | printf(">>>the force is with us\n"); |
---|
131 | GetData(".data"); |
---|
132 | printf(">>>the force is with us222\n"); |
---|
133 | printf("\nRead %d cases (%d attributes) from %s.data\n", |
---|
134 | MaxItem+1, MaxAtt+1, FileName); |
---|
135 | |
---|
136 | /* Build decision trees */ |
---|
137 | |
---|
138 | if ( BATCH ) |
---|
139 | { |
---|
140 | TRIALS = 1; |
---|
141 | OneTree(); |
---|
142 | Best = 0; |
---|
143 | } |
---|
144 | else |
---|
145 | { |
---|
146 | Best = BestTree(); |
---|
147 | } |
---|
148 | |
---|
149 | /* Soften thresholds in best tree */ |
---|
150 | |
---|
151 | if ( PROBTHRESH ) |
---|
152 | { |
---|
153 | printf("Softening thresholds"); |
---|
154 | if ( ! BATCH ) printf(" for best tree from trial %d", Best); |
---|
155 | printf("\n"); |
---|
156 | SoftenThresh(Pruned[Best]); |
---|
157 | printf("\n"); |
---|
158 | PrintTree(Pruned[Best]); |
---|
159 | } |
---|
160 | |
---|
161 | /* Save best tree */ |
---|
162 | |
---|
163 | if ( BATCH || TRIALS == 1 ) |
---|
164 | { |
---|
165 | printf("\nTree saved\n"); |
---|
166 | } |
---|
167 | else |
---|
168 | { |
---|
169 | printf("\nBest tree from trial %d saved\n", Best); |
---|
170 | } |
---|
171 | SaveTree(Pruned[Best], ".tree"); |
---|
172 | |
---|
173 | /* Evaluation */ |
---|
174 | |
---|
175 | printf("\n\nEvaluation on training data (%d items):\n", MaxItem+1); |
---|
176 | Evaluate(false, Best); |
---|
177 | |
---|
178 | if ( UNSEENS ) |
---|
179 | { |
---|
180 | GetData(".test"); |
---|
181 | printf("\nEvaluation on test data (%d items):\n", MaxItem+1); |
---|
182 | Evaluate(true, Best); |
---|
183 | } |
---|
184 | |
---|
185 | exit(0); |
---|
186 | } |
---|