1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Generate all rulesets from the decision trees */ |
---|
4 | /* --------------------------------------------- */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | |
---|
9 | #include "defns.i" |
---|
10 | #include "types.i" |
---|
11 | #include "extern.i" |
---|
12 | #include "rulex.i" |
---|
13 | |
---|
14 | |
---|
15 | /*************************************************************************/ |
---|
16 | /* */ |
---|
17 | /* For each tree, form a set of rules and process them, then form a */ |
---|
18 | /* composite set of rules from all of these sets. */ |
---|
19 | /* If there is only one tree, then no composite set is formed. */ |
---|
20 | /* */ |
---|
21 | /* Rulesets are stored in PRSet[0] to PRSet[TRIALS], where */ |
---|
22 | /* PRSet[TRIALS] contains the composite ruleset. */ |
---|
23 | /* */ |
---|
24 | /* On completion, the current ruleset is the composite ruleset (if one */ |
---|
25 | /* has been made), otherwise the ruleset from the single tree. */ |
---|
26 | /* */ |
---|
27 | /*************************************************************************/ |
---|
28 | |
---|
29 | |
---|
30 | GenerateRules() |
---|
31 | /* ------------- */ |
---|
32 | { |
---|
33 | Tree DecisionTree, GetTree(); |
---|
34 | short t=0, RuleSetSpace=0, r; |
---|
35 | |
---|
36 | /* Find bits to encode attributes and branches */ |
---|
37 | |
---|
38 | FindTestCodes(); |
---|
39 | |
---|
40 | /* Now process each decision tree */ |
---|
41 | |
---|
42 | while ( DecisionTree = GetTree(".unpruned") ) |
---|
43 | { |
---|
44 | printf("\n------------------\n"); |
---|
45 | printf("Processing tree %d\n", t); |
---|
46 | |
---|
47 | /* Form a set of rules from the next tree */ |
---|
48 | |
---|
49 | FormRules(DecisionTree); |
---|
50 | |
---|
51 | /* Process the set of rules for this trial */ |
---|
52 | |
---|
53 | ConstructRuleset(); |
---|
54 | |
---|
55 | printf("\nFinal rules from tree %d:\n", t); |
---|
56 | PrintIndexedRules(); |
---|
57 | |
---|
58 | /* Make sure there is enough room for the new ruleset */ |
---|
59 | |
---|
60 | if ( t + 1 >= RuleSetSpace ) |
---|
61 | { |
---|
62 | RuleSetSpace += 10; |
---|
63 | |
---|
64 | if ( RuleSetSpace > 10 ) |
---|
65 | { |
---|
66 | PRSet = (RuleSet *) realloc(PRSet, RuleSetSpace * sizeof(RuleSet)); |
---|
67 | } |
---|
68 | else |
---|
69 | { |
---|
70 | PRSet = (RuleSet *) malloc(RuleSetSpace * sizeof(RuleSet)); |
---|
71 | } |
---|
72 | |
---|
73 | } |
---|
74 | |
---|
75 | PRSet[t].SNRules = NRules; |
---|
76 | PRSet[t].SRule = Rule; |
---|
77 | PRSet[t].SRuleIndex = RuleIndex; |
---|
78 | PRSet[t].SDefaultClass = DefaultClass; |
---|
79 | |
---|
80 | ++t; |
---|
81 | } |
---|
82 | |
---|
83 | if ( ! t ) |
---|
84 | { |
---|
85 | printf("\nERROR: can't find any decision trees\n"); |
---|
86 | exit(1); |
---|
87 | } |
---|
88 | |
---|
89 | TRIALS = t; |
---|
90 | |
---|
91 | /* If there is more than one tree in the trees file, |
---|
92 | make a composite ruleset of the rules from all trees */ |
---|
93 | |
---|
94 | if ( TRIALS > 1 ) |
---|
95 | { |
---|
96 | CompositeRuleset(); |
---|
97 | } |
---|
98 | } |
---|
99 | |
---|
100 | |
---|
101 | |
---|
102 | /*************************************************************************/ |
---|
103 | /* */ |
---|
104 | /* Determine code lengths for attributes and branches */ |
---|
105 | /* */ |
---|
106 | /*************************************************************************/ |
---|
107 | |
---|
108 | |
---|
109 | FindTestCodes() |
---|
110 | /* ------------- */ |
---|
111 | { |
---|
112 | Attribute Att; |
---|
113 | DiscrValue v, V; |
---|
114 | ItemNo i, *ValFreq; |
---|
115 | int PossibleCuts; |
---|
116 | float Sum, SumBranches=0, p; |
---|
117 | void SwapUnweighted(); |
---|
118 | |
---|
119 | BranchBits = (float *) malloc((MaxAtt+1) * sizeof(float)); |
---|
120 | |
---|
121 | ForEach(Att, 0, MaxAtt) |
---|
122 | { |
---|
123 | if ( (V = MaxAttVal[Att]) ) |
---|
124 | { |
---|
125 | ValFreq = (ItemNo *) calloc(V+1, sizeof(ItemNo)); |
---|
126 | |
---|
127 | ForEach(i, 0, MaxItem) |
---|
128 | { |
---|
129 | ValFreq[DVal(Item[i],Att)]++; |
---|
130 | } |
---|
131 | |
---|
132 | Sum = 0; |
---|
133 | ForEach(v, 1, V) |
---|
134 | { |
---|
135 | if ( ValFreq[v] ) |
---|
136 | { |
---|
137 | Sum += (ValFreq[v] / (MaxItem+1.0)) * |
---|
138 | (LogItemNo[MaxItem+1] - LogItemNo[ValFreq[v]]); |
---|
139 | } |
---|
140 | } |
---|
141 | free(ValFreq); |
---|
142 | |
---|
143 | BranchBits[Att] = Sum; |
---|
144 | } |
---|
145 | else |
---|
146 | { |
---|
147 | Quicksort(0, MaxItem, Att, SwapUnweighted); |
---|
148 | |
---|
149 | PossibleCuts = 1; |
---|
150 | ForEach(i, 1, MaxItem) |
---|
151 | { |
---|
152 | if ( CVal(Item[i],Att) > CVal(Item[i-1],Att) ) |
---|
153 | { |
---|
154 | PossibleCuts++; |
---|
155 | } |
---|
156 | } |
---|
157 | |
---|
158 | BranchBits[Att] = PossibleCuts > 1 ? |
---|
159 | 1 + LogItemNo[PossibleCuts] / 2 : 0 ; |
---|
160 | } |
---|
161 | |
---|
162 | SumBranches += BranchBits[Att]; |
---|
163 | } |
---|
164 | |
---|
165 | AttTestBits = 0; |
---|
166 | ForEach(Att, 0, MaxAtt) |
---|
167 | { |
---|
168 | if ( (p = BranchBits[Att] / SumBranches) > 0 ) |
---|
169 | { |
---|
170 | AttTestBits -= p * log(p) / log(2.0); |
---|
171 | } |
---|
172 | } |
---|
173 | } |
---|
174 | |
---|
175 | |
---|
176 | |
---|
177 | /*************************************************************************/ |
---|
178 | /* */ |
---|
179 | /* Exchange items at a and b. Note: unlike the similar routine in */ |
---|
180 | /* buildtree, this does not assume that items have a Weight to be */ |
---|
181 | /* swapped as well! */ |
---|
182 | /* */ |
---|
183 | /*************************************************************************/ |
---|
184 | |
---|
185 | |
---|
186 | void SwapUnweighted(a, b) |
---|
187 | /* -------------- */ |
---|
188 | ItemNo a, b; |
---|
189 | { |
---|
190 | Description Hold; |
---|
191 | |
---|
192 | Hold = Item[a]; |
---|
193 | Item[a] = Item[b]; |
---|
194 | Item[b] = Hold; |
---|
195 | } |
---|
196 | |
---|
197 | |
---|
198 | |
---|
199 | /*************************************************************************/ |
---|
200 | /* */ |
---|
201 | /* Form composite ruleset for all trials */ |
---|
202 | /* */ |
---|
203 | /*************************************************************************/ |
---|
204 | |
---|
205 | |
---|
206 | CompositeRuleset() |
---|
207 | /* ---------------- */ |
---|
208 | { |
---|
209 | RuleNo r; |
---|
210 | short t, ri; |
---|
211 | Boolean NewRule(); |
---|
212 | |
---|
213 | InitialiseRules(); |
---|
214 | |
---|
215 | /* Lump together all the rules from each ruleset */ |
---|
216 | |
---|
217 | ForEach(t, 0, TRIALS-1) |
---|
218 | { |
---|
219 | ForEach(ri, 1, PRSet[t].SNRules) |
---|
220 | { |
---|
221 | r = PRSet[t].SRuleIndex[ri]; |
---|
222 | NewRule(PRSet[t].SRule[r].Lhs, PRSet[t].SRule[r].Size, |
---|
223 | PRSet[t].SRule[r].Rhs, PRSet[t].SRule[r].Error); |
---|
224 | } |
---|
225 | } |
---|
226 | |
---|
227 | /* ... and select a subset in the usual way */ |
---|
228 | |
---|
229 | ConstructRuleset(); |
---|
230 | |
---|
231 | printf("\nComposite ruleset:\n"); |
---|
232 | PrintIndexedRules(); |
---|
233 | |
---|
234 | PRSet[TRIALS].SNRules = NRules; |
---|
235 | PRSet[TRIALS].SRule = Rule; |
---|
236 | PRSet[TRIALS].SRuleIndex = RuleIndex; |
---|
237 | PRSet[TRIALS].SDefaultClass = DefaultClass; |
---|
238 | } |
---|