1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Main routine for constructing sets of production rules from trees */ |
---|
4 | /* ----------------------------------------------------------------- */ |
---|
5 | /* */ |
---|
6 | /*************************************************************************/ |
---|
7 | |
---|
8 | |
---|
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "defns.i"
#include "types.i"
---|
11 | |
---|
12 | |
---|
13 | /* External data. Note: uncommented variables have the same meaning |
---|
14 | as for decision trees */ |
---|
15 | |
---|
16 | short MaxAtt, MaxClass, MaxDiscrVal; |
---|
17 | |
---|
18 | ItemNo MaxItem; |
---|
19 | |
---|
20 | Description *Item; |
---|
21 | |
---|
22 | DiscrValue *MaxAttVal; |
---|
23 | |
---|
24 | char *SpecialStatus; |
---|
25 | |
---|
26 | String *ClassName, |
---|
27 | *AttName, |
---|
28 | **AttValName, |
---|
29 | FileName = "DF"; |
---|
30 | |
---|
31 | short VERBOSITY = 0, |
---|
32 | TRIALS; |
---|
33 | |
---|
34 | Boolean UNSEENS = false, |
---|
35 | SIGTEST = false, /* use significance test in rule pruning */ |
---|
36 | SIMANNEAL = false; /* use simulated annealing */ |
---|
37 | |
---|
38 | float SIGTHRESH = 0.05, |
---|
39 | CF = 0.25, |
---|
40 | REDUNDANCY = 1.0; /* factor that guesstimates the |
---|
41 | amount of redundancy and |
---|
42 | irrelevance in the attributes */ |
---|
43 | |
---|
44 | PR *Rule; /* current rules */ |
---|
45 | |
---|
46 | RuleNo NRules = 0, /* number of current rules */ |
---|
47 | *RuleIndex; /* rule index */ |
---|
48 | |
---|
49 | short RuleSpace = 0; /* space allocated for rules */ |
---|
50 | |
---|
51 | ClassNo DefaultClass; /* current default class */ |
---|
52 | |
---|
53 | RuleSet *PRSet; /* sets of rulesets */ |
---|
54 | |
---|
55 | float AttTestBits, /* bits to encode tested att */ |
---|
56 | *BranchBits; /* ditto attribute value */ |
---|
57 | |
---|
58 | |
---|
59 | |
---|
60 | main(Argc, Argv) |
---|
61 | /* ---- */ |
---|
62 | int Argc; |
---|
63 | char *Argv[]; |
---|
64 | { |
---|
65 | int o; |
---|
66 | extern char *optarg; |
---|
67 | extern int optind; |
---|
68 | Boolean FirstTime=true; |
---|
69 | |
---|
70 | PrintHeader("rule generator"); |
---|
71 | |
---|
72 | /* Process options */ |
---|
73 | |
---|
74 | while ( (o = getopt(Argc, Argv, "f:uv:c:r:F:a")) != EOF ) |
---|
75 | { |
---|
76 | if ( FirstTime ) |
---|
77 | { |
---|
78 | printf("\n Options:\n"); |
---|
79 | FirstTime = false; |
---|
80 | } |
---|
81 | |
---|
82 | switch (o) |
---|
83 | { |
---|
84 | case 'f': FileName = optarg; |
---|
85 | printf("\tFile stem <%s>\n", FileName); |
---|
86 | break; |
---|
87 | case 'u': UNSEENS = true; |
---|
88 | printf("\tRulesets evaluated on unseen cases\n"); |
---|
89 | break; |
---|
90 | case 'v': VERBOSITY = atoi(optarg); |
---|
91 | printf("\tVerbosity level %d\n", VERBOSITY); |
---|
92 | break; |
---|
93 | case 'c': CF = atof(optarg); |
---|
94 | printf("\tPruning confidence level %g%%\n", CF); |
---|
95 | Check(CF, 0, 100); |
---|
96 | CF /= 100; |
---|
97 | break; |
---|
98 | case 'r': REDUNDANCY = atof(optarg); |
---|
99 | printf("\tRedundancy factor %g\n", REDUNDANCY); |
---|
100 | Check(REDUNDANCY, 0, 10000); |
---|
101 | break; |
---|
102 | case 'F': SIGTHRESH = atof(optarg); |
---|
103 | printf("\tSignificance test in rule pruning, "); |
---|
104 | printf("threshold %g%%\n", SIGTHRESH); |
---|
105 | Check(SIGTHRESH, 0, 100); |
---|
106 | SIGTHRESH /= 100; |
---|
107 | SIGTEST = true; |
---|
108 | break; |
---|
109 | case 'a': SIMANNEAL = true; |
---|
110 | printf("\tSimulated annealing for selecting rules\n"); |
---|
111 | break; |
---|
112 | case '?': printf("unrecognised option\n"); |
---|
113 | exit(1); |
---|
114 | } |
---|
115 | } |
---|
116 | |
---|
117 | /* Initialise */ |
---|
118 | |
---|
119 | GetNames(); |
---|
120 | GetData(".data"); |
---|
121 | printf("\nRead %d cases (%d attributes) from %s\n", |
---|
122 | MaxItem+1, MaxAtt+1, FileName); |
---|
123 | |
---|
124 | GenerateLogs(); |
---|
125 | |
---|
126 | /* Construct rules */ |
---|
127 | |
---|
128 | GenerateRules(); |
---|
129 | |
---|
130 | /* Evaluations */ |
---|
131 | |
---|
132 | printf("\n\nEvaluation on training data (%d items):\n", MaxItem+1); |
---|
133 | EvaluateRulesets(true); |
---|
134 | |
---|
135 | /* Save current ruleset */ |
---|
136 | |
---|
137 | SaveRules(); |
---|
138 | |
---|
139 | if ( UNSEENS ) |
---|
140 | { |
---|
141 | GetData(".test"); |
---|
142 | printf("\nEvaluation on test data (%d items):\n", MaxItem+1); |
---|
143 | EvaluateRulesets(false); |
---|
144 | } |
---|
145 | |
---|
146 | exit(0); |
---|
147 | } |
---|