[26] | 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Main routine for constructing sets of production rules from trees */ |
---|
| 4 | /* ----------------------------------------------------------------- */ |
---|
| 5 | /* */ |
---|
| 6 | /*************************************************************************/ |
---|
| 7 | |
---|
| 8 | |
---|
| 9 | #include "defns.i" |
---|
| 10 | #include "types.i" |
---|
| 11 | |
---|
| 12 | |
---|
| 13 | /* External data. Note: uncommented variables have the same meaning |
---|
| 14 | as for decision trees */ |
---|
| 15 | |
---|
| 16 | short MaxAtt, MaxClass, MaxDiscrVal; |
---|
| 17 | |
---|
| 18 | ItemNo MaxItem; |
---|
| 19 | |
---|
| 20 | Description *Item; |
---|
| 21 | |
---|
| 22 | DiscrValue *MaxAttVal; |
---|
| 23 | |
---|
| 24 | char *SpecialStatus; |
---|
| 25 | |
---|
| 26 | String *ClassName, |
---|
| 27 | *AttName, |
---|
| 28 | **AttValName, |
---|
| 29 | FileName = "DF"; |
---|
| 30 | |
---|
| 31 | short VERBOSITY = 0, |
---|
| 32 | TRIALS; |
---|
| 33 | |
---|
| 34 | Boolean UNSEENS = false, |
---|
| 35 | SIGTEST = false, /* use significance test in rule pruning */ |
---|
| 36 | SIMANNEAL = false; /* use simulated annealing */ |
---|
| 37 | |
---|
| 38 | float SIGTHRESH = 0.05, |
---|
| 39 | CF = 0.25, |
---|
| 40 | REDUNDANCY = 1.0; /* factor that guesstimates the |
---|
| 41 | amount of redundancy and |
---|
| 42 | irrelevance in the attributes */ |
---|
| 43 | |
---|
| 44 | PR *Rule; /* current rules */ |
---|
| 45 | |
---|
| 46 | RuleNo NRules = 0, /* number of current rules */ |
---|
| 47 | *RuleIndex; /* rule index */ |
---|
| 48 | |
---|
| 49 | short RuleSpace = 0; /* space allocated for rules */ |
---|
| 50 | |
---|
| 51 | ClassNo DefaultClass; /* current default class */ |
---|
| 52 | |
---|
| 53 | RuleSet *PRSet; /* sets of rulesets */ |
---|
| 54 | |
---|
| 55 | float AttTestBits, /* bits to encode tested att */ |
---|
| 56 | *BranchBits; /* ditto attribute value */ |
---|
| 57 | |
---|
| 58 | |
---|
| 59 | |
---|
| 60 | main(Argc, Argv) |
---|
| 61 | /* ---- */ |
---|
| 62 | int Argc; |
---|
| 63 | char *Argv[]; |
---|
| 64 | { |
---|
| 65 | int o; |
---|
| 66 | extern char *optarg; |
---|
| 67 | extern int optind; |
---|
| 68 | Boolean FirstTime=true; |
---|
| 69 | |
---|
| 70 | PrintHeader("rule generator"); |
---|
| 71 | |
---|
| 72 | /* Process options */ |
---|
| 73 | |
---|
| 74 | while ( (o = getopt(Argc, Argv, "f:uv:c:r:F:a")) != EOF ) |
---|
| 75 | { |
---|
| 76 | if ( FirstTime ) |
---|
| 77 | { |
---|
| 78 | printf("\n Options:\n"); |
---|
| 79 | FirstTime = false; |
---|
| 80 | } |
---|
| 81 | |
---|
| 82 | switch (o) |
---|
| 83 | { |
---|
| 84 | case 'f': FileName = optarg; |
---|
| 85 | printf("\tFile stem <%s>\n", FileName); |
---|
| 86 | break; |
---|
| 87 | case 'u': UNSEENS = true; |
---|
| 88 | printf("\tRulesets evaluated on unseen cases\n"); |
---|
| 89 | break; |
---|
| 90 | case 'v': VERBOSITY = atoi(optarg); |
---|
| 91 | printf("\tVerbosity level %d\n", VERBOSITY); |
---|
| 92 | break; |
---|
| 93 | case 'c': CF = atof(optarg); |
---|
| 94 | printf("\tPruning confidence level %g%%\n", CF); |
---|
| 95 | Check(CF, 0, 100); |
---|
| 96 | CF /= 100; |
---|
| 97 | break; |
---|
| 98 | case 'r': REDUNDANCY = atof(optarg); |
---|
| 99 | printf("\tRedundancy factor %g\n", REDUNDANCY); |
---|
| 100 | Check(REDUNDANCY, 0, 10000); |
---|
| 101 | break; |
---|
| 102 | case 'F': SIGTHRESH = atof(optarg); |
---|
| 103 | printf("\tSignificance test in rule pruning, "); |
---|
| 104 | printf("threshold %g%%\n", SIGTHRESH); |
---|
| 105 | Check(SIGTHRESH, 0, 100); |
---|
| 106 | SIGTHRESH /= 100; |
---|
| 107 | SIGTEST = true; |
---|
| 108 | break; |
---|
| 109 | case 'a': SIMANNEAL = true; |
---|
| 110 | printf("\tSimulated annealing for selecting rules\n"); |
---|
| 111 | break; |
---|
| 112 | case '?': printf("unrecognised option\n"); |
---|
| 113 | exit(1); |
---|
| 114 | } |
---|
| 115 | } |
---|
| 116 | |
---|
| 117 | /* Initialise */ |
---|
| 118 | |
---|
| 119 | GetNames(); |
---|
| 120 | GetData(".data"); |
---|
| 121 | printf("\nRead %d cases (%d attributes) from %s\n", |
---|
| 122 | MaxItem+1, MaxAtt+1, FileName); |
---|
| 123 | |
---|
| 124 | GenerateLogs(); |
---|
| 125 | |
---|
| 126 | /* Construct rules */ |
---|
| 127 | |
---|
| 128 | GenerateRules(); |
---|
| 129 | |
---|
| 130 | /* Evaluations */ |
---|
| 131 | |
---|
| 132 | printf("\n\nEvaluation on training data (%d items):\n", MaxItem+1); |
---|
| 133 | EvaluateRulesets(true); |
---|
| 134 | |
---|
| 135 | /* Save current ruleset */ |
---|
| 136 | |
---|
| 137 | SaveRules(); |
---|
| 138 | |
---|
| 139 | if ( UNSEENS ) |
---|
| 140 | { |
---|
| 141 | GetData(".test"); |
---|
| 142 | printf("\nEvaluation on test data (%d items):\n", MaxItem+1); |
---|
| 143 | EvaluateRulesets(false); |
---|
| 144 | } |
---|
| 145 | |
---|
| 146 | exit(0); |
---|
| 147 | } |
---|