Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

contin.c @ 62

Last change on this file since 62 was 26, checked in by (none), 15 years ago
blabla
File size: 5.7 KB

Rev	Line
[26]	1	/*************************************************************************/
	2	/* */
	3	/* Evaluation of a test on a continuous valued attribute */
	4	/* ----------------------------------------------------- */
	5	/* */
	6	/*************************************************************************/
	7
	8
	9	#include "buildex.i"
	10
	11
	12	float
	13	SplitGain, / SplitGain[i] = gain with att value of item i as threshold */
	14	SplitInfo; / SplitInfo[i] = potential info ditto */
	15
	16
	17
	18	/*************************************************************************/
	19	/* */
	20	/* Continuous attributes are treated as if they have possible values */
	21	/* 0 (unknown), 1 (less than cut), 2(greater than cut) */
	22	/* This routine finds the best cut for items Fp through Lp and sets */
	23	/* Info[], Gain[] and Bar[] */
	24	/* */
	25	/*************************************************************************/
	26
	27
	28	EvalContinuousAtt(Att, Fp, Lp)
	29	/* ----------------- */
	30	Attribute Att;
	31	ItemNo Fp, Lp;
	32	{
	33	ItemNo i, BestI, Xp, Tries=0;
	34	ItemCount Items, KnownItems, LowItems, MinSplit, CountItems();
	35	ClassNo c;
	36	float AvGain=0, Val, BestVal, BaseInfo, ThreshCost,
	37	ComputeGain(), TotalInfo(), Worth();
	38	void Swap();
	39
	40	Verbosity(2) printf("\tAtt %s", AttName[Att]);
	41	Verbosity(3) printf("\n");
	42
	43	ResetFreq(2);
	44
	45	/* Omit and count unknown values */
	46
	47	Items = CountItems(Fp, Lp);
	48	Xp = Fp;
	49	ForEach(i, Fp, Lp)
	50	{
	51	if ( CVal(Item[i],Att) == Unknown )
	52	{
	53	Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
	54	Swap(Xp, i);
	55	Xp++;
	56	}
	57	}
	58
	59	ValFreq[0] = 0;
	60	ForEach(c, 0, MaxClass)
	61	{
	62	ValFreq[0] += Freq[0][c];
	63	}
	64
	65	KnownItems = Items - ValFreq[0];
	66	UnknownRate[Att] = 1.0 - KnownItems / Items;
	67
	68	/* Special case when very few known values */
	69
	70	if ( KnownItems < 2 * MINOBJS )
	71	{
	72	Verbosity(2) printf("\tinsufficient cases with known values\n");
	73
	74	Gain[Att] = -Epsilon;
	75	Info[Att] = 0.0;
	76	return;
	77	}
	78
	79	Quicksort(Xp, Lp, Att, Swap);
	80
	81	/* Count base values and determine base information */
	82
	83	ForEach(i, Xp, Lp)
	84	{
	85	Freq[ 2 ][ Class(Item[i]) ] += Weight[i];
	86	SplitGain[i] = -Epsilon;
	87	SplitInfo[i] = 0;
	88	}
	89
	90	BaseInfo = TotalInfo(Freq[2], 0, MaxClass) / KnownItems;
	91
	92	/* Try possible cuts between items i and i+1, and determine the
	93	information and gain of the split in each case. We have to be wary
	94	of splitting a small number of items off one end, as we can always
	95	split off a single item, but this has little predictive power. */
	96
	97	MinSplit = 0.10 * KnownItems / (MaxClass + 1);
	98	if ( MinSplit <= MINOBJS ) MinSplit = MINOBJS;
	99	else
	100	if ( MinSplit > 25 ) MinSplit = 25;
	101
	102	LowItems = 0;
	103	ForEach(i, Xp, Lp - 1)
	104	{
	105	c = Class(Item[i]);
	106	LowItems += Weight[i];
	107	Freq[1][c] += Weight[i];
	108	Freq[2][c] -= Weight[i];
	109
	110	if ( LowItems < MinSplit ) continue;
	111	else
	112	if ( LowItems > KnownItems - MinSplit ) break;
	113
	114	if ( CVal(Item[i],Att) < CVal(Item[i+1],Att) - 1E-5 )
	115	{
	116	ValFreq[1] = LowItems;
	117	ValFreq[2] = KnownItems - LowItems;
	118	SplitGain[i] = ComputeGain(BaseInfo, UnknownRate[Att], 2, KnownItems);
	119	SplitInfo[i] = TotalInfo(ValFreq, 0, 2) / Items;
	120	AvGain += SplitGain[i];
	121	Tries++;
	122
	123	Verbosity(3)
	124	{ printf("\t\tCut at %.3f (gain %.3f, val %.3f):",
	125	( CVal(Item[i],Att) + CVal(Item[i+1],Att) ) / 2,
	126	SplitGain[i],
	127	Worth(SplitInfo[i], SplitGain[i], Epsilon));
	128	PrintDistribution(Att, 2, true);
	129	}
	130	}
	131	}
	132
	133	/* Find the best attribute according to the given criterion */
	134
	135	ThreshCost = Log(Tries) / Items;
	136
	137	BestVal = 0;
	138	BestI = None;
	139	ForEach(i, Xp, Lp - 1)
	140	{
	141	if ( (Val = SplitGain[i] - ThreshCost) > BestVal )
	142	{
	143	BestI = i;
	144	BestVal = Val;
	145	}
	146	}
	147
	148	/* If a test on the attribute is able to make a gain,
	149	set the best break point, gain and information */
	150
	151	if ( BestI == None )
	152	{
	153	Gain[Att] = -Epsilon;
	154	Info[Att] = 0.0;
	155
	156	Verbosity(2) printf("\tno gain\n");
	157	}
	158	else
	159	{
	160	Bar[Att] = (CVal(Item[BestI],Att) + CVal(Item[BestI+1],Att)) / 2;
	161	Gain[Att] = BestVal;
	162	Info[Att] = SplitInfo[BestI];
	163
	164	Verbosity(2)
	165	printf("\tcut=%.3f, inf %.3f, gain %.3f\n",
	166	Bar[Att], Info[Att], Gain[Att]);
	167	}
	168	}
	169
	170
	171
	172	/*************************************************************************/
	173	/* */
	174	/* Change a leaf into a test on a continuous attribute */
	175	/* */
	176	/*************************************************************************/
	177
	178
	179	ContinTest(Node, Att)
	180	/* ---------- */
	181	Tree Node;
	182	Attribute Att;
	183	{
	184	float Thresh, GreatestValueBelow();
	185	ItemCount CountItems();
	186
	187	Sprout(Node, 2);
	188
	189	Thresh = GreatestValueBelow(Att, Bar[Att]);
	190
	191	Node->NodeType = ThreshContin;
	192	Node->Tested = Att;
	193	Node->Cut =
	194	Node->Lower =
	195	Node->Upper = Thresh;
	196	Node->Errors = 0;
	197	}
	198
	199
	200
	201	/*************************************************************************/
	202	/* */
	203	/* Return the greatest value of attribute Att below threshold t */
	204	/* */
	205	/*************************************************************************/
	206
	207
	208	float GreatestValueBelow(Att, t)
	209	/* ------------------ */
	210	Attribute Att;
	211	float t;
	212	{
	213	ItemNo i;
	214	float v, Best;
	215	Boolean NotYet=true;
	216
	217	ForEach(i, 0, MaxItem)
	218	{
	219	v = CVal(Item[i], Att);
	220	if ( v != Unknown && v <= t && ( NotYet \|\| v > Best ) )
	221	{
	222	Best = v;
	223	NotYet = false;
	224	}
	225	}
	226
	227	return Best;
	228	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: proiecte/Parallel-DT/R8/Src/contin.c @ 62

Download in other formats: