/*************************************************************************/
/*                                                                  	 */
/*	Evaluation of a test on a continuous valued attribute	         	 */
/*	-----------------------------------------------------	        	 */
/*								                                    	 */
/*************************************************************************/


#include "buildex.h"




/*************************************************************************/
/*								                                    	 */
/*  The importance of a continuous attribute is calculated               */
/*	according to the chi-square statistic                            	 */
/*								  	                                     */
/*************************************************************************/

double	ContinuousAttChiValue(Att, Fp, Lp)
		Attribute Att;
		ItemNo Fp, Lp; 
{
	
	ItemNo i, BestI, Xp;
	DiscrValue v;
    ItemCount Items, KnownItems, LowItems, MinSplit, CountItems();
	ItemCount **TrueFreq,*TrueValFreq;
    ClassNo c;

	float ChiQuare=0,*PvalueSplitGain;
	long N;
	float BestVal, PvalueBaseInfo,ComputeGain(), TotalInfo();
    void Swap(); 

	ItemNo *ClassFrequcy;
	float **T;

	ClassFrequcy = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));
    
	T  = (float **) calloc(3, sizeof(float *));
    ForEach(v, 0, 2)
    {
		T[v]  = (float *) calloc(MaxClass+1, sizeof(float));
    }


	TrueFreq  = (ItemCount **) calloc(3, sizeof(ItemCount *));
	ForEach(v, 0, 2)
    {
	TrueFreq[v]  = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    }

    TrueValFreq = (ItemCount *) calloc(3, sizeof(ItemCount));

	PvalueSplitGain = (float *) calloc(MaxItem+1, sizeof(float));

	//Initialize
	ResetFreq(2);

	ForEach(c, 0, MaxClass)
	{
	    ClassFrequcy[c] = 0;
	}

	ForEach(v, 0, 2) 
    { 
	ForEach(c, 0, MaxClass)
	{
	    T[v][c] = 0.00;
	}
    }


    Items = CountItems(Fp, Lp);
    Xp = Fp;
    ForEach(i, Fp, Lp)
    {
	if ( CVal(Item[i],Att) == Unknown )
	{
	    Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
	    Swap(Xp, i);
	    Xp++;
	}
    }

    ValFreq[0] = 0;
    ForEach(c, 0, MaxClass)
    {
	ValFreq[0] += Freq[0][c];
    }

    KnownItems = Items - ValFreq[0];

    /*  Special case when very few known values  */

    if ( KnownItems < 2 * MINOBJS )
    {
	return 0.000000;//  1/chiquare;
    }

	Quicksort(Xp, Lp, Att, Swap);

    /*  Count base values and determine base information  */

    ForEach(i, Xp, Lp)
    {
	Freq[ 2 ][ Class(Item[i]) ] += Weight[i];
	PvalueSplitGain[i] = 0.000;//?
    }

    PvalueBaseInfo = TotalInfo(Freq[2], 0, MaxClass) / KnownItems;


	/*  Find the best attribute according to the given criterion  */

	BestVal = 0;
    BestI   = None;

    MinSplit = 0.10 * KnownItems / (MaxClass + 1);
    if ( MinSplit <= MINOBJS ) MinSplit = MINOBJS;
    else
    if ( MinSplit > 25 ) MinSplit = 25;

    LowItems = 0;


	ForEach(i, Xp, Lp)
	{
		c = Class(Item[i]);
		ClassFrequcy[ c ]++;
	}


    ForEach(i, Xp, Lp - 1)
    {
		c = Class(Item[i]);
		LowItems   += Weight[i];
		Freq[1][c] += Weight[i];
		Freq[2][c] -= Weight[i];

	//	ClassFrequcy[ c ]++;

		if ( LowItems < MinSplit ) continue;
		else
		if ( LowItems > KnownItems - MinSplit ) break;

		if ( CVal(Item[i],Att) < CVal(Item[i+1],Att) - 1E-5 )
		{
			ValFreq[1] = LowItems;
			ValFreq[2] = KnownItems - LowItems;
			PvalueSplitGain[i] = ComputeGain(PvalueBaseInfo, UnknownRate[Att], 2, KnownItems);
			
			if ( PvalueSplitGain[i] > BestVal )
			{
				BestI   = i;
				BestVal = PvalueSplitGain[i];

				ForEach(v, 1, 2) 
				{ 
				ForEach(c, 0, MaxClass)
				{
					TrueFreq[v][c] = Freq[v][c];
				}
				}
				TrueValFreq[1] = ValFreq[1];
				TrueValFreq[2] = ValFreq[2];

			}

		}

    }


	ForEach(v, 1, 2) 
    { 
	ForEach(c, 0, MaxClass)
	{
	    T[v][c] = TrueValFreq[v] * ClassFrequcy[c]/KnownItems;//knownItemsδ֪
	}
    }

	

	ForEach(v, 1, 2) 
    { 
	ForEach(c, 0, MaxClass)
	{
		if(T[v][c]==0)
		{
			continue;
		}

		 ChiQuare += (TrueFreq[v][c]-T[v][c])*(TrueFreq[v][c]-T[v][c])/T[v][c];

	}
    }


	
	free(ClassFrequcy);
	ClassFrequcy = NULL;

	ForEach(v, 0, 2)
	{
	  free(T[v]);
	  T[v] = NULL;
	}	
	free(T);
	T = NULL;

	ForEach(v, 0, 2)
    {
	  free(TrueFreq[v]);
	  TrueFreq[v] = NULL;
    }
	free(TrueFreq);
	TrueFreq = NULL;

   	free(TrueValFreq);
	TrueValFreq = NULL;


	free(PvalueSplitGain);
	PvalueSplitGain = NULL;



	if(ChiQuare == 0)
	{
		return 0.00000;
	}

	return (double)(ChiQuare);
}




/*************************************************************************/
/*								                                    	 */
/*  The importance of a continuous attribute is calculated               */
/*	according to the information gain ratio                          	 */
/*								  	                                     */
/*************************************************************************/

double	  ContinuousAttGRValueDistr(Att, Fp, Lp)
/*  -----------------  */ 
    Attribute Att;
    ItemNo Fp, Lp; 
{ 
    ItemNo i, BestI, Xp, Tries=0;
    ItemCount Items, KnownItems, LowItems, MinSplit, CountItems();
    ClassNo c;
	float
	*GRSplitGain,	/* SplitGain[i] = gain with att value of item i as threshold */
	*GRSplitInfo;	/* SplitInfo[i] = potential info ditto */
	float GRGain,GRInfo,GRValue;
    float AvGain=0, Val, BestVal, BaseInfo, ThreshCost,
	ComputeGain(), TotalInfo(), Worth();
    void Swap();


	GRSplitGain = (float *) calloc(MaxItem+1, sizeof(float));
	GRSplitInfo = (float *) calloc(MaxItem+1, sizeof(float));



    ResetFreq(2);

    /*  Omit and count unknown values */

    Items = CountItems(Fp, Lp);
    Xp = Fp;
    ForEach(i, Fp, Lp)
    {
	if ( CVal(Item[i],Att) == Unknown )
	{
	    Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
	    Swap(Xp, i);
	    Xp++;
	}
    }

    ValFreq[0] = 0;
    ForEach(c, 0, MaxClass)
    {
	ValFreq[0] += Freq[0][c];
    }

    KnownItems = Items - ValFreq[0];
    UnknownRate[Att] = 1.0 - KnownItems / Items;

    /*  Special case when very few known values  */

    if ( KnownItems < 2 * MINOBJS )
    {
	return 0.00000;
    }

    Quicksort(Xp, Lp, Att, Swap);

    /*  Count base values and determine base information  */

    ForEach(i, Xp, Lp)
    {
	Freq[ 2 ][ Class(Item[i]) ] += Weight[i];
	GRSplitGain[i] = -Epsilon;
	GRSplitInfo[i] = 0;
    }

    BaseInfo = TotalInfo(Freq[2], 0, MaxClass) / KnownItems;

    /*  Try possible cuts between items i and i+1, and determine the
	information and gain of the split in each case.  We have to be wary
	of splitting a small number of items off one end, as we can always
	split off a single item, but this has little predictive power.  */

    MinSplit = 0.10 * KnownItems / (MaxClass + 1);
    if ( MinSplit <= MINOBJS ) MinSplit = MINOBJS;
    else
    if ( MinSplit > 25 ) MinSplit = 25;

    LowItems = 0;
    ForEach(i, Xp, Lp - 1)
    {
		c = Class(Item[i]);
		LowItems   += Weight[i];
		Freq[1][c] += Weight[i];
		Freq[2][c] -= Weight[i];

		if ( LowItems < MinSplit ) continue;
		else
		if ( LowItems > KnownItems - MinSplit ) break;

		if ( CVal(Item[i],Att) < CVal(Item[i+1],Att) - 1E-5 )
		{
			ValFreq[1] = LowItems;
			ValFreq[2] = KnownItems - LowItems;
			GRSplitGain[i] = ComputeGain(BaseInfo, UnknownRate[Att], 2, KnownItems);
			GRSplitInfo[i] = TotalInfo(ValFreq, 0, 2) / Items;
			AvGain += GRSplitGain[i];
			Tries++;

		}
    }


    /*  Find the best attribute according to the given criterion  */

    ThreshCost = Log(Tries) / Items;

    BestVal = 0;
    BestI   = None;
    ForEach(i, Xp, Lp - 1)
    {
	if ( (Val = GRSplitGain[i] - ThreshCost) > BestVal )
	{
	    BestI   = i;
	    BestVal = Val;
	}
    }

    /*  If a test on the attribute is able to make a gain,
	set the best break point, gain and information  */ 

    if ( BestI == None )
    {
		GRGain = -Epsilon;
		GRInfo = 0.0;

		return 0.00000;
    }
    else
    {
		GRGain = BestVal;
		GRInfo = GRSplitInfo[BestI];

		//Gain Ratio
		if ( GRGain >= Epsilon && GRInfo > Epsilon )
		{
			GRValue = GRGain / GRInfo;
		}
		else
		{
			GRValue = -Epsilon;
		}

    }


	free(GRSplitGain);
	free(GRSplitInfo);

	GRSplitGain =NULL;
	GRSplitInfo =NULL;



	if(GRValue < Epsilon)
	{
		return 0.000000;
	}
	return (double)(GRValue);

} 




