/*************************************************************************/
/*								                                     	 */
/*	Evaluation of a test on a discrete valued attribute		             */
/*      ---------------------------------------------------	             */
/*									                                     */
/*************************************************************************/


#include "buildex.h"
/*************************************************************************/
/*								                                    	 */
/*  Set Info[] and Gain[] for discrete partition of items Fp to Lp	     */
/*									                                     */
/*************************************************************************/


    EvalDiscreteAtt(Att, Fp, Lp, Items)
/*  ---------------  */
    Attribute Att;
    ItemNo Fp, Lp;
    ItemCount Items;
{
    /*  Evaluate a split on discrete attribute Att over items Fp..Lp.
	Items is the item total supplied by the caller.  The results
	are stored in Gain[Att] and Info[Att]; nothing is returned.  */

    float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo();
    ItemCount Known;
    float BaseInfo;

    /*  Fill Freq[][], ValFreq[] and UnknownRate[Att] for this attribute  */

    ComputeFrequencies(Att, Fp, Lp);

    /*  ValFreq[0] is the total for value 0 (unknown); when it accounts
	for all of Items there is nothing to evaluate  */

    if ( Items <= ValFreq[0] )
    {
	Verbosity(2) printf("\tAtt %s: no known values\n", AttName[Att]);

	Info[Att] = 0.0;
	Gain[Att] = -Epsilon;
	return;
    }

    /*  Gain is computed from the items whose value of Att is known  */

    Known = Items - ValFreq[0];
    BaseInfo = DiscrKnownBaseInfo(Known, MaxAttVal[Att]);

    Gain[Att] = ComputeGain(BaseInfo, UnknownRate[Att], MaxAttVal[Att], Known);

    /*  Info covers every value including unknown (index 0), per item  */

    Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items;

    Verbosity(2)
    {
	printf("\tAtt %s", AttName[Att]);
	Verbosity(3) PrintDistribution(Att, MaxAttVal[Att], true);
	printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]);
    }
}





/*************************************************************************/
/*								                                    	 */
/*  The importance of a discrete attribute is calculated                 */
/*  according to the chi-square statistic method                         */
/*								  	                                     */
/*************************************************************************/

// Chi-square importance of a discrete attribute
double DiscreteAttChiValue(Att, Fp, Lp, Items)
/*     -------------------  */
    Attribute Att;
    ItemNo Fp, Lp; 
    ItemCount Items;
{
    /*  Return the chi-square statistic of discrete attribute Att
	against the class, over items Fp..Lp with a known value of Att.
	Items is the item total supplied by the caller.

	Returns 0.0 when no value of Att is known, when fewer than
	2 * MINOBJS items have a known value, or when the scratch
	array cannot be allocated.

	Side effects: calls ComputeFrequencies, which rewrites Freq[][],
	ValFreq[] and UnknownRate[Att].  */

    ClassNo c;
    ItemNo i;
    DiscrValue v;
    ItemNo *ClassFreq;
    ItemCount KnownItems;
    float Expected, Diff;
    float ChiSquare = 0;

    ComputeFrequencies(Att, Fp, Lp);

    KnownItems = Items - ValFreq[0];

    /*  Bail out before allocating anything, so that this early exit
	cannot leak memory  */

    if ( Items <= ValFreq[0] || KnownItems < 2 * MINOBJS )
    {
	return 0.0;
    }

    ClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));
    if ( ClassFreq == NULL )
    {
	return 0.0;
    }

    /*  Class totals over the items whose value of Att is known.
	NOTE(review): these are plain item counts, whereas Freq[][] and
	ValFreq[] are accumulated from Weight[]; the two agree only when
	every weight is 1 -- confirm that this is intended.  */

    ForEach(i, Fp, Lp)
    {
	if ( DVal(Item[i],Att) != 0 )
	{
	    ClassFreq[ Class(Item[i]) ]++;
	}
    }

    /*  Sum (observed - expected)^2 / expected over every known value
	and class, where expected = value total * class total / known
	items.  Cells with a zero expected count are skipped to avoid
	dividing by zero.  */

    ForEach(v, 1, MaxAttVal[Att])
    {
	ForEach(c, 0, MaxClass)
	{
	    Expected = ValFreq[v] * ClassFreq[c] / KnownItems;

	    if ( Expected == 0 )
	    {
		continue;
	    }

	    Diff = Freq[v][c] - Expected;
	    ChiSquare += Diff * Diff / Expected;
	}
    }

    free(ClassFreq);
    ClassFreq = NULL;

    return (double) ChiSquare;
}




/*************************************************************************/
/*								                                    	 */
/*  The importance of a discrete attribute is calculated                 */
/*  according to the information gain ratio method                       */
/*								  	                                     */
/*************************************************************************/

double DiscreteAttGRValue(Att, Fp, Lp, Items)
/*     ------------------  */
    Attribute Att;
    ItemNo Fp, Lp; 
    ItemCount Items;
{
    /*  Return the gain ratio (gain / split info) of discrete attribute
	Att over items Fp..Lp.  Items is the item total supplied by the
	caller.

	Returns 0.0 when no value of Att is known, when the gain is
	below Epsilon or the split info is not above Epsilon, or when
	the resulting ratio is below Epsilon.

	Side effects: calls ComputeFrequencies, which rewrites Freq[][],
	ValFreq[] and UnknownRate[Att].  */

    ItemCount KnownItems;
    float GRGain, GRInfo, GRValue;
    float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo();

    ComputeFrequencies(Att, Fp, Lp);

    /*  Special case when no known values of the attribute.  A value
	must be returned here: a bare `return' would hand the caller an
	indeterminate double.  */

    if ( Items <= ValFreq[0] )
    {
	Verbosity(2) printf("\tAtt %s: no known values\n", AttName[Att]);

	return 0.0;
    }

    KnownItems = Items - ValFreq[0];

    GRGain = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]),
			 UnknownRate[Att], MaxAttVal[Att], KnownItems);

    GRInfo = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items;

    /*  The ratio is only formed when both gain and split info are
	large enough; otherwise it keeps the sentinel -Epsilon  */

    GRValue = -Epsilon;
    if ( GRGain >= Epsilon && GRInfo > Epsilon )
    {
	GRValue = GRGain / GRInfo;
    }

    /*  Negligible (or sentinel) ratios are reported as zero  */

    if ( GRValue < Epsilon )
    {
	return 0.0;
    }

    return (double) GRValue;
}




/*************************************************************************/
/*								                                      	 */
/*  Compute frequency tables Freq[][] and ValFreq[] for attribute	     */
/*  Att from items Fp to Lp, and set the UnknownRate for Att		     */
/*									                                     */
/*************************************************************************/


    ComputeFrequencies(Att, Fp, Lp)
/*  ------------------  */
    Attribute Att;
    ItemNo Fp, Lp;
{
    /*  Tabulate items Fp..Lp by value of Att and by class.
	Writes Freq[value][class], ValFreq[value] and UnknownRate[Att].  */

    ItemCount CountItems();
    ItemNo i;
    DiscrValue Val;
    ClassNo Cls;

    /*  ResetFreq is defined elsewhere; presumably it clears Freq[][]
	and ValFreq[] for values 0..MaxAttVal[Att] -- confirm  */

    ResetFreq(MaxAttVal[Att]);

    /*  Add each item's weight to the cell for its value and class  */

    ForEach(i, Fp, Lp)
    {
	Freq[ DVal(Item[i],Att) ][ Class(Item[i]) ] += Weight[i];
    }

    /*  Accumulate the per-value totals across all classes  */

    ForEach(Val, 0, MaxAttVal[Att])
    {
	ForEach(Cls, 0, MaxClass)
	{
	    ValFreq[Val] += Freq[Val][Cls];
	}
    }

    /*  Value 0 is treated as the unknown value by this file's callers  */

    UnknownRate[Att] = ValFreq[0] / CountItems(Fp, Lp);
}



/*************************************************************************/
/*								                                     	 */
/*  Return the base info for items with known values of a discrete	     */
/*  attribute, using the frequency table Freq[][]		             	 */
/*	 								                                     */
/*************************************************************************/

//C4.5
float DiscrKnownBaseInfo(KnownItems, MaxVal)
/*    ------------------  */
    DiscrValue MaxVal;
    ItemCount KnownItems;
{
    /*  Base info for the items with a known attribute value, read from
	Freq[][]:

	    (KnownItems * Log(KnownItems) - sum_c N_c * Log(N_c)) / KnownItems

	where N_c is the total of Freq[v][c] over values v = 1..MaxVal.
	Log is a project macro defined elsewhere.  The caller must pass
	a non-zero KnownItems.  */

    ClassNo Cls;
    DiscrValue Val;
    ItemCount ClassTotal;
    double WeightedLogSum = 0;

    ForEach(Cls, 0, MaxClass)
    {
	/*  Total for this class over the known values only (index 0
	    is skipped)  */

	ClassTotal = 0;
	ForEach(Val, 1, MaxVal)
	{
	    ClassTotal += Freq[Val][Cls];
	}

	WeightedLogSum += ClassTotal * Log(ClassTotal);
    }

    /*  The double result is narrowed to float by the return type  */

    return (KnownItems * Log(KnownItems) - WeightedLogSum) / KnownItems;
}



/*************************************************************************/
/*									                                     */
/*  Construct and return a node for a test on a discrete attribute	     */
/*									                                     */
/*************************************************************************/


    DiscreteTest(Node, Att)
/*  ----------  */
    Tree Node;
    Attribute Att;
{
    /*  Turn Node into a test on discrete attribute Att.  Sprout is
	defined elsewhere and is passed the attribute's value count;
	presumably it creates one branch per value -- confirm.  */

    Sprout(Node, MaxAttVal[Att]);

    /*  Record what the node tests and reset its error total  */

    Node->Errors = 0;
    Node->Tested = Att;
    Node->NodeType = BrDiscr;
}
