/*************************************************************************/
/*									                                     */
/*	Get names of classes, attributes and attribute values		         */
/*	-----------------------------------------------------		         */
/*								                                     	 */
/*************************************************************************/


#include "defns.h"
#include "types.h"
#include "extern.h"


/*  True when character s is a blank, a newline or a tab.  The argument
    is parenthesised so that any expression may be passed safely  */

#define  Space(s)	((s) == ' ' || (s) == '\n' || (s) == '\t')

/*  Discard the remainder of the current line.  Expands to a loop header,
    so it must be followed by a statement (normally just `;`) and needs an
    int `c` and a FILE * `f` in scope at the point of use.  On exit c holds
    '\n', or EOF if the file ended first -- without the EOF test a comment
    on an unterminated final line would loop forever  */

#define  SkipComment	while ( ( c = getc(f) ) != '\n' && c != EOF )

/*  Character that ended the name most recently read by ReadName;
    set to EOF when ReadName finds no name to read  */
char	Delimiter;

/*  Forward declaration: CopyString is defined further down this file
    but is called before that point  */
String	CopyString();



/*************************************************************************/
/*									                                     */
/*  Read a name from file f into string s, setting Delimiter.		     */
/*									                                     */
/*  - Embedded periods are permitted, but periods followed by space	     */
/*    characters act as delimiters.					                     */
/*  - Embedded spaces are permitted, but multiple spaces are replaced	 */
/*    by a single space.						                         */
/*  - Any character can be escaped by '\'.			                 	 */
/*  - The remainder of a line following '|' is ignored.	        		 */
/*									                                     */
/*************************************************************************/


Boolean ReadName(f, s)
/*      ---------  */
    FILE *f;
    String s;
{
    /*  Reads one name from f into s and records the terminating
	character in the global Delimiter.

	f  -  open input stream
	s  -  destination buffer; no size is passed in, so the caller must
	      supply a buffer large enough for the longest name

	Returns false (with Delimiter set to EOF) when the end of file is
	reached before any name character, and true otherwise.  The name
	stored may be empty, e.g. when the first character is a delimiter  */

    register char *Sp=s;
    register int c;

    /*  Skip leading white space and comments.  The comment skip stops at
	EOF as well as at newline, so a comment on an unterminated final
	line cannot loop forever  */

    while ( ( c = getc(f) ) == '|' || Space(c) )
    {
	if ( c == '|' )
	{
	    while ( ( c = getc(f) ) != '\n' && c != EOF )
		;

	    if ( c == EOF ) break;
	}
    }

    /*  Return false if no names to read  */

    if ( c == EOF )
    {
	Delimiter = EOF;
	return false;
    }

    /*  Read in characters up to the next delimiter  */

    while ( c != ':' && c != ',' && c != '\n' && c != '|' && c != EOF )
    {
	if ( c == '.' )
	{
	    /*  A period ends the name when followed by white space, a
		comment or the end of file; otherwise it is embedded  */

	    c = getc(f);
	    if ( c == '|' || c == EOF || Space(c) ) break;
	    *Sp++ = '.';
	}

	if ( c == '\\' )
	{
	    /*  Take the next character literally; a backslash that is the
		last character of the file is dropped rather than storing
		EOF as if it were a character  */

	    if ( ( c = getc(f) ) == EOF ) break;
	}

	*Sp++ = c;

	if ( c == ' ' )
	{
	    /*  Collapse a run of spaces into the one just stored  */

	    while ( ( c = getc(f) ) == ' ' )
		;
	}
	else
	{
	    c = getc(f);
	}
    }

    /*  A trailing comment is discarded; the delimiter is then the
	newline (or EOF) that ended it  */

    if ( c == '|' )
    {
	while ( ( c = getc(f) ) != '\n' && c != EOF )
	    ;
    }
    Delimiter = c;

    /*  Strip trailing spaces, never stepping back past the start of s
	(the name can be empty)  */

    while ( Sp > s && Space(*(Sp-1)) ) Sp--;

    *Sp++ = '\0';
    return true;
}



/*************************************************************************/
/*									                                     */
/*  Read the names of classes, attributes and legal attribute values.	 */
/*  On completion, these names are stored in:			             	 */
/*	ClassName	-  class names					                         */
/*	UnSampleAttName		-  attribute names			                   	 */
/*	UnSampleAttValName	-  attribute value names		               	 */
/*  with:							                                 	 */
/*	UnSampleMaxAttVal	-  number of values for each attribute	       	 */
/*									                                     */
/*  Other global variables set are:				                       	 */
/*	UnSampleMaxAtt		-  maximum attribute number		               	 */
/*	MaxClass	-  maximum class number			                     	 */
/*	MaxDiscrVal	-  maximum discrete values for any attribute	         */
/*									                                     */
/*  Note:  until the number of attributes is known, the name		     */
/*	   information is assembled in local arrays			                 */
/*									                                     */
/*************************************************************************/


    GetNames()
/*  ---------  */
{
    FILE *Nf, *fopen();
    char Fn[100], Buffer[1000];
    DiscrValue v;


    int AttCeiling=100, ClassCeiling=100, ValCeiling;

    /*  Open names file  */

    strcpy(Fn, FileName);
    strcat(Fn, ".names");
    if ( ! ( Nf = fopen(Fn, "r") ) ) Error(0, Fn, "");

    /*  Get class names from names file  */

    ClassName = (String *) calloc(ClassCeiling, sizeof(String));
    MaxClass = -1;
    do
    {
	ReadName(Nf, Buffer);

	if ( ++MaxClass >= ClassCeiling)
	{
	    ClassCeiling += 100;
	    ClassName = (String *) realloc(ClassName, ClassCeiling*sizeof(String));
	}
	ClassName[MaxClass] = CopyString(Buffer);
    }
    while ( Delimiter == ',' );

    /*  Get attribute and attribute value names from names file  */

    UnSampleAttName = (String *) calloc(AttCeiling, sizeof(String));
    UnSampleMaxAttVal = (DiscrValue *) calloc(AttCeiling, sizeof(DiscrValue));
    UnSampleAttValName = (String **) calloc(AttCeiling, sizeof(String *));
    UnSampleSpecialStatus = (char *) malloc(AttCeiling);

    UnSampleMaxAtt = -1;


    while ( ReadName(Nf, Buffer) )
    {
	if ( Delimiter != ':' ) Error(1, Buffer, "");

	if ( ++UnSampleMaxAtt >= AttCeiling )
	{
	    AttCeiling += 100;
	    UnSampleAttName = (String *) realloc(UnSampleAttName, AttCeiling*sizeof(String));
	    UnSampleMaxAttVal = (DiscrValue *) realloc(UnSampleMaxAttVal, AttCeiling*sizeof(DiscrValue));
	    UnSampleAttValName = (String **) realloc(UnSampleAttValName, AttCeiling*sizeof(String *));
	    UnSampleSpecialStatus = (char *) realloc(UnSampleSpecialStatus, AttCeiling);
	}

	UnSampleAttName[UnSampleMaxAtt] = CopyString(Buffer);
	UnSampleSpecialStatus[UnSampleMaxAtt] = Nil;
	UnSampleMaxAttVal[UnSampleMaxAtt] = 0;
	ValCeiling = 100;
	UnSampleAttValName[UnSampleMaxAtt] = (String *) calloc(ValCeiling, sizeof(String));

	do
	{
	    if ( ! ( ReadName(Nf, Buffer) ) ) Error(2, UnSampleAttName[UnSampleMaxAtt], "");

	    if ( ++UnSampleMaxAttVal[UnSampleMaxAtt] >= ValCeiling )
	    {
		ValCeiling += 100;
		UnSampleAttValName[UnSampleMaxAtt] =
		    (String *) realloc(UnSampleAttValName[UnSampleMaxAtt], ValCeiling*sizeof(String));
	    }

	    UnSampleAttValName[UnSampleMaxAtt][UnSampleMaxAttVal[UnSampleMaxAtt]] = CopyString(Buffer);
	}
	while ( Delimiter == ',' );

	if ( UnSampleMaxAttVal[UnSampleMaxAtt] == 1 )
	{
	    /*  Check for special treatment  */

	    if ( ! strcmp(Buffer, "continuous") )
	    {}
	    else
	    if ( ! memcmp(Buffer, "discrete", 8) )
	    {
	    	UnSampleSpecialStatus[UnSampleMaxAtt] = DISCRETE;

		/*  Read max values, reserve space and check MaxDiscrVal  */

		v = atoi(&Buffer[8]);
		if ( v < 2 )
		{
		    printf("** %s: illegal number of discrete values\n",
		    		UnSampleAttName[UnSampleMaxAtt]);
		    exit(1);
		}

		AttValName[UnSampleMaxAtt] =
		    (String *) realloc(AttValName[UnSampleMaxAtt], (v+2)*sizeof(String));
		AttValName[UnSampleMaxAtt][0] = (char *) v;
		if ( v > MaxDiscrVal ) MaxDiscrVal = v;
	    }
	    else
	    if ( ! strcmp(Buffer, "ignore") )
	    {
	    	UnSampleSpecialStatus[UnSampleMaxAtt] = IGNORE;
	    }
	    else
	    {
		/*  Cannot have only one discrete value for an attribute  */

		Error(3, UnSampleAttName[UnSampleMaxAtt], "");
	    }

	    UnSampleMaxAttVal[UnSampleMaxAtt] = 0;
	}
	else
	if ( UnSampleMaxAttVal[UnSampleMaxAtt] > MaxDiscrVal ) MaxDiscrVal = UnSampleMaxAttVal[UnSampleMaxAtt];


    }

    fclose(Nf);
}





/*************************************************************************/
/*									                                     */
/*  Rebuild the working attribute tables (AttName, MaxAttVal,            */
/*  AttValName, SpecialStatus) from the UnSample* tables.                */
/*									                                     */
/*************************************************************************/


    GetMatrixNames()
{
    DiscrValue v;
    Attribute i;

    long SampleAttCeiling=100, SampleValCeiling;

	MaxAtt = UnSampleMaxAtt;

    AttName = (String *) calloc(SampleAttCeiling, sizeof(String));
    MaxAttVal = (DiscrValue *) calloc(SampleAttCeiling, sizeof(DiscrValue));
    AttValName = (String **) calloc(SampleAttCeiling, sizeof(String *));
    SpecialStatus = (char *) malloc(SampleAttCeiling);

	for (i=0;i<=MaxAtt;++i)
	{
		if ( i >= SampleAttCeiling )
		{
			SampleAttCeiling = SampleAttCeiling +100;
		    AttName = (String *) realloc(AttName, SampleAttCeiling*sizeof(String));
		    MaxAttVal = (DiscrValue *) realloc(MaxAttVal, SampleAttCeiling*sizeof(DiscrValue));
	        AttValName = (String **) realloc(AttValName, SampleAttCeiling*sizeof(String *));
		    SpecialStatus = (char *) realloc(SpecialStatus, SampleAttCeiling);
		}

		AttName[i] =UnSampleAttName[i];
		SpecialStatus[i] = Nil;
		MaxAttVal[i] = 0;
		SampleValCeiling = 100;
		AttValName[i] = (String *) calloc(SampleValCeiling, sizeof(String));

		do
		{
		    if ( ++MaxAttVal[i] >= SampleValCeiling )
		    {
		    	SampleValCeiling = SampleValCeiling+100;
		    	AttValName[i] = (String *) realloc(AttValName[i], SampleValCeiling*sizeof(String));
		    }

		    AttValName[i][MaxAttVal[i]] = UnSampleAttValName[i][MaxAttVal[i]];

		}
		while( (MaxAttVal[i]+1)<= UnSampleMaxAttVal[i] );


	    if ( MaxAttVal[i] == 1 )
	    {

	    	if ( ! strcmp(AttValName[i][MaxAttVal[i]], "continuous") )
	    	 {}
	    	else
	    	if ( ! memcmp(AttValName[i][MaxAttVal[i]], "discrete", 8) )
	    	{
				SpecialStatus[i] = DISCRETE;

				v = atoi(AttValName[i][MaxAttVal[i]]+8);
				if ( v < 2 )
				{
					printf("** %s: illegal number of discrete values\n",
						   AttName[i]);
					exit(1);
				}

				AttValName[i] =
						(String *) realloc(AttValName[i], (v+2)*sizeof(String));
				AttValName[i][0] = (char *) v;
				if ( v > MaxDiscrVal ) MaxDiscrVal = v;
	    	}
	    	else
	    	if ( ! strcmp(AttValName[i][MaxAttVal[i]], "ignore") )
	    	{
	    		SpecialStatus[i] = IGNORE;
	    	}
	    	else
	    	{
	    		Error(3, AttName[i], "");
	    	}

	    	MaxAttVal[i] = 0;
	    	}
	    	else
	    	if ( MaxAttVal[i] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[i];

	}

}



/*************************************************************************/
/*									                                     */
/*  Subspace sampling method: 1.Breiman method                           */
/*		     Sampling a number of attributes without replacement         */
/*									                                     */
/*************************************************************************/

    GetSampleNames()
{
    /*  Draws SampleAttNum+1 distinct attribute numbers uniformly from
	0..UnSampleMaxAtt by rejection sampling and stores them in a newly
	allocated SampleAttr[0..SampleAttNum].  Reseeds rand() from
	randomSeed and then increments randomSeed.

	If more attributes are requested than exist, only as many as exist
	are drawn and the remaining SampleAttr entries keep the zero that
	calloc gave them; previously this case never terminated.

	NOTE(review): any earlier SampleAttr array is not freed here,
	because this file does not show who else may still hold it  */

    long Drawn, Wanted, Pool, k, *Taken;

    srand(randomSeed+rand());
    randomSeed++;

    /*  The candidates are exactly the values rand() is reduced to below,
	so the bookkeeping table is sized from the same bound and can
	never be indexed past its end  */

    Pool = UnSampleMaxAtt+1;
    if ( Pool < 0 ) Pool = 0;

    Taken = (long *) calloc(Pool, sizeof(long));
    SampleAttr = (long *) calloc(SampleAttNum+1, sizeof(long));

    /*    attribute sampling    */

    Wanted = SampleAttNum+1;
    if ( Wanted > Pool ) Wanted = Pool;

    Drawn = 0;
    while ( Drawn < Wanted )
    {
	k = rand() % Pool;

	/*  Reject attributes that have already been drawn  */

	if ( ! Taken[k] )
	{
	    SampleAttr[Drawn++] = k;
	    Taken[k] = 1;
	}
    }

    free(Taken);
    Taken = NULL;
}



/*************************************************************************/
/*									                                     */
/*  Subspace sampling method: weighting sampling method                  */
/*		                 2.chisquare statistic method                    */
/*									                                     */
/*************************************************************************/

	GetChiQuareEnrichName(Fp, Lp,Cases)
    ItemNo Fp, Lp;
	ItemCount Cases;
{
	double *PAvalue, SumPAvalue=0;
	long Att, i, j, k, *HundredPoint, *a,*b;
	long MaxSampleNumber;
	double DiscreteAttChiValue(),ContinuousAttChiValue();

	double zjbl=0;

	long weightNum = 0;
	double SumPAvalueWeight = 0.0, AvgPAvalueWeight = 0.0;

    PAvalue = (double *) calloc(MaxAtt+1, sizeof(double));
	HundredPoint = (long *) calloc(MaxAtt+1, sizeof(long));

	a = (long *) calloc(30000+1, sizeof(long));//**7
	b = (long *) calloc(MaxAtt+1, sizeof(long));

	MaxSampleNumber = MaxAtt;


	ForEach(Att, 0, MaxAtt)
    {
		PAvalue[Att] = 0;

		if ( SpecialStatus[Att] == IGNORE ) continue;

		if ( MaxAttVal[Att])
		{
			if ( MaxAttVal[Att] >= 2 )
			{
				/*  discrete valued attribute  */
				PAvalue[Att] = DiscreteAttChiValue(Att, Fp, Lp, Cases);
			}
		}
		else
		{
			/*  continuous attribute  */
			PAvalue[Att] = ContinuousAttChiValue(Att, Fp, Lp, Cases);

		}

		PAvalue[Att] = pow(PAvalue[Att],1.0/2);

		if ( PAvalue[Att] > 0.000000 )
		{
			SumPAvalue += PAvalue[Att];
		}

    }


	if(SumPAvalue<=0)
	{
		return;
	}

	HundredPoint[0] = (long)(30000 * PAvalue[0] / SumPAvalue + 0.5);//**


    ForEach(Att,1,MaxAtt)
	{
		zjbl = 0;


		if ( PAvalue[Att] >= 0.000000 && SumPAvalue> 0.00000 )
		{
			zjbl = 30000*PAvalue[Att]/SumPAvalue;//**

			HundredPoint[Att] = (long)(HundredPoint[Att-1] + zjbl + 0.5);


			if(HundredPoint[Att]>30000)//**
			{
				HundredPoint[Att] = 30000;//**
			}

			if(HundredPoint[Att-1]==HundredPoint[Att])
			{
				MaxSampleNumber--;
			}


		}

	}

	if( MaxSampleNumber <= SampleAttNum)
	{
		return;
	}

	SampleAttr = (long *) calloc(SampleAttNum+1, sizeof(long));


	//weighting sampling
	srand(randomSeed+rand());
    randomSeed++;


	for(i=0; i<30000; ++i)//**
	{
		a[i]= 1;
	}

	ForEach(Att, 0, MaxAtt)
	{
		b[Att] = 1;
	}

	for (i=0;i<=SampleAttNum;)
	{
    	srand(randomSeed+rand());
		randomSeed++;
		k = rand() % 30000;//**

		if(a[k])
    	{
			ForEach(Att, 0, MaxAtt)
			{
				if( k<=HundredPoint[0] && Att==0 )
				{


					SampleAttr[i] = 0;
					b[Att] =0;
					i++;

					break;
				}
				else if( k<=HundredPoint[1] && k>HundredPoint[0] && Att==1 )
				{

					SampleAttr[i] = 1;
					b[Att] =0;
					i++;

					break;
				}
				else if(k<=HundredPoint[Att] && k>HundredPoint[Att-1] && b[Att])
				{
					SampleAttr[i] = Att;
					b[Att] =0;
					i++;
					break;
				}
			}
    		a[k] = 0;
    	}

	}

	free(PAvalue);
	PAvalue = NULL;

	free(HundredPoint);
	HundredPoint = NULL;

	free(a);
	a = NULL;

	free(b);
	b = NULL;

}


/*************************************************************************/
/*									                                     */
/*  Subspace sampling method: weighting sampling method                  */
/*		                 3.Information gain ratio method                 */
/*									                                     */
/*************************************************************************/

	GetInfoGaEnrichName(Fp, Lp,Cases)
    ItemNo Fp, Lp;
	ItemCount Cases;
{
	double *GRvalue, SumGRvalue=0;
	long Att, i, j, k, *HundredPoint, *a,*b;
	long MaxSampleNumber;
	double DiscreteAttGRValue(),ContinuousAttGRValue();

	double zjbl=0;
	long weightNum = 0;
	double SumGRvalueWeight = 0.0, AvgGRvalueWeight = 0.0;

    GRvalue = (double *) calloc(MaxAtt+1, sizeof(double));
	HundredPoint = (long *) calloc(MaxAtt+1, sizeof(long));

	a = (long *) calloc(30000+1, sizeof(long));//**
	b = (long *) calloc(MaxAtt+1, sizeof(long));

	MaxSampleNumber = MaxAtt;

	ForEach(Att, 0, MaxAtt)
    {
		GRvalue[Att] = 0;

		if ( SpecialStatus[Att] == IGNORE ) continue;

		if ( MaxAttVal[Att])
		{
			if ( MaxAttVal[Att] >= 2 )
			{
				/*  discrete valued attribute  */
				GRvalue[Att] = DiscreteAttGRValue(Att, Fp, Lp, Cases);
			}
		}
		else
		{
			/*  continuous attribute  */
			GRvalue[Att] = ContinuousAttGRValue(Att, Fp, Lp, Cases);


		}

		GRvalue[Att] = pow(GRvalue[Att],1.0/2);

		if ( GRvalue[Att] > 0.000000 )
		{
			SumGRvalue += GRvalue[Att];
		}

    }

	if(SumGRvalue<=0)
	{
		return;
	}

	HundredPoint[0] = (long)(30000 * GRvalue[0] / SumGRvalue + 0.5);//**


    ForEach(Att, 1, MaxAtt)
	{
		zjbl = 0;


		if ( GRvalue[Att] >= 0.000000 && SumGRvalue> 0.000000 )
		{
			zjbl = 30000*GRvalue[Att]/SumGRvalue;

			HundredPoint[Att] = (long)(HundredPoint[Att-1] + zjbl + 0.5);

			if(HundredPoint[Att]>30000)
			{
				HundredPoint[Att] = 30000;//**
			}

			if(HundredPoint[Att-1]==HundredPoint[Att])
			{
				MaxSampleNumber--;
			}

		}

	}

	if( MaxSampleNumber <= SampleAttNum)
	{
		return;
	}

	SampleAttr = (long *) calloc(SampleAttNum+1, sizeof(long));

    //Weighting sampling

	srand(randomSeed+rand());
    randomSeed++;


	for(i=0; i<30000; ++i)
	{
		a[i]= 1;
	}

	ForEach(Att, 0, MaxAtt)
	{
		b[Att] = 1;
	}



	for (i=0;i<=SampleAttNum;)
	{
    	srand(randomSeed+rand());
		randomSeed++;
		k = rand() % 30000;

		if(a[k])
    	{
			ForEach(Att, 0, MaxAtt)
			{
				if( k<=HundredPoint[0] && Att==0 )
				{
					SampleAttr[i] = 0;
					b[Att] =0;
					i++;
					break;
				}
				else if( k<=HundredPoint[1] && k>HundredPoint[0] && Att==1 )
				{
					SampleAttr[i] = 1;
					b[Att] =0;
					i++;
					break;
				}
				else if(k<=HundredPoint[Att] && k>HundredPoint[Att-1] && b[Att])
				{
					SampleAttr[i] = Att;
					b[Att] =0;
					i++;
					break;
				}
			}
    		a[k] = 0;
    	}

	}


	free(GRvalue);
	GRvalue = NULL;

	free(HundredPoint);
	HundredPoint = NULL;

	free(a);
	a = NULL;

	free(b);
	b = NULL;

}




/*************************************************************************/
/*									                                     */
/*	Locate value Val in List[First] to List[Last]	              		 */
/*								                                    	 */
/*************************************************************************/


int Which(Val, List, First, Last)
/*  -----  */
    String Val, List[];
    long First, Last;
{
    /*  Linear search of List[First..Last] for a string equal to Val.
	Returns the index of the first match, or First-1 when there is
	none (including when the range is empty)  */

    long Pos;

    for ( Pos = First ; Pos <= Last ; Pos++ )
    {
	if ( strcmp(Val, List[Pos]) == 0 )
	{
	    return Pos;
	}
    }

    return First-1;
}



/*************************************************************************/
/*								                                      	 */
/*	Allocate space then copy string into it		                   		 */
/*								                                       	 */
/*************************************************************************/

String CopyString(x)
/*     -----------  */
    String x;
{
    /*  Returns a newly allocated copy of the NUL-terminated string x.
	The caller owns the copy.  If memory cannot be obtained the
	program reports the fact and exits, rather than letting strcpy
	write through a null pointer  */

    char *s;

    s = (char *) malloc(strlen(x)+1);
    if ( ! s )
    {
	printf("\nERROR:  out of memory while copying string\n");
	exit(1);
    }

    strcpy(s, x);
    return s;
}



/*************************************************************************/
/*								                                    	 */
/*			Error messages				                             	 */
/*								                                       	 */
/*************************************************************************/

    Error(n, s1, s2)
/*  -----  */
    long n;
    String s1, s2;
{
    /*  Prints the message selected by n to standard output, using s1
	and s2 as the message requires.  Message 0 (file cannot be opened)
	terminates the program at once; any other call returns, but the
	program is terminated once more than ten calls have returned.
	A value of n outside 0..5 prints only the "ERROR:" prefix  */

    static char Reported=0;

    printf("\nERROR:  ");

    if ( n == 0 )
    {
	printf("cannot open file %s%s\n", s1, s2);
	exit(1);
    }
    else
    if ( n == 1 )
    {
	printf("colon expected after attribute name %s\n", s1);
    }
    else
    if ( n == 2 )
    {
	printf("unexpected eof while reading attribute %s\n", s1);
    }
    else
    if ( n == 3 )
    {
	printf("attribute %s has only one value\n", s1);
    }
    else
    if ( n == 4 )
    {
	printf("case %d's value of '%s' for attribute %s is illegal\n",
	       MaxItem+1, s2, s1);
    }
    else
    if ( n == 5 )
    {
	printf("case %d's class of '%s' is illegal\n", MaxItem+1, s2);
    }

    /*  Give up after too many recoverable errors  */

    Reported++;
    if ( Reported > 10 )
    {
	printf("Error limit exceeded\n");
	exit(1);
    }
}






