/*  -*- c++ -*-  (for Emacs)
 *
 *  multiclass_svm.cpp
 *  Digest
 * 
 *  Created by Adrian Bickerstaffe on Wed Jan 18 2006.
 *  Copyright (c) 2005-2006 Optimisation and Constraint Solving Group,
 *  Monash University. All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <iostream>
#include <fstream>
#include <set>
#include <list>
#include <string>
#include <algorithm>	// for STL sort()
#include <iomanip>		// for formatted output
#include <cassert>
#include <cmath>
#include <utility>		// for pair<x, y> class

#include <fcntl.h>		// for O_RDONLY, O_WRONLY and O_CREAT
#include <unistd.h>		// for unlink and rmdir
#include <sys/types.h>	// for mkdir
#include <sys/stat.h>	// for mkdir
#include <time.h>		// for time() when initializing RNG
  
using namespace std;

#include <libtar.h>
#include "svm.h"
#include "multiclass_svm.h"

// Compile-time tuning knobs for this file.
#define SEARCH_KERNEL_PARAMS 1	// non-zero: train() grid-searches the RBF (gamma, C) values first
#define MAX_GRID_ZOOM 5			// number of zoom levels in the kernel parameter search (5)
#define N_FOLD 6				// fold count handed to svm_cross_validation() (6)
#define USE_MML_PROBABILITY 1	// use MML87 multi-state estimator for leaf distributions?
#define BIG_NUM 1000000			// a symbolic "large number"

/////////////////////////

// Orders two MCST edges by weight.  Providing operator< lets STL sort()
// arrange a collection of edges from the lightest to the heaviest.
bool operator<(const mcst_edge & a, const mcst_edge & b)
{
	// `a' sorts first exactly when its weight is the smaller of the two
	const bool a_is_lighter = a.get_weight() < b.get_weight();
	return a_is_lighter;
}

/////////////////////////

// Default constructor: one feature per sample, and a target scaling
// range of [-1, 1].
feature_scaler::feature_scaler(void)
{
	// target range that scaled feature values are mapped onto
	this->scale_range_low = -1;
	this->scale_range_high = 1;
	// a sample carries a single feature until told otherwise
	this->num_features = 1;
}

// Function to access the limits of the dataset scaling range.
void feature_scaler::get_scale_range(double & lower, double & upper) const
{
	lower = scale_range_low;
	upper = scale_range_high;
}

// Function to map one feature value linearly from its observed interval
// [feature_lower, feature_upper] onto [scale_range_low, scale_range_high].
// The value is rewritten in place.  A feature whose observed limits
// coincide is left untouched.
void feature_scaler::scale_single_feature(double & feature_value, 
										   double feature_lower,
										   double feature_upper) const
{	
	// a single-valued attribute has no width to scale across
	if(feature_upper == feature_lower)
		return;

	// the smallest observed value lands exactly on the bottom of the range
	if(feature_value == feature_lower)
	{
		feature_value = scale_range_low;
		return;
	}

	// the largest observed value lands exactly on the top of the range
	if(feature_value == feature_upper)
	{
		feature_value = scale_range_high;
		return;
	}

	// anything else is interpolated linearly between the two end-points
	const double target_width = scale_range_high - scale_range_low;
	const double offset = feature_value - feature_lower;
	const double observed_width = feature_upper - feature_lower;
	feature_value = scale_range_low + target_width * offset / observed_width;
}

// Function to write the scaler header to an already-open output file.
// Five lines are produced, in this order:
//   1. number of features per sample, then number of target classes
//   2. the target class labels
//   3. the low and high ends of the scaling range
//   4. the observed lower limit of every feature
//   5. the observed upper limit of every feature
// Every value written on lines 2, 4 and 5 is followed by a single space.
void feature_scaler::write_feature_details(ofstream & output_file) const
{
	size_t pos;

	// line 1: counts
	output_file << num_features << " " << targets.size() << endl;
	
	// line 2: the classifier targets (i.e. classes)
	for(pos = 0; pos != targets.size(); ++pos)
		output_file << targets[pos] << " ";
	output_file << endl;
	
	// line 3: the scaling range
	output_file << scale_range_low << " " << scale_range_high << endl;
	
	// line 4: observed lower limits
	for(pos = 0; pos != lower_limits.size(); ++pos)
		output_file << lower_limits[pos] << " ";
	output_file << endl;

	// line 5: observed upper limits
	for(pos = 0; pos != upper_limits.size(); ++pos)
		output_file << upper_limits[pos] << " ";
	output_file << endl;
}

// Function to read the scaler header from an input file, in the layout
// produced by write_feature_details(): feature and target counts, the
// target labels, the scaling range, then the observed lower and upper
// limits of each feature.
//
// Any targets and limits already held are discarded first, so calling
// this twice does not accumulate stale entries.  Reading stops at the
// first value that cannot be parsed; the stream is left in its failed
// state so the caller can detect the problem, and no placeholder values
// are stored for the entries that could not be read.
void feature_scaler::read_feature_details(ifstream & input_file)
{
	int i, num_targets = 0, curr_target;
	// current feature limit
	double curr_limit;

	// start from a clean slate rather than appending to old details
	targets.clear();
	lower_limits.clear();
	upper_limits.clear();
	
	// read in the number of features per sample and the number of
	// target classes
	input_file >> num_features >> num_targets;
	
	// read in the list of classifier targets (i.e. classes)
	for(i = 0; i < num_targets && (input_file >> curr_target); i++)
		targets.push_back(curr_target);
	
	// read the feature scaling limits
	input_file >> scale_range_low >> scale_range_high;
	
	// read in the lower limits of each feature as observed in the training data
	for(i = 0; i < num_features && (input_file >> curr_limit); i++)
		lower_limits.push_back(curr_limit);
	
	// read in the upper limits of each feature as observed in the training data
	for(i = 0; i < num_features && (input_file >> curr_limit); i++)
		upper_limits.push_back(curr_limit);
}

// Function to print the feature scaling details derived from the training
// data.
void feature_scaler::print_scaling_details(void) const
{
	int i;
	
	cout << "Data is scaled using the range [" 
		 << scale_range_low << ", " << scale_range_high 
		 << "], with observed feature limits:" << endl << endl;

	// pretty print the table
	cout << setprecision(3) << fixed;
	assert(num_features == lower_limits.size());
	assert(num_features == upper_limits.size());
	// print a table of observed feature limits
	cout << "Feature\tLower limit\tUpper limit" << endl
		 << "------------------------------------------" << endl;
	for(i = 0; i < num_features; i++)
	{
		cout << noshowpos << i + 1 << "\t";
		cout << showpos <<  lower_limits[i] << "\t\t" << upper_limits[i] << endl;
	}
	cout << endl;
	
	// reset the output stream
	cout.unsetf(ios::fixed | ios::scientific);
	cout << noshowpos;	
}

/////////////////////////

// Default constructor: the dataset starts out holding no samples.  The
// number of features per sample is not assigned in this body.
svm_dataset::svm_dataset(void)
{
	this->num_samples = 0;
}

// Constructor for an empty dataset whose samples carry `max_features'
// features each.  The count is validated by set_num_features(); if it is
// rejected, an error is printed there and the feature count keeps
// whatever value it had before this body ran.
svm_dataset::svm_dataset(int max_features)
{
	// no samples initially observed
	num_samples = 0;
	// the success flag is deliberately not inspected here
	(void)set_num_features(max_features);
}

// Function to set the number of features carried by each sample.
// Returns true when `max_features' is positive and has been stored;
// otherwise prints an error, leaves the current count alone and
// returns false.
bool svm_dataset::set_num_features(int max_features)
{
	// reject a non-positive feature count up front
	if(max_features <= 0)
	{
		cout << "Error:- " << max_features << " specified when creating a dataset, " 
			 << " must be > 0" << endl;
		return false;
	}

	num_features = max_features;
	return true;
}

// Dataset destructor.  The body is intentionally empty: nothing is
// released explicitly here.
svm_dataset::~svm_dataset(void)
{
	// empty for now :-)
}

// Function returns the number of features per sample in the dataset.
int svm_dataset::get_num_features(void) const
{
	return num_features;
}

// Function to set the range that features are scaled onto.  The two
// limits may be given in either order: when `lower' exceeds `upper'
// they are swapped before being stored.
void svm_dataset::set_scale_range(double lower, double upper)
{
	// true when the arguments are already in ascending order
	const bool in_order = (lower <= upper);

	scale_range_low = in_order ? lower : upper;
	scale_range_high = in_order ? upper : lower;
}

// Function to copy the stored per-feature limits into the two output
// vectors: `lower' receives the lower limits, `upper' the upper limits.
// Whatever the output vectors held beforehand is replaced.
void svm_dataset::get_feature_limits(vector<double> & lower,
									 vector<double> & upper) const
{
	lower = this->lower_limits;
	upper = this->upper_limits;
}

// Function to add one sample, labelled with class `target', to the
// dataset.  The sample must hold exactly num_features values; if it does
// not, an error is printed, nothing is stored and false is returned.
bool svm_dataset::add_sample(int target, const vector<double> & sample)
{
	// refuse a feature vector of the wrong length
	if(sample.size() != num_features)
	{
		cout << "Error:- attempted to add a sample with " << sample.size()
		     << " features instead of " << num_features << ", ignoring operation..."
		     << endl;
		return false;
	}

	// file the sample under its class label and count it
	data[target].push_back(sample);
	++num_samples;
	return true;
}

// Function to add a vector of samples to the SVM dataset.  All of the
// samples are filed under the same target class.
//
// Every sample is attempted, even after an earlier one has been
// rejected by add_sample(); a rejected sample never causes the valid
// samples that follow it to be skipped.  Returns true only if every
// sample was accepted.
bool svm_dataset::add_samples(int target, const vector<vector<double> > & new_samples)
{
	size_t i;
	bool result = true;
	
	// add each new sample (i.e. vector of doubles) in turn
	for(i = 0; i < new_samples.size(); i++)
	{
		// call add_sample() unconditionally so that one failure does
		// not short-circuit the remaining additions
		if(!add_sample(target, new_samples[i]))
			result = false;
	}
		
	return result;
}

// Function to return a copy of every sample stored under class label
// `target'.  If the label is unknown, an error is printed and an empty
// vector is returned.
vector<vector<double> > svm_dataset::get_samples(int target) const
{
	// look the label up with find(), since map's operator[] is not const
	const map<int, vector<vector<double> > >::const_iterator found = data.find(target);

	if(found != data.end())
		return found->second;

	cout << "Error:- attempted to get SVM samples with an unknown target, "
		 << "returning empty vector..." << endl;
	return vector<vector<double> >();
}

// Function to linearly scale all data features so that they lie within
// [lower_limit, upper_limit].
void svm_dataset::scale(void)
{
	int i, j;
	// iterator for the map of data
	map<int, vector<vector<double> > >::iterator map_iter;

	// find the upper and lower limits of each feature
	find_feature_limits(lower_limits, upper_limits);
	
	// for each target class, scale...
	for(map_iter = data.begin(); map_iter != data.end(); map_iter++)
	{
		// create a reference to the target set
		vector<vector<double> > & curr_target_set = map_iter->second;
		
		// for each sample in the target set
		for(i = 0; i < curr_target_set.size(); i++)
		{
			// create a reference to the i-th member of the current target set
			vector<double> & curr_sample = curr_target_set[i];
			
			// scale each feature of the current sample
			for(j = 0; j < curr_sample.size(); j++)
				scale_single_feature(curr_sample[j], lower_limits[j], upper_limits[j]);
		}
	}
}

// Function to find the lower and upper limits of every feature across
// all samples of all classes.  One entry per feature is appended to each
// output vector, in feature order; entries already present in the output
// vectors are left in place.
//
// If the dataset holds no samples there are no limits to report, and
// both output vectors are left unchanged.
void svm_dataset::find_feature_limits(vector<double> & lower_limits, 
									  vector<double> & upper_limits)
{
	int i;
	
	// for each feature
	for(i = 0; i < num_features; i++)
	{
		// get the complete _sorted_ set of values at feature vector index `i'
		const vector<double> complete_feature_set = collect_featurevals(i);
		// with no samples at all, front()/back() would be invalid; the
		// same holds for every other feature, so stop here
		if(complete_feature_set.empty())
			return;
		// the smallest value sits at the front of the sorted vector
		lower_limits.push_back(complete_feature_set.front());
		// the largest value sits at the end of the sorted vector
		upper_limits.push_back(complete_feature_set.back());
	}
}

// Function to gather the value found at feature position `index' in
// every sample of every class, and return those values as one vector
// sorted in ascending order.
vector<double> svm_dataset::collect_featurevals(int index)
{
	typedef vector<vector<double> > sample_list;

	// every value seen at position `index'
	vector<double> column;

	// walk each labelled class
	map<int, sample_list>::iterator class_iter = data.begin();
	while(class_iter != data.end())
	{
		const sample_list & samples = class_iter->second;

		// pick the requested feature out of each sample of this class
		sample_list::const_iterator sample;
		for(sample = samples.begin(); sample != samples.end(); ++sample)
			column.push_back((*sample)[index]);

		++class_iter;
	}
	
	// callers expect the values in sorted order
	sort(column.begin(), column.end());
	
	return column;
}

// Function returns the size of the dataset.
int svm_dataset::size(void) const
{
	return num_samples;
}

// Function returns the number of unique targets in the dataset.
int svm_dataset::num_unique_targets(void) const
{
	return data.size();
}

// Function to list every target label present in the dataset.  Labels
// are returned in the iteration order of the underlying map, i.e.
// ascending.
vector<int> svm_dataset::get_targets(void) const
{
	// the unique target labels, in map order
	vector<int> labels;

	map<int, vector<vector<double> > >::const_iterator class_iter = data.begin();
	while(class_iter != data.end())
	{
		// the map key is the class label
		labels.push_back(class_iter->first);
		++class_iter;
	}
	
	return labels;
}

// Function to convert the dataset into a libsvm `problem' struct.
//
// The struct's arrays (y, x and x_space) are allocated with malloc();
// the caller takes ownership of them and must release them with free().
// Each sample occupies num_features nodes in x_space followed by one
// terminating node whose index is -1.
//
// param_gamma: if non-NULL and pointing at zero, it is set to
//              1 / num_features (when num_features is positive).
//
// If any allocation fails an error is printed, whatever was allocated is
// released, and an empty problem (l == 0, all pointers NULL) is returned
// so that the caller is never handed partially-initialised arrays.
struct svm_problem svm_dataset::to_libsvm_prob(double *param_gamma) const
{
	int i, j;
	// current sample and current feature value 
	// (across all contained samples and features)
	int sample_num = 0, feature_num = 0;
	// total number of nodes needed: every feature value plus one
	// end-of-sample marker per sample
	int elements = num_samples * (num_features + 1);
	// the libsvm struct, i.e. the result
	struct svm_problem problem;
	// iterator for the map of data
	map<int, vector<vector<double> > >::const_iterator map_iter;
	
	// length of the problem (i.e. how many samples)
	problem.l = num_samples;
	// allocate memory for the sample labels
	problem.y = (double *)malloc(problem.l * sizeof(double));
	// allocate pointers to each sub-array of features
	problem.x = (struct svm_node**)malloc(problem.l * sizeof(struct svm_node*));
	// allocate memory for _all_ observed feature values
	problem.x_space = (struct svm_node*)malloc(elements * sizeof(struct svm_node));
	
	// check whether any of the memory allocation operations failed
	if(problem.y == NULL || problem.x == NULL || problem.x_space == NULL)
	{
		cout << "Error:- memory allocation failed in function to_libsvm_prob() "
			 << "returning an empty problem ..." << endl;
		// release whichever allocations did succeed (free(NULL) is a no-op)
		free(problem.y);
		free(problem.x);
		free(problem.x_space);
		// hand back a well-defined, empty problem
		problem.y = NULL;
		problem.x = NULL;
		problem.x_space = NULL;
		problem.l = 0;
		return problem;
	}
	
	// for each set of samples sharing a target label
	for(map_iter = data.begin(); map_iter != data.end(); map_iter++)
	{
		// extract the current target set
		const vector<vector<double> > & curr_target_set = map_iter->second;
		// for each member of the current target set
		for(i = 0; i < curr_target_set.size(); i++, sample_num++)
		{
			// extract a single sample of the target set
			const vector<double> & curr_feature = curr_target_set[i];
			// the sample's nodes start at the next free slot of x_space
			problem.x[sample_num] = &(problem.x_space[feature_num]);
			problem.y[sample_num] = (double)map_iter->first;
			
			// for each feature
			for(j = 0; j < curr_feature.size(); j++, feature_num++)
			{
				// libsvm feature indices are 1-based
				problem.x_space[feature_num].index = j + 1;
				problem.x_space[feature_num].value = curr_feature[j];
			}
			// store an end of sample marker and move to the next free
			// node
			problem.x_space[feature_num].index = -1;
			feature_num++;
		}
	}

	// set the gamma parameter if it has not been chosen yet
	if(param_gamma != NULL && *param_gamma == 0 && num_features > 0)
		*param_gamma = 1.0 / num_features;
	
	return problem;
}

// Function to copy the feature scaling details from a source dataset.
void svm_dataset::copy_feature_details(const svm_dataset & source_dataset)
{
	// scaling range of the source dataset
	double source_range_low, source_range_high;
	// observed feature limits in the source dataset
	vector<double> source_lower_lims, source_upper_lims;
	// copy the source's scaling range
	source_dataset.get_scale_range(source_range_low, source_range_high);
	scale_range_low = source_range_low;
	scale_range_high = source_range_high;
	// copy the source's observed feature limits
	source_dataset.get_feature_limits(source_lower_lims, source_upper_lims);
	lower_limits = source_lower_lims;
	upper_limits = source_upper_lims;
}

// Function to split a dataset into training and testing subsets.
bool svm_dataset::split(float training_portion,
						pair<svm_dataset, svm_dataset> & split_dataset)
{
	svm_dataset training_dataset(num_features), testing_dataset(num_features);
	// make a temporary copy of the data for working
	map<int, vector<vector<double> > > data_copy = data;
	map<int, vector<vector<double> > >::iterator iter;	
	// copy the feature details of the source dataset
	training_dataset.copy_feature_details(*this);
	testing_dataset.copy_feature_details(*this);	
	// initialize RNG
	srand(time(NULL));

	// for each target class
	for(iter = data_copy.begin(); iter != data_copy.end(); iter++)
	{
		int i;
		// calculate the number of training samples to take from this class
		long int num_training = lrint(training_portion * iter->second.size());
		// ensure that there's at least one training sample
		if(num_training < 1)
		{
			cout << "Error:- must have at least one sample per class during training!"
				 << endl;
			return false;
		}
		
		// for each training sample to extract
		for(i = 0; i < num_training; i++)
		{
			// generate a random vector index
			int rand_index = rand() % iter->second.size();
			// add the sample to the training dataset
			training_dataset.add_sample(iter->first,
										iter->second[rand_index]);
			// delete this sample so that it cannot be selected again
			iter->second.erase(iter->second.begin() + rand_index);
		}
		// only testing samples now remain to add
		testing_dataset.add_samples(iter->first, iter->second);
	}
	// set the pair members before returning the result
	split_dataset.first = training_dataset;
	split_dataset.second = testing_dataset;
	
	return true;
}

// Function to write the dataset to a text file.  The layout is: the
// number of samples, the header produced by write_feature_details(),
// then one line per sample holding the class label followed by the
// feature values, all separated by single spaces.
//
// Returns false if the file could not be opened or if any write
// (including the final close) failed; true otherwise.
//
// NOTE(review): no precision is set on the stream here, so values are
// written with the stream's default precision and a save/load round
// trip may not reproduce every double exactly.
bool svm_dataset::save(const string & filename)
{
	map<int, vector<vector<double> > >::const_iterator iter;
	// open the output file and check for success
	ofstream output_file(filename.c_str());
	if(!output_file.is_open())
		return false;
	// first line of the file contains the number of samples
	// in the dataset
	output_file << num_samples << endl;
	// refresh the stored list of class labels so the header is current
	targets = get_targets();
	// write details about feature limits and scaling
	write_feature_details(output_file);
	// main body: for each target class
	for(iter = data.begin(); iter != data.end(); iter++)
	{	
		size_t i, j;
		// all samples belonging to this target class
		const vector<vector<double> > & curr_samples = iter->second;
		// for each sample of this class
		for(i = 0; i < curr_samples.size(); i++)
		{
			const vector<double> & curr_sample = curr_samples[i];
			// output the class label
			output_file << iter->first << " ";
			// then output the feature values, a space between
			// neighbours but none after the last one
			for(j = 0; j < curr_sample.size(); j++)
			{
				if(j != 0)
					output_file << " ";
				output_file << curr_sample[j];
			}
			// output a newline after each sample
			output_file << endl;
		}
	}

	output_file.close();
	// a failed write or a failed close leaves the stream in a failed state
	return !output_file.fail();
}

// Function to write the dataset to file.
bool svm_dataset::load(const string & filename)
{
	long int i;
	// open the input file and test for success
	ifstream input_file(filename.c_str());
	if(!input_file.is_open())
		return false;
	// read the number of samples comprising the dataset	
	input_file >> num_samples;
	// read in the number of features per sample, the target class
	// labels, and the feature scaling details
	read_feature_details(input_file);
	// for each sample in the dataset
	for(i = 0; i < num_samples; i++)
	{
		int j, target_class;
		double curr_feature;
		vector<double> feature_set;
		// read the target class label first
		input_file >> target_class;
		// then read the feature values in order
		for(j = 0; j < num_features; j++)
		{
			input_file >> curr_feature;
			feature_set.push_back(curr_feature);
		}
		// store the sample by mapping the target class to the
		// feature set
		data[target_class].push_back(feature_set);
	}
	
	input_file.close();
	return true;	
}

// Function to output the complete contents of an SVM dataset.
void svm_dataset::print(void) const
{
	int i, j;
	// iterator for the map of data
	map<int, vector<vector<double> > >::const_iterator map_iter;
			
	cout << "Dataset has " << data.size() << " classes "
		 << "and " << num_samples << " samples of "
		 << num_features << " features each." << endl << endl;
		 
	// output the details of linear feature scaling
	print_scaling_details();	 
	
	// setup the output stream for pretty formatting
	cout << setprecision(6) << fixed << showpos;
	
	// for each target class in the dataset
	for(map_iter = data.begin(); map_iter != data.end(); map_iter++)
	{
		// output the class label
		cout << "Class: " << static_cast<int>(map_iter->first) << endl 
			 << "--------" << endl;
			 
		vector<vector<double> > curr_target_set = map_iter->second;
		
		// for each member of the target set samples
		for(i = 0; i < curr_target_set.size(); i++)
		{
			// extract the current sample
			vector<double> curr_sample = curr_target_set[i];
			
			// output each feature value
			for(j = 0; j < curr_sample.size(); j++)
				cout << curr_sample[j] << "\t";
			
			cout << endl;
		}
		
		cout << endl;
	}
	
	// reset the output stream
	cout.unsetf(ios::fixed | ios::scientific);
	cout << noshowpos;
}

// Function to reset the svm_dataset for re-use.
void svm_dataset::clear(void)
{
	// assume a single feature
	num_features = 1;
	num_samples = 0;
	
	// reset default scale ranges
	scale_range_low = -1;
	scale_range_high = 1;
	// clear the observed lower and upper limits of each feature
	lower_limits.clear();
	upper_limits.clear();
	
	// clear all samples
	data.clear();
}

/////////////////

// Default constructor: installs the default SVM parameters and marks
// both the model and the problem as not yet set up.
generic_svm::generic_svm(void)
{
	this->setup_defaults();
}

// Constructor that installs the default SVM parameters and then converts
// `the_dataset' into the libsvm `problem' (prob) struct.  The conversion
// is handed the address of param.gamma so that it can fill in a gamma
// value.
generic_svm::generic_svm(const svm_dataset & the_dataset)
{
	setup_defaults();

	// where the conversion may write its gamma value
	double * const gamma_slot = &param.gamma;
	prob = the_dataset.to_libsvm_prob(gamma_slot);

	// the problem arrays now need freeing on clean-up
	problem_setup = true;
}

// Function to setup the SVM parameter struct.
void generic_svm::setup_defaults(void)
{
	// default parameter values
	param.svm_type = C_SVC;
	param.kernel_type = RBF;
	param.degree = 3;
	param.gamma = 0;			// later set to 1 / k
	param.coef0 = 0;
	param.nu = 0.5;
	param.cache_size = 100;
	param.C = 1;
	param.eps = 1e-3;
	param.p = 0.1;
	param.shrinking = 1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	
	model = NULL;
	model_setup = false;
	problem_setup = false;
}

// Destructor: all clean-up is delegated to clear(), which releases the
// parameter, model and problem memory held by this object.
generic_svm::~generic_svm(void)
{
	this->clear();
}

// Function to reset a generic SVM object so that it may be re-used.
// Releases the memory held inside the parameter struct, the model (if
// one has been trained or loaded) and the problem arrays (if a problem
// has been set).
//
// Every pointer that is freed is also reset to NULL, so calling clear()
// more than once -- for instance explicitly and then again from the
// destructor -- never frees the same memory twice.
void generic_svm::clear(void)
{
	// free memory within the parameter struct, then drop the pointers
	// to it so they are not freed again on the next clear()
	svm_destroy_param(&param);
	param.weight_label = NULL;
	param.weight = NULL;
	param.nr_weight = 0;
	
	// if necessary, free dynamic memory within the model; the pointer
	// can be NULL if an earlier load failed
	if(model_setup && model != NULL)
		svm_destroy_model(model);

	// if necessary, free the problem (i.e. data) memory
	if(problem_setup)
	{
		free(prob.y);			// free target labels
		free(prob.x);			// free pointers to feature values
		free(prob.x_space);		// free feature values
		prob.y = NULL;
		prob.x = NULL;
		prob.x_space = NULL;
		prob.l = 0;
	}
	
	model = NULL;				// reset pointer, model and problem
	model_setup = false;		// are no longer setup
	problem_setup = false;
	
}

// Function to build the libsvm `problem' struct from a `svm_dataset'
// object and flag the problem as set up.  The conversion receives the
// address of param.gamma so that it can fill in a gamma value.
// NOTE(review): whatever `prob' held beforehand is overwritten without
// being freed here -- confirm this is never called on an object that
// already has a problem set.
void generic_svm::set_problem(const svm_dataset & the_dataset)
{
	double * const gamma_slot = &param.gamma;
	prob = the_dataset.to_libsvm_prob(gamma_slot);
	problem_setup = true;
}

// Function to train the SVM given some training data.
bool generic_svm::train(void)
{
	const char *error_msg = svm_check_parameter(&prob, &param);
	
	// check whether the problem and parameter sets are mismatched
	if(error_msg)
	{
		cout << "Error:- " << error_msg << endl;
		return false;
	}
	
#if SEARCH_KERNEL_PARAMS
	// search for optimal kernel parameters before training
	search_kernel_params();
//	cout << "%%% final values of gamma and C: "
//		 << param.gamma << ", " << param.C << " %%%" << endl;
#endif
	
	// train the model using a libsvm function call
	model = svm_train(&prob, &param);
	model_setup = true;
	
	return true;
}

// Function performs a two-dimensional grid-search to improve the choice of
// RBF kernel parameters.  Cross validation results are used to guide the
// search process.
void generic_svm::search_kernel_params(void)
{
	// "zoom" level on the grid
	int zoom;
	// map of powers of 2 to grid values
	map<double, double> gamma_points, C_points;
	double gamma_power, C_power;
	
	// for every zoom level
	for(zoom = 1; zoom <= MAX_GRID_ZOOM; zoom++)
	{	
		cerr << "..discovering best kernel parameters at zoom level "
			 << zoom << " of " << MAX_GRID_ZOOM << endl;

		// zoom in on the grid around the best (gamma, C) pair
		if(zoom > 1)
			update_grid(gamma_points, C_points, gamma_power, C_power);	
		// create the initial grid values
		else
			setup_parameter_grid(gamma_points, C_points);			
		
//		cout << "--> searching for (gamma, C) at zoom level " << zoom << ", "
//			 << "grid is currently:" << endl;

//		print_grid(gamma_points, C_points);
		// find the best (gamma, C) pair using the current grid
		find_best_parameters(gamma_points, C_points, gamma_power, C_power);
		
//		cout << "-> This zoom: the best pair found was (" 
//			 << gamma_points[gamma_power] << ", " << C_points[C_power] << ")" 
//			 << endl;
	}
	
//	cout << "*-*-* Ultimately, the best pair found was (" 
//		 << gamma_points[gamma_power] << ", " << C_points[C_power] << ")" 
//		 << endl;
		 
	// assign the best (gamma, C) values for use in training and classification
	param.gamma = gamma_points[gamma_power];
	param.C = C_points[C_power];
}

// Function to print the details of the kernel parameter grid.
void generic_svm::print_grid(const map<double, double> & gamma_points,
						     const map<double, double> & C_points)
{
	map<double, double>::const_iterator iter;
	
	// output the gamma powers of two
	cout << "gamma:" << endl;
	for(iter = gamma_points.begin(); iter != gamma_points.end(); iter++)
		cout << "2^" << iter->first << "\t";
	cout << endl;
	// output the gamma values
	for(iter = gamma_points.begin(); iter != gamma_points.end(); iter++)
		cout << iter->second << "\t";
	cout << endl << endl;
	
	// output the C powers of two
	cout << "C:" << endl;
	for(iter = C_points.begin(); iter != C_points.end(); iter++)
		cout << "2^" << iter->first << "\t";
	cout << endl;
	// output the C values
	for(iter = C_points.begin(); iter != C_points.end(); iter++)
		cout << iter->second << "\t";
	cout << endl << endl;
}

// Function to fill in the initial 2D kernel parameter grid.  Each map
// is keyed by an integer power p and stores 2^p: gamma covers the powers
// -15 to 5 and C covers the powers -5 to 15.  The initial values are
// taken from the tutorial paper "A practical guide to support vector
// machine classification" (Hsu, Chang, Lin).
void generic_svm::setup_parameter_grid(map<double, double> & gamma_points,
									   map<double, double> & C_points)
{
	// both ranges span 21 consecutive powers, so one pass fills both
	const int num_powers = 21;

	for(int offset = 0; offset < num_powers; offset++)
	{
		// gamma powers start at -15, C powers at -5
		const int gamma_exponent = offset - 15;
		const int C_exponent = offset - 5;

		gamma_points[gamma_exponent] = pow(2.0, gamma_exponent);
		C_points[C_exponent] = pow(2.0, C_exponent);
	}
}

// Function to find the (gamma, C) parameter pair which provides the highest
// cross-validation accuracy.  A simple 2D grid search is performed.
void generic_svm::find_best_parameters(const map<double, double> & gamma_points,
									   const map<double, double> & C_points,
									   double & gamma_power, double & C_power)
{
	// grid indices in each dimension
	map<double, double>::const_iterator gamma_iter, C_iter;
	// 
	double curr_validation_score, best_validation_score = -1;

	// for each gamma grid point
	for(gamma_iter = gamma_points.begin(); gamma_iter != gamma_points.end(); gamma_iter++)
	{
		// and each C grid point
		for(C_iter = C_points.begin(); C_iter != C_points.end(); C_iter++)
		{
			// calculate the cross-validation success of this parameter pair
			curr_validation_score = cross_validate(gamma_iter->second, C_iter->second);
												   
//			cout << "..pairing (" << gamma_iter->second << ", " << C_iter->second
//				 << ") has cross validation accuracy: " << curr_validation_score << endl;
				 
			// if the result beats previous results
			if(curr_validation_score > best_validation_score)
			{
				// update the best observed cross-validation result
				best_validation_score = curr_validation_score;
				// store the parameter values at this grid point
				gamma_power = gamma_iter->first;
				C_power = C_iter->first;
				
//				cout << "-) " << best_validation_score << " is the best validation score found "
//					 << "thus far with parameter values (" << gamma_iter->second << ", "
//					 << C_iter->second << ")" << endl;
			}
		}
	}
//	cout << "@@@@ best cross-validation score at current zoom is: " << best_validation_score << endl;
}

// Function returns the cross validation success of this classifier using
// the specified RBF parameter values.  The cross validation success is 
// expressed as a percentage of correctly predicted samples.
// Warning:- the problem struct `prob' *must* be setup prior to calling 
// this function.
double generic_svm::cross_validate(double gamma, double C)
{
	int i;
	// total correct predictions and cross validation success
	double total_correct = 0, result = 0.0;
	// vector of prediction results
	double *target = new double[prob.l];
	
	struct svm_parameter temp_param	= param;
	temp_param.gamma = gamma;
	temp_param.C = C;
	temp_param.probability = 0;
	
	// set the parameter values
//	param.gamma = gamma;
//	param.C = C;
	// perform n-fold cross validation 
	svm_cross_validation(&prob, &temp_param, N_FOLD, target);
	
	// for each prediction made
	for(i = 0; i < prob.l; i++)
	{
		// sum all the correction predictions
		if(target[i] == prob.y[i])
			total_correct++;
	}
	
	// calculate the cross validation accuracy as a percentage
	result = 100.0 * total_correct / prob.l;

	// clean up memory and return cross validation accuracy
	delete[] target;
	return result;
}

// Function to "zoom" the kernel parameter grid.  In each dimension a new
// set of powers is derived around the given central power by
// find_new_powers(), and the grid points of that dimension are then
// replaced by generate_new_points().
void generic_svm::update_grid(map<double, double> & gamma_points,
							  map<double, double> & C_points,
							  const double & gamma_power, 
							  const double & C_power)
{
	// gamma dimension: new powers first, then the replacement points
	const vector<double> gamma_powers = find_new_powers(gamma_points, gamma_power);
	generate_new_points(gamma_points, gamma_powers);

	// C dimension, handled the same way
	const vector<double> C_powers = find_new_powers(C_points, C_power);
	generate_new_points(C_points, C_powers);	
}

// Function to zoom in on an area of the grid and return a new, finer
// set of powers around a central power.
//
// `points' maps each current power to its grid value and
// `central_power' is the key to zoom in on.  Three cases are handled:
//   - centre is the smallest power: the new powers start at the centre
//     and cover half of the gap up to its neighbour;
//   - centre is the largest power: the new powers cover the upper half
//     of the gap from its neighbour, ending at the centre;
//   - otherwise: the new powers cover a window as wide as the gap to the
//     next power, centred on the central power.
//
// If the central power is not a key of the grid, or the grid has fewer
// than two points (so there is no gap to subdivide), the existing powers
// are returned unchanged.
vector<double> generic_svm::find_new_powers(const map<double, double> & points, 
											const double & central_power)
{
	// store the number of grid points in this dimension
	int i, divisions = points.size();
	// the resultant powers of two
	vector<double> result;
	// locate the central power once
	const map<double, double>::const_iterator centre = points.find(central_power);

	// no valid centre or no neighbour to measure a gap against: keep
	// the grid as it is rather than step past the end of the map
	if(centre == points.end() || points.size() < 2)
	{
		map<double, double>::const_iterator iter;
		for(iter = points.begin(); iter != points.end(); ++iter)
			result.push_back(iter->first);
		return result;
	}

	// iterator to the largest power
	map<double, double>::const_iterator last = points.end();
	--last;

	// if the central power happens to be the smallest power
	if(centre == points.begin())
	{
		// extract the first and second power values
		map<double, double>::const_iterator next = centre;
		++next;
		double curr_power = centre->first;
		double next_power = next->first;
		// calculate the range and new step size
		double power_range = (next_power - curr_power) / 2.0;
		double power_step = power_range / divisions;
		// create a new vector of powers
		for(i = 0; i < divisions; i++)
		{
			result.push_back(curr_power);
			curr_power += power_step;
		}
	}
	// if the central power happens to be the largest power
	else if(centre == last)
	{
		map<double, double>::const_iterator prev = centre;
		--prev;
		// get the larger of the two adjacent powers
		double next_power = centre->first;
		// get the smaller of the two adjacent powers
		double curr_power = prev->first;
		// calculate the range and new step size
		double power_range = (next_power - curr_power) / 2.0;
		double power_step = power_range / divisions;
		// create a vector of new powers, starting one step above the
		// midpoint of the gap
		for(i = 0, curr_power += power_range + power_step; i < divisions; i++)
		{
			result.push_back(curr_power);
			curr_power += power_step;
		}
	}
	// the central power is somewhere between the lowest and highest powers
	else
	{
		map<double, double>::const_iterator next = centre;
		++next;
		// calculate the range and step size
		double curr_power = centre->first;
		double next_power = next->first;
		double power_step = (next_power - curr_power) / divisions;
		// set the new power starting point half a gap below the centre
		curr_power -= (next_power - curr_power) / 2.0;
		// create a vector of new powers
		for(i = 0; i < divisions + 1; i++)
		{
			result.push_back(curr_power);
			curr_power += power_step;
		}
	}
		
	return result;
}

// Function to rebuild a parameter map from a vector of powers.  Any existing
// entries in `points' are discarded; afterwards every power in `new_powers'
// is a key whose value is two raised to that power.
void generic_svm::generate_new_points(map<double, double> & points, 
									  const vector<double> & new_powers)
{
	// throw away the previous grid before building its replacement
	points.clear();

	// walk the powers, storing 2^power against each one
	vector<double>::const_iterator power_iter;
	for(power_iter = new_powers.begin(); power_iter != new_powers.end(); ++power_iter)
	{
		const double exponent = *power_iter;
		points[exponent] = pow(2.0, exponent);
	}
}

// Function to save a trained SVM to file.
// Returns true only when a model exists (trained or previously loaded) AND
// svm_save_model() reports that it was written successfully.
bool generic_svm::save(const char *output_filename) const
{
	// only save if the model has been trained or previously loaded
	if(!model_setup)
	{
		cout << "Error:- attempted to save a model that has not been "
			 << "trained or previously loaded..." << endl;
		
		return false;
	}

	// svm_save_model() returns 0 on success; anything else means the
	// file could not be written, which must not be reported as success
	if(svm_save_model(output_filename, model) != 0)
	{
		cout << "Error:- could not write the SVM model to file..." << endl;

		return false;
	}

	return true;
}

// Function to load a saved SVM model from file.
// Any model already held is destroyed first (with a warning).  Returns true
// when svm_load_model() yields a model, false otherwise.  On failure the
// object is left flagged as having no model, even if it held one before the
// call, because the old model has already been destroyed by then.
bool generic_svm::load(const char *input_filename)
{
	// if a model has already been trained or loaded, warn that it
	// will be overwritten
	if(model_setup)
	{
		cout << "Warning:- loading new model over an existing model." << endl;
		svm_destroy_model(model);
		// the old model no longer exists whatever happens below, so the
		// flag must not keep claiming that a usable model is present
		model = 0;
		model_setup = false;
	}
	
	// load the model and check for failure
	model = svm_load_model(input_filename);
	if(model == 0)
		return false;
	
	// the model was successfully loaded, flag as such
	model_setup = true;
	return true;
}

/////////////////

// Default constructor sets up an SVM classifier for nonprobabilistic classification
// by clearing the `probability' flag of the SVM parameters.
nonprobabilistic_svm::nonprobabilistic_svm(void)
{
	param.probability = 0;
}

// Constructor sets up an SVM classifier for nonprobabilistic classification
// (the `probability' flag of the SVM parameters is cleared) and passes the
// dataset parameter onto the base SVM class.
nonprobabilistic_svm::nonprobabilistic_svm(const svm_dataset & the_dataset)
					 : generic_svm(the_dataset)
{
	param.probability = 0;
}

// Function to classify a single observation represented as a vector of features.
// The features are copied into a temporary array of svm_node entries (indices
// start at 1, terminated by index -1) and handed to svm_predict().
// Returns the value produced by svm_predict(), or 0 when no model has been
// trained/loaded or when the temporary array cannot be allocated.
double nonprobabilistic_svm::classify_sample(const vector<double> & features) const
{	
	const size_t num_values = features.size();

	// check that a model has been trained or loaded before attempting
	// to make predictions
	if(!model_setup)
	{
		cout << "Error:- attemping to classify a sample using a model that has not "
			 << "been trained or loaded, returning nonsense prediction..." << endl;
		return 0;	
	}
	
	// allocate memory for an svmlib representation of the sample; one extra
	// slot is needed for the end-of-feature marker
	// (was realloc() in original svmlib classifier code)
	struct svm_node *x = (struct svm_node *)malloc((num_values + 1) * sizeof(struct svm_node));

	// check whether memory allocation failed
	if(x == NULL)
	{
		cout << "Error:- memory allocation failed in classify_sample(), "
			 << "returning nonsense prediction..." << endl;
		return 0;
	}
	
	// for each feature in the feature vector
	for(size_t i = 0; i < num_values; i++)
	{	
		// set the (one-based) feature index and its value
		x[i].index = static_cast<int>(i) + 1;
		x[i].value = features[i];
	}
	// store end-of-feature marker
	x[num_values].index = -1;
	
	// do the classification and store the result
	const double result = svm_predict(model, x);
	// clean up sample's memory
	free(x);
	
	return result;
}

/////////////////

// Default constructor sets up an SVM classifier for probabilistic classification
// by setting the `probability' flag of the SVM parameters.
probabilistic_svm::probabilistic_svm(void)
{
	param.probability = 1;
}

// Constructor sets up an SVM classifier for probabilistic classification
// (the `probability' flag of the SVM parameters is set) and passes the
// dataset parameter onto the base SVM class.
probabilistic_svm::probabilistic_svm(const svm_dataset & the_dataset)
				  : generic_svm(the_dataset)
{
	param.probability = 1;
}

// Function to classify a single observation represented as a vector of features.
// On success the value produced by svm_predict_probability() is returned and
// `prediction_probability' receives the probability estimate associated with
// that predicted label.
// Returns 0 (leaving `prediction_probability' untouched) when no model has
// been trained/loaded, when the model does not support probability estimates,
// or when a temporary buffer cannot be allocated.
double probabilistic_svm::classify_sample(const vector<double> & features,
										  double & prediction_probability) const
{	
	const size_t num_values = features.size();
	map<int, double> probability_map;
	
	// check that a model has been trained or loaded before attempting
	// to make predictions
	if(!model_setup)
	{
		cout << "Error:- attemping to classify a sample using a model that has not "
			 << "been trained or loaded, returning nonsense prediction..." << endl;
		return 0;	
	}
	// check that the model was trained for probabilistic predictions
	if(svm_check_probability_model(model) == 0)
	{
		cout << "Error:- attempted probabilistic classification using a model "
			 << "which does not support probability estimates, returning "
			 << "nonsense prediction..." << endl;
		return 0;
	}

	// get the number of target classes (usually 2)
	const int num_classes = svm_get_nr_class(model);
	// allocate memory for an svmlib representation of the sample (one extra
	// slot for the end-of-feature marker), the target class labels and the
	// probability estimates
	struct svm_node *x = (struct svm_node *)malloc((num_values + 1) * sizeof(struct svm_node));
	int *labels = (int *)malloc(num_classes * sizeof(int));
	double *prob_estimates = (double *)malloc(num_classes * sizeof(double));

	// check whether memory allocation failed
	if(x == NULL || labels == NULL || prob_estimates == NULL)
	{
		cout << "Error:- memory allocation failed in classify_sample(), "
			 << "returning nonsense prediction..." << endl;
		// release whichever buffers did get allocated; free(NULL) is a no-op
		free(x); free(labels); free(prob_estimates);
		return 0;
	}
	// get the target class labels from the model
	svm_get_labels(model, labels);
	// for each feature in the feature vector
	for(size_t i = 0; i < num_values; i++)
	{	
		// set the (one-based) feature index and its value
		x[i].index = static_cast<int>(i) + 1;
		x[i].value = features[i];
	}
	// store end-of-feature marker
	x[num_values].index = -1;
		
	// do the classification and store the result
	const double result = svm_predict_probability(model, x, prob_estimates);
	// map probability estimates to target class labels
	for(int c = 0; c < num_classes; c++)
		probability_map[labels[c]] = prob_estimates[c];
	// get the prediction probability
	prediction_probability = probability_map[static_cast<int>(result)];
	
	// clean up sample and probability estimate memory
	free(x); free(labels); free(prob_estimates);
	
	return result;
}

////////////////////////

// Base class constructor.  Installs the default feature scaling range of
// [-1, 1], assumes a single feature per sample, and flags that no model has
// yet been trained or loaded.
multiclass_classifier::multiclass_classifier(void)
{
	// a single feature per sample until told otherwise
	num_features = 1;
	// default feature scaling range
	scale_range_high = 1;
	scale_range_low = -1;
	// nothing has been trained or loaded from file so far
	model_setup = false;
}

// Function to build a two-class subset of the training dataset.  Samples of
// every target listed in `first_set' are added to the result under label -1,
// and samples of every target listed in `second_set' under label +1.
svm_dataset multiclass_classifier::get_data_subset(const svm_dataset & whole_dataset,
												   const set<int> & first_set,
												   const set<int> & second_set) const
{
	// the subset being assembled, with the same feature count as the source
	svm_dataset filtered_dataset(whole_dataset.get_num_features());
	set<int>::const_iterator target_iter;
	
	// everything from the first set becomes class "-1"
	for(target_iter = first_set.begin(); target_iter != first_set.end(); ++target_iter)
	{
		vector<vector<double> > members = whole_dataset.get_samples(*target_iter);
		filtered_dataset.add_samples(-1, members);
	}

	// everything from the second set becomes class "+1"
	for(target_iter = second_set.begin(); target_iter != second_set.end(); ++target_iter)
	{
		vector<vector<double> > members = whole_dataset.get_samples(*target_iter);
		filtered_dataset.add_samples(+1, members);
	}

	return filtered_dataset;
}

// Function to scale the features of a single sample prior to classification.
// Returns a scaled copy of `unscaled_features'; when no model has been
// trained or loaded the copy is returned unchanged.  The scaled values are
// also printed to standard output, five to a row.
vector<double> multiclass_classifier::scale_sample(const vector<double> & unscaled_features) const
{
	// work on a copy so that the caller's vector is left alone
	vector<double> scaled_features(unscaled_features);

	// scaling is only possible once the classifier has been trained or a
	// previously trained classifier has been loaded from file
	if(!model_setup)
		return scaled_features;

	// scale each feature using the limits stored for its position
	for(size_t pos = 0; pos < unscaled_features.size(); ++pos)
		scale_single_feature(scaled_features[pos], lower_limits[pos], upper_limits[pos]);

	// report the scaled values
	cout << "Scaled features..." << endl;
	cout << setprecision(6) << fixed;
	for(size_t pos = 0; pos < scaled_features.size(); ++pos)
	{
		// features are numbered from one in the report
		const int feature_num = static_cast<int>(pos) + 1;
		cout << noshowpos << feature_num << ": ";
		cout << showpos << scaled_features[pos] << "\t";

		// break the row after every fifth feature
		if(feature_num % 5 == 0)
			cout << endl;
	}
	cout << endl << endl;
	// restore the stream's float formatting and sign flags
	cout.unsetf(ios::fixed | ios::scientific);
	cout << noshowpos;

	return scaled_features;
}

// Function to create a tar archive given an absolute path to a directory
// containing some model files.
bool multiclass_classifier::create_archive(string archive_filename, string input_path)
{
	TAR *t = NULL;
	// warning - this conversion loses const qualifiers
	char *broken_archive_filename = const_cast<char*>(archive_filename.c_str());
	char *broken_input_path = const_cast<char*>(input_path.c_str());
	// used to squash a warning that tar_append_tree does not take const char*
	// as its last parameter
	char broken_empty_string[] = "";

	// create a tar structure
	if(tar_open(&t, broken_archive_filename, NULL, O_WRONLY | O_CREAT, 0644, 0) == -1)
		return false;

	// add files to the tar 
	if(tar_append_tree(t, broken_input_path, broken_empty_string) != 0)
	{
		// if failed, close the tar and return failure
		tar_close(t);
		return false;
	}

	// add EOF to the tar file
	if(tar_append_eof(t) != 0)
	{
		// check for failure again
		tar_close(t);
		return false;
	}

	// close the tar file which should now have some contents
	if (tar_close(t) != 0)
		return false;
		
	return true;
}

// Function to extract a tar archive given an asbolute path to an archive filename and 
// and an absolute output path.
bool multiclass_classifier::extract_archive(string archive_filename, string output_path)
{
	TAR *t = NULL;
	// warning - this conversion loses const qualifiers
	char *broken_archive_filename = const_cast<char*>( archive_filename.c_str() );
	char *broken_output_path = const_cast<char*>( output_path.c_str() );

	// attempt to open the tar file
	if(tar_open(&t, broken_archive_filename, NULL, O_RDONLY, 0, 0) == -1)
		return false;

	// extract the contents of the tar file to the output path
	if(tar_extract_all(t, broken_output_path) != 0)
	{
		// in the operation failed, close the tar file
		tar_close(t);
		return false;
	}
	
	// close the tar file having extract its contents
	if(tar_close(t) != 0)
		return false;
		
	return true;
}

// Function to setup probability distributions in the decision tree leaves.
// Every training sample is classified with the newly-trained classifier; the
// leaf it arrives at is its predicted class.  leaf_probabilities[leaf][c]
// first accumulates how many samples of true class c arrived at `leaf', and
// is then converted to a probability using the number of samples that
// arrived at that leaf.
// `the_dataset' supplies the training samples of each target class.
void multiclass_classifier::setup_leaf_probabilities(const svm_dataset & the_dataset)
{
	size_t i, j;
	// cache storage for total observation counts of each target class leaf
	map<int, int> total_counts;
	// for each target class
	for(i = 0; i < targets.size(); i++)
	{
		// setup the set of frequency counts for each possible target class
		// at this leaf
		for(j = 0; j < targets.size(); j++)
			leaf_probabilities[targets[i]][targets[j]] = 0.0;
		// initialise the total observation count for this leaf
		total_counts[targets[i]] = 0;
	}
		
	// for each target class in the training set
	for(i = 0; i < targets.size(); i++)
	{
		// extract the training samples belonging to this class
		vector<vector<double> > curr_samples = the_dataset.get_samples(targets[i]);
		// for each sample from this class
		for(j = 0; j < curr_samples.size(); j++)
		{
			// classify the sample; the prediction identifies the leaf reached
			const int predicted_leaf = static_cast<int>(
				classify_sample(curr_samples[j], nnsl_classifier::scaled));
			// record the sample's true class at the leaf it arrived at.  The
			// row must be indexed by the predicted leaf because both the
			// totals below and the normalisation loop are per leaf.
			leaf_probabilities[predicted_leaf][targets[i]] += 1;
			// update the total observation count for the predicted class
			total_counts[predicted_leaf]++;
		}
	}
	
	// for each leaf in the decision tree
	for(i = 0; i < targets.size(); i++)
	{
		const int curr_total = total_counts[targets[i]];
		map<int, double> & curr_probabilities = leaf_probabilities[targets[i]];
		// for each state of the probability distribution associated with this leaf 
		for(j = 0; j < targets.size(); j++)
		{
			const double freq_count = curr_probabilities[targets[j]];
#if USE_MML_PROBABILITY
			// calculate the probability using the MML87 multinomial estimator
			curr_probabilities[targets[j]] = (freq_count + 0.5) / 
											 (curr_total + 
											  static_cast<double>(targets.size()) / 2.0);
#else
			// plain relative frequency; NOTE: divides by zero for a leaf
			// that no training sample arrived at
			curr_probabilities[targets[j]] = freq_count / curr_total;
#endif
		}
	}
}

// Function to write the leaf distribution details to file.  Each leaf
// becomes one line holding its probabilities separated by single tabs, with
// no tab after the final entry.
void multiclass_classifier::write_leaf_dists(ofstream & output_file) const
{
	typedef map<int, map<int, double> >::const_iterator row_iterator;
	typedef map<int, double>::const_iterator cell_iterator;

	// one output line per leaf
	for(row_iterator row = leaf_probabilities.begin(); row != leaf_probabilities.end(); ++row)
	{
		const map<int, double> & distribution = row->second;

		for(cell_iterator cell = distribution.begin(); cell != distribution.end(); ++cell)
		{
			// separate entries with a tab, which means writing one before
			// every entry except the first
			if(cell != distribution.begin())
				output_file << "\t";
			output_file << cell->second;
		}
		// finish the line before moving to the next leaf
		output_file << endl;
	}
}

// Function to read the leaf distribution details from file.  Reads
// targets.size() * targets.size() values in row-major order, where entry
// (i, j) is stored as leaf_probabilities[targets[i]][targets[j]].
void multiclass_classifier::read_leaf_dists(ifstream & input_file)
{
	const size_t num_targets = targets.size();

	// one row of values per leaf
	for(size_t leaf = 0; leaf < num_targets; ++leaf)
	{
		// one value per target class within that leaf
		for(size_t state = 0; state < num_targets; ++state)
		{
			// probability of a sample being from targets[state] and
			// arriving at the leaf for targets[leaf]
			double value;
			input_file >> value;
			leaf_probabilities[targets[leaf]][targets[state]] = value;
		}
	}
}

// Function to print the leaf probability distributions to standard output as
// a tab-separated table: a header row of leaf labels followed by one row per
// leaf containing its label and its probabilities to five decimal places.
void multiclass_classifier::print_leaf_dists(void) const
{
	typedef map<int, map<int, double> >::const_iterator row_iterator;
	typedef map<int, double>::const_iterator cell_iterator;

	cout << "Leaf probability distributions:" << endl;
	cout << "-------------------------------" << endl << endl;
		 
	// table header: an empty corner cell, then every leaf label
	cout << "\t";
	for(row_iterator header = leaf_probabilities.begin(); header != leaf_probabilities.end();
		++header)
		cout << header->first << "\t";
	cout << endl;
		
	// one table row per leaf
	for(row_iterator row = leaf_probabilities.begin(); row != leaf_probabilities.end(); ++row)
	{
		// the label is printed with the float formatting flags cleared
		cout.unsetf(ios::fixed | ios::scientific);
		cout << noshowpos;	
		cout << row->first << "\t";
		
		// fixed-point with five decimals for the table contents
		cout << setprecision(5) << fixed;
	
		const map<int, double> & distribution = row->second;
		for(cell_iterator cell = distribution.begin(); cell != distribution.end(); ++cell)
			cout << cell->second << "\t";
		cout << endl;
	}
	cout << endl;
	// leave the stream with the float formatting flags cleared
	cout.unsetf(ios::fixed | ios::scientific);
	cout << noshowpos;	
}

// Function to classify a single sample and also report the probability
// distribution stored for the predicted leaf.  This function assumes that
// the classifier has been trained or a model has been loaded.
// Returns the prediction made by classify_sample(); `leaf_distribution' is
// overwritten with the entry of leaf_probabilities for that prediction.
double multiclass_classifier::classify_with_leaf(const vector<double> & features,
												 const sample_type & the_sample_type,
												 map<int, double> & leaf_distribution)
{
	const double prediction = classify_sample(features, the_sample_type);
	// the prediction, truncated to an integer label, selects the leaf
	const int leaf_label = static_cast<int>(prediction);
	leaf_distribution = leaf_probabilities[leaf_label];
	
	return prediction;
}

////////////////////////

// Default DAGSVM constructor is empty for the time being; it performs no
// work of its own.
dag_svm::dag_svm(void)
{
	// nothing to initialise here at present
}

// Destructor frees up the dynamically added DAG nodes
// (i.e. generic SVMs) by calling clear_dag_nodes().
dag_svm::~dag_svm(void)
{
	clear_dag_nodes();
}

// Function to free the memory used by each node in the DAGSVM.
// Every SVM held in dag_nodes is deleted and the map is then emptied, so the
// function can safely be called more than once (e.g. by clear() and later by
// the destructor) without deleting the same node twice.
void dag_svm::clear_dag_nodes(void)
{
	map<vector<int>, nonprobabilistic_svm*>::iterator map_iter;
	
	// free memory of each DAG node (deleting a NULL entry is harmless)
	for(map_iter = dag_nodes.begin(); map_iter != dag_nodes.end(); ++map_iter)
		delete map_iter->second;

	// drop the now-dangling pointers so they can never be deleted again
	dag_nodes.clear();
}

// Function to train the DAGSVM given a dataset parameter.
// One two-class SVM is trained for every pair of target labels produced by
// setup_node_labels(); afterwards the leaf probability distributions are
// built from the same dataset.
// Returns false (and does nothing) if a model is already set up, false if
// any node fails to train, and true otherwise.  After a failed node the
// object stays flagged as not set up; nodes created so far remain stored in
// dag_nodes.
bool dag_svm::train(const svm_dataset & the_dataset)
{
	// check that we are not attempting to train an already-trained model
	if(model_setup)
	{
		cout << "Warning:- attempted to train a DAGSVM model that has already "
			 << "been setup, ignoring training operation..." << endl;
		
		return false;
	}
	
	// extract a vector of all unique target labels
	targets = the_dataset.get_targets();
	// setup the DAG node labels
	setup_node_labels();
	// store the number of features per sample in the dataset
	num_features = the_dataset.get_num_features();
	// extract and store the scaling range used on the training dataset
	the_dataset.get_scale_range(scale_range_low, scale_range_high);
	// extract and store the observed feature limits of training dataset
	the_dataset.get_feature_limits(lower_limits, upper_limits);
	
	// for each DAG node
	for(size_t i = 0; i < node_labels.size(); i++)
	{
		cerr << "*-*-* Training node " << (i + 1) << " of " 
			 << node_labels.size() << ", "
			 << "1 class size vs. 1 class size." << endl;
				 
		// get a subset of the complete dataset holding just this node's
		// two classes
		set<int> subset_class1, subset_class2;
		subset_class1.insert(node_labels[i].front());
		subset_class2.insert(node_labels[i].back());
		svm_dataset filtered_dataset = get_data_subset(the_dataset, subset_class1, subset_class2);

		// create a new dag node (plain `new' throws rather than returning
		// NULL, so no NULL check is needed) and store it in our map straight
		// away so that it is owned by the DAG even if training fails
		nonprobabilistic_svm *curr_node_ptr = new nonprobabilistic_svm(filtered_dataset);
		dag_nodes[node_labels[i]] = curr_node_ptr;
		
		// train the node; an untrained node would make the DAG unusable
		if(!curr_node_ptr->train())
		{
			cout << "Error:- could not train DAG node " << (i + 1) << " of "
				 << node_labels.size() << "..." << endl;
			return false;
		}
	}
	
	// the model has been successfully setup and can be used to classify
	model_setup = true;	
	// setup probability distributions in the tree leaves
	setup_leaf_probabilities(the_dataset);
	return true;
}

// Function to classify a single sample.  This function assumes that
// the DAGSVM has been trained or a model has been loaded.
double dag_svm::classify_sample(const vector<double> & features,
								const sample_type & the_sample_type)
{
	// a vector of all target labels
	vector<int> target_list;
	vector<double> feature_set;
	// classification result (RETHINK DOUBLE TYPE ONCE TESTING IS DONE AND OK'D)
	double predicted_class = 0;

	// can only use the DAGSVM if it has been setup (i.e. trained or loaded)
	if(!model_setup)
	{
		cout << "Error:- attempted to classify data using a DAGSVM that was "
			 << "not setup, return nonsense prediction..." << endl;
		return predicted_class;
	}

	// create a scaled version of the feature vector if necessary
	if(the_sample_type == unscaled)
		feature_set = scale_sample(features);
	else
		feature_set = features;
	
	// copy the vector of targets for list-style manipulation
	target_list = targets;
	assert(target_list.size() > 1);
	
	// while a result has not been reached
	while(target_list.size() > 1)
	{
		// create the label pair to identify the DAG node being tested
		vector<int> class_pair;
		class_pair.push_back(target_list.front());
		class_pair.push_back(target_list.back());
		// classify the sample using the appropriate DAG node
		predicted_class = dag_nodes[class_pair]->classify_sample(feature_set);
		
		// delete the label at the end of the list if the sample is classified
		// as matching the label at the start of the list
		if(predicted_class == -1)
			target_list.erase(target_list.end() - 1);
		// delete the label at the start of the list if the sample is classified
		// as matching the label at the end of the list
		else if(predicted_class == +1)
			target_list.erase(target_list.begin());
	}
	
	// only element remaining is the predicted class at this stage
	return static_cast<double>(target_list.front());
}
						   
// Function to setup the pairs of class labels used to identify each
// DAG node (i.e. each SVM).  This process uses the sorted vector of target
// labels that is setup during training or loading from file.
void dag_svm::setup_node_labels(void)
{
	int i, j;
	
	// for each class label
	for(i = 0; i < targets.size(); i++)
	{
		// and each following class label
		for(j = i + 1; j < targets.size(); j++)
		{
			// create a label pair
			vector<int> curr_label;
			curr_label.push_back(targets[i]);
			curr_label.push_back(targets[j]);
			// store the label pair
			node_labels.push_back(curr_label);
			
			// initialise the SVM pointer to null so that the
			// destructor will work correctly
			dag_nodes[curr_label] = NULL;			
		}
	}
}

// Function to save the DAGSVM model to file.
// The model is written as a set of files inside "<archive_filename>_contents/"
// (a master file plus one SVM file per node), that directory is tarred into
// `archive_filename', and the temporary files and directory are removed.
// Returns false as soon as any step fails, true otherwise.
bool dag_svm::save(const string & archive_filename)
{
	// can only save a trained model, so sanity check
	if(!model_setup)
	{
		cout << "Error:- attempted to save a DAGSVM model that was not "
			 << "trained, ignoring operation..." << endl;
			 
		return false;
	}

	// directory containing all DAGSVM model files, and the master file in it
	const string contents_path = archive_filename + "_contents/";
	const string master_filename = contents_path + "master.dat";

	// create a temporary directory to store the model files before tarring
	mkdir(contents_path.c_str(), (mode_t)0755);

	// open the master file and make sure that worked
	ofstream master_file(master_filename.c_str());
	if(!master_file.is_open())
	{
		cout << "Error:- could not open training file for writing..."
			 << endl;
			 
		return false;
	}

	// header information first, then the leaf distributions
	write_feature_details(master_file);
	write_leaf_dists(master_file);

	// every node file written, remembered for later deletion
	list<string> abs_node_filenames;
	// buffer used to turn the node number into text
	char converted_label[MAXLINE] = "";
	const size_t node_count = node_labels.size();

	// one record and one model file per DAG node (i.e. each SVM)
	for(size_t node_number = 0; node_number < node_count; ++node_number)
	{
		const vector<int> & label_pair = node_labels[node_number];

		// node files are named after their position in node_labels
		snprintf(converted_label, MAXLINE, "%d", static_cast<int>(node_number));
		const string node_filename = string("node_") + converted_label + ".svm";
		const string abs_node_filename = contents_path + node_filename;
		abs_node_filenames.push_back(abs_node_filename);

		// record: the two labels followed by the relative model filename
		master_file << label_pair.front() << " " << label_pair.back() << " "
					<< node_filename;
		
		// records are newline-separated, with none after the last record
		if(node_number + 1 != node_count)
			master_file << endl;

		// save the node's model
		if(!dag_nodes[label_pair]->save(abs_node_filename.c_str()))
			return false;
	}
	
	// close the master file before tarring (and later, deletion)
	master_file.close();

	// create the tar archive of the contents directory
	if(!create_archive(archive_filename, contents_path))
		return false;
	
	// remove all temporary node files
	list<string>::const_iterator file_iter;
	for(file_iter = abs_node_filenames.begin(); file_iter != abs_node_filenames.end(); ++file_iter)
	{
		if(unlink(file_iter->c_str()) != 0)
			return false;
	}

	// delete the master file and, only if that worked, the contents directory
	return unlink(master_filename.c_str()) == 0 && rmdir(contents_path.c_str()) == 0;
}

// Function to load the DAGSVM model from file.
bool dag_svm::load(const string & archive_filename)
{
	// absolute path to the model files
	string contents_path   = archive_filename + "_contents/";
	// absolute path to the master file
	string master_filename = contents_path + "master.dat";
	// storage for absolute filenames in the contents directory
	list<string> abs_node_filenames;
	list<string>::const_iterator iter;

	// check that we dont attempted to load over an already setup model
	if(model_setup)
	{
		cout << "Warning:- attempted to load a DAG-SVM model over an already "
			 << "trained model, ignoring operation..." << endl;
		
		return false;
	}
	// extract the tar archive
	if (!extract_archive(archive_filename, contents_path))
		return false;
		
	// open the input filestream
	ifstream master_file(master_filename.c_str());
	// check that the file opened successfully
	if(!master_file.is_open())
	{
		cout << "Error:- could not open training file for reading..."
			 << endl;
			 
		return false;
	}
	// read the header information from file
	read_feature_details(master_file);
	// read information about the leaf distributions
	read_leaf_dists(master_file);
	// while there are still more DAG nodes to create
	while(!master_file.eof())
	{
		// the identifying pair of labels
		int label1, label2;
		vector<int> label_pair;
		// saved SVM model's filename
		string node_filename;
	
		// input the labels and the relative node filename
		master_file >> label1 >> label2 >> node_filename;
		// form and store the absolute node filename
		string abs_node_filename = contents_path + node_filename;
		abs_node_filenames.push_back(abs_node_filename);
		
		// create a label pair
		label_pair.push_back(label1);
		label_pair.push_back(label2);
		// store the pair of labels to identify this node
		node_labels.push_back(label_pair);
		
		// create a new SVM and load it from the model file
		nonprobabilistic_svm *new_svm_ptr = new nonprobabilistic_svm;
		// check that the object was created
		if(new_svm_ptr == NULL)
		{
			cout << "Error:- could not allocate memory for a new SVM during "
				 << "load operation..." << endl;
			return false;
		}
		
		// load the SVM node from file
		if (!new_svm_ptr->load(abs_node_filename.c_str()))
			return false;
			
		// add the new node to DAG
		dag_nodes[label_pair] = new_svm_ptr;
		// clear the pair of class labels before the next node is created
		label_pair.clear();
	}
	
	// close master file before deletion
	master_file.close();
	
	// remove all temorary node files
	for (iter = abs_node_filenames.begin(); iter != abs_node_filenames.end(); iter++)
	{
		if(unlink((*iter).c_str()) != 0)
			return false;
	}
	// delete the master file and then the contents directory
	if(unlink(master_filename.c_str()) != 0 || rmdir(contents_path.c_str()) != 0)
		return false;

	// DAGSVM model is now setup and ready to classify
	model_setup = true;
	return true;
}

// Function to clear the DAGSVM so that it is ready for re-use.
void dag_svm::clear(void)
{
	model_setup = false;
	num_features = 1;
	
	// reset the feature scaling parameters
	scale_range_low = -1;
	scale_range_high =  1;
	lower_limits.clear();
	upper_limits.clear();
	
	// free memory used by nodes (generic SVMs)
	clear_dag_nodes();
	// clear pairs of node labels
	node_labels.clear();
	// clear the vector of target labels (i.e. classes)
	targets.clear();
}

// Function to print details about the DAGSVM: the number of classes and
// graph nodes, the label pair of every node (ten pairs per row), and finally
// the leaf probability distributions.  Prints an error instead when no model
// has been trained or loaded.
void dag_svm::print(void)
{
	if(!model_setup)
	{
		cout << "Error:- attempted to print the details of a DAG SVM "
			 << "which had not been trained or loaded, ignoring operation..."
			 << endl;
		return;
	}
	
	cout << "DAG-SVM classifier for data from " << targets.size() << " classes "
		 << "with " << node_labels.size() << " graph nodes, having labels:"
		 << endl << endl;
	
	// list the label pair of every DAG node
	for(size_t node = 0; node < node_labels.size(); ++node)
	{
		const vector<int> & labels = node_labels[node];
		cout << labels.front() << " vs. " << labels.back();

		// pairs within a row are comma-separated; every tenth pair ends
		// the row instead
		if((node + 1) % 10 != 0)
			cout << ",\t";
		else
			cout << endl;
	}
	
	cout << endl;
	// output leaf distribution details
	print_leaf_dists();
}

///////////////////////

// Default constructor simply clears all data members prior to use by
// delegating to clear().
mcst_edge::mcst_edge(void)
{
	clear();
}

// Constructor initialises the edge vertices to `first' and `second' and
// gives the edge a weight of zero.
mcst_edge::mcst_edge(int first, int second)
{
	set_vertices(first, second);
	set_weight(0.0);
}

// Constructor initialises the edge vertices and weight.
// set_weight() ignores a negative weight (after printing an error); in that
// case the edge is left with a weight of zero.
mcst_edge::mcst_edge(int first, int second, long double the_weight)
{
	// give the weight a defined value first, because set_weight() leaves
	// it untouched when `the_weight' is rejected
	weight = 0.0;
	set_vertices(first, second);
	set_weight(the_weight);
}

// Function to set the end-points of the edge: `first' is stored as the
// first vertex and `second' as the second vertex.
void mcst_edge::set_vertices(int first, int second)
{
	first_vertex = first;
	second_vertex = second;
}

// Function returns the end-points of the edge through its reference
// parameters: `first' receives the first vertex and `second' the second.
void mcst_edge::get_vertices(int & first, int & second) const
{
	first = first_vertex;
	second = second_vertex;
}

// Function to set the weight (i.e. length) of an edge.  Only a weight that
// satisfies `the_weight >= 0' is stored; anything else is reported on
// standard output and the stored weight is left as it was.
void mcst_edge::set_weight(long double the_weight)
{
	// sanity check - reject anything that is not >= 0
	if(!(the_weight >= 0.0))
	{
		cout << "Error:- attempted to set MCST edge weight < 0, "
			 << "ignoring operation..." << endl;
		return;
	}

	weight = the_weight;
}

// Function returns the edge weight currently stored for this edge.
long double mcst_edge::get_weight(void) const
{
	return weight;
}

// Function to reset the data members of the edge: both vertices and the
// weight are set to zero.
void mcst_edge::clear(void)
{
	first_vertex = 0;
	second_vertex = 0;
	weight = 0.0;
}

// Function to print the details of an edge on one line of standard output:
// the two vertices joined by "<->", followed by the weight.
void mcst_edge::print(void) const
{
	// the end-points first...
	cout << first_vertex << "\t<->\t" << second_vertex;
	// ...then the weight, finishing the line
	cout << " :\t" << weight << endl;
}

////////////////////////////

// Default constructor.  A new node starts without an SVM classifier and
// without a left or right child.
classifier_node::classifier_node(void)
{
	// no associated SVM classifier yet
	classifier = NULL;
	// and no children on either side
	left_child = right_child = NULL;
}

// Destructor frees up the generic SVM held by this node.  The child
// pointers are not touched here.
classifier_node::~classifier_node(void)
{
	delete classifier;
}

// Function to add a set of classes to which data items may
// belong.  A copy of `class_ids' is appended to the node's class sets.
void classifier_node::add_class_set(const set<int> & class_ids)
{
	 class_sets.push_back(class_ids);
}

// Function returns all sets of classes being classified by this node,
// as a copy of the node's stored class sets.
vector<set<int> > classifier_node::get_class_sets(void) const
{
	return class_sets;
}

// Function returns how many class sets are stored in this node.  This
// value should always be two (2) for an MCST.
int classifier_node::num_class_sets(void) const
{
	// the container's size is narrowed to the int the interface promises
	return static_cast<int>(class_sets.size());
}

// Function returns true when the node stores exactly one class set and that
// set holds exactly one class id; returns false otherwise.
bool classifier_node::is_singleton(void) const
{
	// the second test only runs when a first set exists, so front() is safe
	return class_sets.size() == 1 && class_sets.front().size() == 1;
}

// Function to attach `child_ptr' as the left or right child of this node.
// The `direction' enum is local to the class classifier_node; any value
// other than `left' selects the right child.
void classifier_node::set_child(direction the_dir, classifier_node *child_ptr)
{
	// pick the slot to overwrite, then store the new child in it
	classifier_node *& slot = (the_dir == left) ? left_child : right_child;
	slot = child_ptr;
}

// Function returns true if the child pointer in the specified direction is
// non-NULL, and returns false otherwise.  Any direction other than `left'
// is treated as the right child.
bool classifier_node::has_child(direction the_dir) const
{
	const classifier_node *child = (the_dir == left) ? left_child : right_child;
	return child != NULL;
}

// Function returns the child pointer stored for the given direction
// (left child for `left', right child otherwise).  The result is NULL
// when no child is linked in that direction.
classifier_node* classifier_node::get_child(direction the_dir) const
{
	return (the_dir == left) ? left_child : right_child;
}

// Function to train this node.  A new nonprobabilistic_svm is built over
// the supplied dataset and stored as the node's classifier; the result
// of that SVM's own train() call is returned.
bool classifier_node::train(const svm_dataset & the_dataset)
{
	// build the SVM for this node's data
	classifier = new nonprobabilistic_svm(the_dataset);
	// run the training and hand its outcome back to the caller
	const bool trained = classifier->train();
	return trained;
}

// Function returns a `flattened' set containing the union of the class
// ids held in every class set of this node.  An empty set is returned
// when the node has no class sets.
set<int> classifier_node::get_flattened_classes(void) const
{
	// the complete set of class labels
	set<int> flattened_classes;
	vector<set<int> >::const_iterator curr_set;

	// Visit however many class sets are stored instead of reaching for
	// front()/back() directly: those calls are undefined on an empty
	// vector, and they would skip any set that is neither first nor last.
	for(curr_set = class_sets.begin(); curr_set != class_sets.end(); ++curr_set)
		flattened_classes.insert(curr_set->begin(), curr_set->end());

	return flattened_classes;
}

// Function to write the details of this node to file.
bool classifier_node::write(ofstream & output_file, 
							const string & contents_path,
							list<string> & abs_node_filenames,
							const int & node_label)
{
	string node_filename, abs_node_filename;
	// should have a class set for each left and right direction
	assert(class_sets.size() == 2);
	
	// write the size of each class set
	output_file << class_sets.front().size() << " "
				<< class_sets.back().size() << endl;

	// write the members of the first class set
	const set<int> & first_set = class_sets.front();
	set<int>::const_iterator iter;
	
	for(iter = first_set.begin(); iter != first_set.end(); iter++)
		output_file << *iter << " ";
	output_file << endl;
	
	// write the members of the second class set
	const set<int> & second_set = class_sets.back();
	for(iter = second_set.begin(); iter != second_set.end(); iter++)
		output_file << *iter << " ";		
	output_file << endl;

	// create an absolute model filename and write this filename
	node_filename = form_filename(node_label);
	// form the absolute filenane and store it
	abs_node_filename = contents_path + node_filename;
	abs_node_filenames.push_back(abs_node_filename);

	// write the relative model filename
	output_file << node_filename << endl;
	// save the generic SVM to file
	if(!(classifier->save(abs_node_filename.c_str())))
		return false;

	return true;
}

// Function to form a filename based on the class sets of this node.
string classifier_node::form_filename(const int & node_label)
{
	string result = "node_";
	char temp_str[MAXLINE];

	// convert the integer class label to a string
	snprintf(temp_str, MAXLINE, "%d", node_label);
	string node_identifier(temp_str);
	// set an appropriate extension
	result += node_identifier + ".svm";
	
	return result;
}

// Function to read the details of this node from the master file and to
// load its SVM model.  The record is expected in the layout produced by
// write(): the two class-set sizes, the members of the first set, the
// members of the second set, then the model filename relative to
// contents_path.  The absolute model filename is appended to
// abs_node_filenames.
//
// Returns true if the record was read and the SVM model was loaded.
// Returns false if the record is truncated or malformed (in which case
// the node is left unchanged) or if the SVM model could not be loaded.
bool classifier_node::read(ifstream & input_file,
						   const string & contents_path,
						   list<string> & abs_node_filenames)
{
	int i, curr_target, first_size, second_size;
	// class id sets
	set<int> first_set, second_set;
	string node_filename, abs_node_filename;
	
	// read the size of each class set.  The extraction must be checked:
	// on failure the sizes would otherwise be used without ever having
	// been given a value.
	if(!(input_file >> first_size >> second_size))
		return false;
	
	// read in the classes of the first set
	for(i = 0; i < first_size; i++)
	{
		if(!(input_file >> curr_target))
			return false;
		first_set.insert(curr_target);
	}
	
	// read in the classes of the second set
	for(i = 0; i < second_size; i++)
	{
		if(!(input_file >> curr_target))
			return false;
		second_set.insert(curr_target);
	}
	
	// read in the relative node filename
	if(!(input_file >> node_filename))
		return false;
	
	// the whole record was read, so the class id sets can now be stored
	class_sets.push_back(first_set);
	class_sets.push_back(second_set);
	
	// form the absolute node filename and store it
	abs_node_filename = contents_path + node_filename;
	abs_node_filenames.push_back(abs_node_filename);
	
	// create a new generic SVM and load the model from file	
	classifier = new nonprobabilistic_svm;
	if(!(classifier->load(abs_node_filename.c_str())))
		return false;

	return true;
}

// Function to classify a single sample with this node's SVM.  An SVM
// output of -1 selects the first class set (left direction) and an
// output of 1 selects the second class set (right direction).
//
// If a child node exists in the selected direction, a pointer to it is
// returned and predicted_class is set to -1, since the decision must be
// refined further down the tree.  If no child exists there, NULL is
// returned and predicted_class receives the first class id of the
// selected class set.  If the SVM output is neither -1 nor 1, NULL is
// returned and predicted_class is left at -1.
classifier_node* classifier_node::classify_sample(const vector<double> & sample, 
												  double & predicted_class)
{
	// run the binary SVM held by this node
	const double binary_prediction = classifier->classify_sample(sample);
	// until a final class is known, report -1
	predicted_class = -1;

	// work out which side of the node the sample falls on
	const bool in_first_set = (binary_prediction == -1);
	const bool in_second_set = !in_first_set && (binary_prediction == 1);

	// unrecognised SVM output: nowhere to go and no class to report
	if(!in_first_set && !in_second_set)
		return NULL;

	// the next node to visit, if the chosen side has been decomposed further
	classifier_node *child_node = in_first_set ? left_child : right_child;

	// no child in this direction means a final classification was reached
	if(child_node == NULL)
	{
		const set<int> & predicted_set = in_first_set ? class_sets.front()
													 : class_sets.back();
		predicted_class = static_cast<int>(*predicted_set.begin());
	}

	return child_node;
}

// Function to reset the MCST node for re-use.  The stored class sets are
// discarded, both child links are dropped (the child nodes themselves are
// not deleted here) and the SVM classifier held by the node is freed.
void classifier_node::clear(void)
{
	// clear the records of class sets
	class_sets.clear();
	// node has no children
	left_child = NULL;
	right_child = NULL;
	// free up any memory allocated for a generic SVM
	delete classifier;
	// forget the freed SVM: without this the destructor (or a second call
	// to clear()) would delete the same pointer again
	classifier = NULL;
}

// Function to print the details of the MCST node to standard output:
// the two class sets it separates, followed by either a note that the
// node is a leaf or a note for each child that is present.
void classifier_node::print(void) const
{
	set<int>::const_iterator member;

	cout << "Node classifies between the two sets of class IDs:" << endl;
	
	// members of the first class set, space separated
	const set<int> & lhs_classes = class_sets.front();
	for(member = lhs_classes.begin(); member != lhs_classes.end(); ++member)
		cout << *member << " ";
	
	cout << "vs. ";
	
	// members of the second class set, space separated
	const set<int> & rhs_classes = class_sets.back();
	for(member = rhs_classes.begin(); member != rhs_classes.end(); ++member)
		cout << *member << " ";
	
	cout << endl << endl;
	
	const bool has_left = (left_child != NULL);
	const bool has_right = (right_child != NULL);

	// a node without any children is a leaf; nothing more to report
	if(!has_left && !has_right)
	{
		cout << "This is a leaf node." << endl << endl;
		return;
	}

	// otherwise mention each child that exists, then a separator
	if(has_left)
		cout << "This node has a left child." << endl;
	if(has_right)
		cout << "This node has a right child." << endl << endl;

	cout << "----" << endl << endl;
}

//////////////////

// Default constructor.  The tree starts out empty and class centroids
// are used as the class representative when measuring distances.
hierarchical_classifier::hierarchical_classifier(void)
{
	// use centroids unless told otherwise
	representative = centroid;
	// no classifier nodes in the tree yet
	root = NULL;
}

// Constructor taking the type of class representative to use when
// measuring the distance between sample sets.  The tree starts out
// empty.
hierarchical_classifier::hierarchical_classifier(const distance_type &
												 the_representative)
{
	// remember which kind of class summary the caller asked for
	representative = the_representative;
	// the tree has no classifier nodes until training or loading
	root = NULL;
}

// Destructor hands the whole tree, starting from the root, to
// free_tree_nodes() so that the resources held by its nodes are
// released.
hierarchical_classifier::~hierarchical_classifier(void)
{
	free_tree_nodes(root);
}

// Function to free the memory used by every node in the subtree rooted
// at curr_node.  Children are released before their parent.  Each node
// is deleted, and the node's destructor in turn frees the node's SVM.
// Passing NULL is allowed and does nothing.
//
// After this call curr_node and every pointer into its subtree are
// invalid; callers must not use them again (clear() resets root to NULL
// for this reason).
void hierarchical_classifier::free_tree_nodes(classifier_node *curr_node)
{
	// nothing to release for an absent node
	if(curr_node == NULL)
		return;

	// release both subtrees first; get_child() yields NULL when there is
	// no child in that direction, which the check above absorbs
	free_tree_nodes(curr_node->get_child(classifier_node::left));
	free_tree_nodes(curr_node->get_child(classifier_node::right));

	// Nodes are created with `new', so they must be deleted here.  Merely
	// clearing the node would free its SVM but leak the node itself.
	delete curr_node;
}

// Function to merge two sets of target classes.  Every member of the
// set at second_index is added to the set at first_index, after which
// the set at second_index is removed from the vector (so the sets that
// followed it move down by one position).
void hierarchical_classifier::merge_sets(vector<set<int> > & sets,
										 int first_index, int second_index)
{
	// work from a private copy of the set that is being absorbed
	const set<int> absorbed(sets[second_index]);

	// fold its members into the surviving set
	sets[first_index].insert(absorbed.begin(), absorbed.end());

	// drop the absorbed set from the collection
	sets.erase(sets.begin() + second_index);
}

// Function searches node_ptrs for the node whose flattened classes (the
// union of its class sets) equal class_set.  The first such node is
// returned; NULL is returned if no node matches.
classifier_node* hierarchical_classifier::find_child(
									const vector<classifier_node*> & node_ptrs,
									const set<int> & class_set)
{
	vector<classifier_node*>::const_iterator candidate;

	// examine the existing MCST nodes in order
	for(candidate = node_ptrs.begin(); candidate != node_ptrs.end(); ++candidate)
	{
		// a node is the child when the classes it covers match exactly;
		// only one child is expected, so the first match ends the search
		if((*candidate)->get_flattened_classes() == class_set)
			return *candidate;
	}
	
	// no node covers exactly this set of classes
	return NULL;
}

// Function to calculate the Euclidean distance between the samples of two
// classes.
double hierarchical_classifier::class_distance(vector<vector<double> > & first_class,
											   vector<vector<double> > & second_class)
{
	vector<double> first_summary, second_summary;
	
	// calculate the centroid summary for each class
	if(representative == centroid)
	{
		first_summary = calc_centroid(first_class);
		second_summary = calc_centroid(second_class);
	}
	// calculate the median summary for each class
	else
	{
		first_summary = calc_median(first_class);
		second_summary = calc_median(second_class);
	}
	
	// return the Euclidean distance between the two
	// summary vectors
	return euclidean_distance(first_summary, second_summary);
}

// Function to calculate the centroid of a vector of samples, i.e. the
// per-feature mean.  The result has as many entries as the first sample.
// If a later sample has a different number of features, only the
// features it shares with the first sample contribute to the sums.
// An empty vector is returned (after printing an error) when there are
// no samples.
vector<double> hierarchical_classifier::calc_centroid(
										const vector<vector<double> > & samples)
{
	size_t i, j;
	vector<double> result;
	
	// sanity check - must have at least one sample to compute a centroid
	if(samples.empty())
	{
		cout << "Error:- attempted to calculate the centroid of an empty "
			 << "class, returning empy centroid vector..." << endl;
		
		return result;
	}
	
	// start with the first sample
	result = samples.front();
	
	// for every remaining sample
	for(i = 1; i < samples.size(); i++)
	{
		// refer to the sample in place; copying it each time is wasted work
		const vector<double> & curr_sample = samples[i];
		// never index past the end of the running sums, even if this
		// sample carries more features than the first one did
		const size_t shared_len = min(result.size(), curr_sample.size());
		// add each feature's value to the running sum of feature values
		for(j = 0; j < shared_len; j++)
			result[j] += curr_sample[j];
	}
	
	// divide each summed feature by the number of samples to attain
	// the centroid
	for(i = 0; i < result.size(); i++)
		result[i] /= samples.size();
	
	return result;
}

// Function to calculate the per-feature median of a vector of samples.
// For each feature index the values found across the samples are sorted
// and the element at position (count / 2) is taken, so for an even
// number of values the upper of the two middle values is used.  An
// empty vector is returned (after printing an error) when there are no
// samples.
vector<double> hierarchical_classifier::calc_median(
										const vector<vector<double> > & samples)
{
	size_t i, j;
	// feature_values[j] collects the value of feature j from every sample
	// that has such a feature
	vector<vector<double> > feature_values;
	vector<double> result;
	
	// sanity check - must have at least one sample to compute a median
	if(samples.empty())
	{
		cout << "Error:- attempted to calculate the median of an empty "
			 << "class, returning empty median vector..." << endl;
		
		return result;
	}
	
	// for every sample
	for(i = 0; i < samples.size(); i++)	
	{
		// extract the current sample
		const vector<double> & curr_sample = samples[i];
		// make room for any feature indices not seen before
		if(feature_values.size() < curr_sample.size())
			feature_values.resize(curr_sample.size());
		// store the value for every feature of the sample
		for(j = 0; j < curr_sample.size(); j++)
			feature_values[j].push_back(curr_sample[j]);
	}
	
	// for every feature, in increasing order of feature index
	result.reserve(feature_values.size());
	for(j = 0; j < feature_values.size(); j++)
	{
		vector<double> & values = feature_values[j];
		// sort the values of this feature
		sort(values.begin(), values.end());
		// extract the median value from the sorted vector.  Every column
		// holds at least one value, because a column only exists if some
		// sample had a feature at that index.
		result.push_back(values[values.size() / 2]);
	}

	return result;
}

// Function to calculate the Euclidean distance between two feature
// vectors, i.e. the square root of the sum of squared per-feature
// differences.  Both vectors must have the same length (asserted).
double hierarchical_classifier::euclidean_distance(
									const vector<double> & first_sample,
									const vector<double> & second_sample)
{
	// samples should always have the same number of features
	assert(first_sample.size() == second_sample.size());

	double sum_of_squares = 0.0;

	// accumulate the squared difference of each feature
	for(size_t k = 0; k < first_sample.size(); ++k)
	{
		const double difference = first_sample[k] - second_sample[k];
		sum_of_squares += difference * difference;
	}
	
	return sqrt(sum_of_squares);
}

// Function to classify a single sample by walking the MCST from the
// root.  Each node's SVM chooses the next node to visit until a node
// reports that there is nothing further to descend into; the class
// predicted at that point is returned.  Samples flagged as `unscaled'
// are passed through scale_sample() first.
//
// The classifier must have been trained or loaded beforehand.  If it
// has not, an error is printed and 0 is returned as a meaningless
// prediction.
double hierarchical_classifier::classify_sample(const vector<double> & features,
											const sample_type & the_sample_type)
{	
	// classification result (TODO: revisit the use of double for a class
	// label once testing is complete)
	double predicted_class = 0;

	// can only use the MCST SVM if it has been setup (i.e. trained or loaded)
	if(!model_setup)
	{
		cout << "Error:- attempted to classify data using a MCST SVM that was "
			 << "not setup, returning nonsense prediction..." << endl;
			 
		return predicted_class;
	}
		
	// use the features as given when already scaled, otherwise scale them
	vector<double> feature_set;	
	if(the_sample_type != unscaled)
		feature_set = features;
	else
		feature_set = scale_sample(features);

	// descend from the root; each node either names the next node to
	// visit or returns NULL once a final class has been predicted
	classifier_node *curr_node = root;
	do
	{
		curr_node = curr_node->classify_sample(feature_set, predicted_class);
	}
	while(curr_node != NULL);

	return predicted_class;
}
										   
// Function to save the MCST SVM classifier model to file.  Nodes are written
// in postfix order and this is a *very* important detail.  Postfix ordering
// means that a node with children will always be recorded after its children.
// The load() function depends on this behaviour when loading nodes sequentially
// and building the structure of the tree.
//
// The model is first written into a temporary directory named
// "<archive_filename>_contents/" (a master file plus one SVM file per
// node), the directory is then packed into the archive, and finally the
// temporary files and directory are removed.
//
// Returns true if every step succeeded.  Returns false if the model has
// not been set up, the master file cannot be opened, a node cannot be
// written, the archive cannot be created, or the temporary files cannot be
// removed.  On failure, temporary files already written are left in place.
bool hierarchical_classifier::save(const string & archive_filename)
{
	// can only save a trained model, so sanity check
	if(!model_setup)
	{
		cout << "Error:- attempted to save a MCST SVM model that was not "
			 << "trained, ignoring operation..." << endl;
		return false;
	}
	// storage for the absolute filenames of each tree node
	list<string> abs_node_filenames;
	list<string>::const_iterator iter;
	// storage for the flattened tree structure used to iteratively write nodes
	vector<classifier_node*> flattened_tree;
	// path to a directory containing all model files
	string contents_path = archive_filename + "_contents/";
	// form the filename of the master file
	string master_filename = contents_path + "master.dat";
	// create a temporary directory to store the model files.  The result is
	// not checked here; if the directory is unusable, opening the master
	// file below fails and is reported.
	mkdir(contents_path.c_str(), static_cast<mode_t>(0755));
	// open the output filestream 
	ofstream master_file(master_filename.c_str());
	// check whether the file was opened successfully
	if(!master_file.is_open())
	{
		cout << "Error:- could not open training file for writing..." << endl;			 
		return false;
	}
	// write the header information regarding feature ranges etc.
	write_feature_details(master_file);
	// write information about the leaf distributions
	write_leaf_dists(master_file);
	// flatten the tree into a vector of node pointers using postfix traversal
	flatten_tree(flattened_tree, root);
	// record each node in the master file and write each svm to a separate
	// file.  A node that cannot be written makes the whole model unusable,
	// so stop instead of archiving an incomplete model.
	const int num_nodes = static_cast<int>(flattened_tree.size());
	for(int i = 0; i < num_nodes; i++)
	{
		if(!flattened_tree[i]->write(master_file, contents_path,
									 abs_node_filenames, i))
		{
			cout << "Error:- could not write MCST node " << i
				 << " to file..." << endl;
			return false;
		}
	}

	// close master file before archiving (and later, deletion)
	master_file.close();
	// create a tar file containing all model files in the temporary directory
	if(!create_archive(archive_filename, contents_path))
		return false;
	
	// remove all temporary model files
	for(iter = abs_node_filenames.begin(); iter != abs_node_filenames.end(); iter++)
	{
		if(unlink((*iter).c_str()) != 0)
			return false;
	}
	// delete the master file and then the contents directory
	if(unlink(master_filename.c_str()) != 0 || rmdir(contents_path.c_str()) != 0)
		return false;

	return true;
}

// Function appends a pointer to every node of the subtree rooted at
// curr_node to node_ptrs, in postfix order: the left subtree first, then
// the right subtree, then the node itself.  This lets node details be
// written to file iteratively.  A NULL curr_node adds nothing.
void hierarchical_classifier::flatten_tree(vector<classifier_node*> & node_ptrs,
										   classifier_node* curr_node)
{
	// an absent node contributes nothing
	if(curr_node == NULL)
		return;

	// children go in before their parent; a missing child is NULL and is
	// absorbed by the check above
	flatten_tree(node_ptrs, curr_node->get_child(classifier_node::left));
	flatten_tree(node_ptrs, curr_node->get_child(classifier_node::right));

	// finally record the current node
	node_ptrs.push_back(curr_node);
}

// Function to load the MCST SVM model from file.  Nodes are read and tree structure
// is built sequentially and depends on postfix node ordering in the master file
// record.  Postfix traversal means that a node's children, if present, are always 
// loaded before the parent node itself.  This is essential for restoring the tree
// structure in a progressive manner.
bool hierarchical_classifier::load(const string & archive_filename)
{
	int i;
	// re-usable pointer to each new node created
	classifier_node *curr_node;
	// temporary vector of pointers to newly created nodes
	vector<classifier_node*> node_ptrs;
	// form the absolute path of the contents directory
	string contents_path   = archive_filename + "_contents/";
	// for the absolute path of the master file
	string master_filename = contents_path + "master.dat";
	// storage for absolute paths of node files
	list<string> abs_node_filenames;
	list<string>::const_iterator iter;
	
	// check that we dont attempted to load over an already setup model
	if(model_setup)
	{
		cout << "Warning:- attempted to load an MCST-SVM model over an already "
			 << "trained model, ignoring operation..." << endl;
		
		return false;
	}
	// extract the model files from the tar archive
	if(!extract_archive(archive_filename, contents_path))
		return false;
	
	// open the input filestream
	ifstream master_file(master_filename.c_str());
	// check that the file opened successfully
	if(!master_file.is_open())
	{
		cout << "Error:- could not open model masterfile for reading..."
			 << endl;
			 
		return false;
	}
	// read the header information from file
	read_feature_details(master_file);
	// read information about the leaf distributions
	read_leaf_dists(master_file);
	// for each MCST node (an MCST will always contain (k-1) nodes)
	for(i = 0; i < targets.size() - 1; i++)
	{
		// create a new node
		curr_node = new classifier_node;
		// check that the memory could be allocated
		if(curr_node == NULL)
		{
			cout << "Error:- could not allocate memory for a new MCST node during "
				 << "loading..." << endl;
			
			return false;
		}
		// read the node details from file and store the node pointer
		if(!(curr_node->read(master_file, contents_path, abs_node_filenames)))
			return false;
			
		node_ptrs.push_back(curr_node);
	}
	
	// create parent-child links between nodes
	create_tree_linkage(node_ptrs);
	// set the root pointer - this will always be the last node created since the
	// model file is created using depth-first tree traversal
	root = node_ptrs[node_ptrs.size() - 1];

	// close the master file before deletion
	master_file.close();
	
	// remove all temporary model files
	for(iter = abs_node_filenames.begin(); iter != abs_node_filenames.end(); iter++)
	{
		if(unlink((*iter).c_str()) != 0)
			return false;
	}
	// delete the master file and the temporary contents directory
	if(unlink(master_filename.c_str()) != 0 || rmdir(contents_path.c_str()) != 0)
		return false;	
	
	// model is now setup and ready to classify
	model_setup = true;
	return true;
}

// Function to set the parent-child links between the nodes created when
// loading from file.  For every node, the other nodes are searched for
// one covering exactly the node's first class set (which becomes the left
// child) and one covering exactly its second class set (which becomes the
// right child).  A child is set to NULL when no such node exists.
void hierarchical_classifier::create_tree_linkage(vector<classifier_node*> & node_ptrs)
{
	for(size_t idx = 0; idx < node_ptrs.size(); ++idx)
	{
		classifier_node *parent = node_ptrs[idx];
		// the pair of class sets that identifies this node
		const vector<set<int> > parent_sets = parent->get_class_sets();

		// candidate children: every node except the parent itself, in
		// their original order (a node cannot be a child of itself)
		vector<classifier_node*> candidates(node_ptrs.begin(),
											node_ptrs.begin() + idx);
		candidates.insert(candidates.end(),
						  node_ptrs.begin() + idx + 1, node_ptrs.end());

		// look for a node matching each class set - NULL if none is found
		classifier_node *left_match = find_child(candidates, parent_sets.front());
		classifier_node *right_match = find_child(candidates, parent_sets.back());

		parent->set_child(classifier_node::left, left_match);
		parent->set_child(classifier_node::right, right_match);
	}
}

// Function to return the MCST SVM classifier to its unconfigured state
// so that it can be trained or loaded again.  The tree is released, the
// target classes and feature limits are forgotten, and the scaling range
// is reset to [-1, 1].
void hierarchical_classifier::clear(void)
{
	// release the tree and forget where it was
	free_tree_nodes(root);
	root = NULL;

	// no target classes are known any more
	targets.clear();

	// restore the default feature scaling parameters
	lower_limits.clear();
	upper_limits.clear();
	scale_range_low = -1;
	scale_range_high = 1;

	// the model must be set up again before it can be used
	num_features = 1;
	model_setup = false;
}

// Function to print a report on the MCST SVM classifier: a summary line,
// the scaling details, every tree node, and the leaf distributions.  If
// the model has not been trained or loaded, only an error is printed.
void hierarchical_classifier::print(void)
{
	if(model_setup)
	{
		// summary of how many classes and nodes the model has
		cout << "MCST-SVM classifier for data from " << targets.size() << " classes ";
		cout << "with " << targets.size() - 1 << " tree nodes, as follows:";
		cout << endl << endl;

		// the scaling factors that this classifier uses
		print_scaling_details();
		// the details of all tree nodes
		print_tree_nodes(root);
		// the leaf distribution details
		print_leaf_dists();	
		return;
	}

	// nothing sensible can be reported for a model that is not set up
	cout << "Error:- attempted to print the details of an MCST SVM "
		 << "which had not been trained or loaded, ignoring operation..."
		 << endl;
}

// Function to print every node of the subtree rooted at curr_node,
// depth first: the left subtree, then the right subtree, then the node
// itself.  A NULL curr_node prints nothing.
void hierarchical_classifier::print_tree_nodes(classifier_node *curr_node)
{
	// an absent node has nothing to print
	if(curr_node == NULL)
		return;

	// children are printed before their parent; a missing child is NULL
	// and is absorbed by the check above
	print_tree_nodes(curr_node->get_child(classifier_node::left));
	print_tree_nodes(curr_node->get_child(classifier_node::right));

	// print the details of the current node
	curr_node->print();
}

// Function to print the members of a single set on one line of standard
// output, each member followed by a tab.
void hierarchical_classifier::print_set(const set<int> & the_set)
{
	set<int>::const_iterator member = the_set.begin();

	// emit the members in set order, tab separated
	while(member != the_set.end())
	{
		cout << *member << "\t";
		++member;
	}
	
	cout << endl;
}

// Function to print several sets, one set per line, by calling
// print_set() on each in turn.
void hierarchical_classifier::print_sets(const vector<set<int> > & the_set)
{
	vector<set<int> >::const_iterator curr_set;

	// each set goes on a line of its own
	for(curr_set = the_set.begin(); curr_set != the_set.end(); ++curr_set)
		print_set(*curr_set);
}

//////////////////

// Default constructor.  Nothing specific to nnsl_classifier needs to be
// set up here.
nnsl_classifier::nnsl_classifier(void)
{
	// intentionally empty
}

// Constructor taking the type of class representative.  The value is
// simply forwarded to the hierarchical_classifier base class through the
// initializer list.
nnsl_classifier::nnsl_classifier(const distance_type & the_rep)
				: hierarchical_classifier(the_rep)
{
	// intentionally empty
}

// Function to train the MCST classifier given a dataset parameter.
bool nnsl_classifier::train(const svm_dataset & the_dataset)
{
	int i = 0, j = 0, first_index, second_index, iteration = 1;
	// target class sets
	vector<set<int> > sets;
	// weighted edges representing the fully-connected graph of target classes
	vector<mcst_edge> edges;
	// temporary vector of node pointers used to create parent-child links
	// and set the root node pointer
	vector<classifier_node*> node_ptrs;
	
	// check that we are not attempting to train an already-trained model
	if(model_setup)
	{
		cout << "Warning:- attempted to train a MCST model that has already "
			 << "been setup, ignoring training operation...";
		
		return false;
	}
	
	// store a vector of target classes
	targets = the_dataset.get_targets();
	// store the number of features per sample in the dataset
	num_features = the_dataset.get_num_features();
	// calculate the fully connect graph weights
	edges = calc_graph_edges(the_dataset);
	
//	cout << "--- Edge distances are:" << endl;
//	for(i = 0; i < edges.size(); i++)
//		edges[i].print();
	
	// extract and store the scaling range used on the training dataset
	the_dataset.get_scale_range(scale_range_low, scale_range_high);
	// extract and store the observed feature limits of training dataset
	the_dataset.get_feature_limits(lower_limits, upper_limits);
	
	// for each of the target classes		
	for(i = 0; i < targets.size(); i++)
	{
		// create and store a singleton set
		set<int> curr_target;
		curr_target.insert(targets[i]);
		sets.push_back(curr_target);
	}

//	cout << "Sets are initially:" << endl;
//	print_sets(sets);

	// while there are still sets to join
	while(sets.size() > 1)
	{			 
		// check if the j-th edge connects any two sets and, if so,
		// store the set indices in first_index and second_index
		if(joins_subtrees(edges[j], sets, first_index, second_index))
		{
			cerr << "*-*-* Training node " << iteration << " of " 
				 << targets.size() - 1 << ", "
				 << sets[first_index].size() << " class size vs. "
				 << sets[second_index].size() << " class size" << endl;

			// edge connects two sets, so create a new MCST node
			classifier_node* new_node = new classifier_node;
			// add the two sets that this node will discriminate between
			new_node->add_class_set(sets[first_index]);
			new_node->add_class_set(sets[second_index]);
			// update the parent-child links between existing nodes
			new_node->set_child(classifier_node::left, find_child(node_ptrs, sets[first_index]));
			new_node->set_child(classifier_node::right, find_child(node_ptrs, sets[second_index]));
			// extract an appropriate data subset for the new node
			svm_dataset node_dataset = get_data_subset(the_dataset, 
													   sets[first_index], 
													   sets[second_index]);
			// train the node and store a pointer to this node
			new_node->train(node_dataset);	
			node_ptrs.push_back(new_node);
			// merge the two sets joined by the current edge
			merge_sets(sets, first_index, second_index);
			iteration++;
		}
		// move to the next-shortest edge
		j++;
	}
	// set the root - this will always be the last node created
	root = node_ptrs.back();
	// the model is now successfully setup and can be used to classify
	model_setup = true;	
	// setup probability distributions in the tree leaves
	setup_leaf_probabilities(the_dataset);
	return true;
}

// Function to calculate the weights of a fully connected graph where
// vertices are target classes and weights are the distance between
// target classes as computed by class_distance().  One edge is created
// for every unordered pair of target classes, and the edges are returned
// sorted by weight.
vector<mcst_edge> nnsl_classifier::calc_graph_edges(const svm_dataset & the_dataset)
{
	size_t i, j;
	// resultant vector of edges
	vector<mcst_edge> the_edges;
	
	// for each target class that still has a later class to pair with
	for(i = 0; i + 1 < targets.size(); i++)
	{
		// the samples of the i-th class are the same for every pairing
		// below, so fetch them once here instead of once per pair
		vector<vector<double> > subset1 = the_dataset.get_samples(targets[i]);

		// and every other target class
		for(j = i + 1; j < targets.size(); j++)
		{
			// create a new edge
			mcst_edge new_edge;
			// extract the samples belonging to the other class
			vector<vector<double> > subset2 = the_dataset.get_samples(targets[j]);
			// set the end-points of the edge
			new_edge.set_vertices(targets[i], targets[j]);
			// calculate and set the weight of the edge
			new_edge.set_weight(class_distance(subset1, subset2));

			the_edges.push_back(new_edge);
		}
	}

	// sort the edges by weight before returning the result
	sort(the_edges.begin(), the_edges.end());
	return the_edges;
}

// Function reports whether the edge parameter joins two of the sets in
// the `sets' vector, i.e. whether one end-point lies in one set and the
// other end-point lies in a different, disjoint set.  Pairs of sets are
// examined in index order; for the first pair that is joined, the two
// indices are stored in first_index and second_index (first_index being
// the smaller) and true is returned.  If no pair is joined, false is
// returned and the index parameters are left untouched.
bool nnsl_classifier::joins_subtrees(const mcst_edge & the_edge, 
									 const vector<set<int> > & sets,
									 int & first_index, int & second_index)
{
	// edge end-points
	int first_vertex, second_vertex;
	the_edge.get_vertices(first_vertex, second_vertex);

	const int num_sets = static_cast<int>(sets.size());

	// consider every unordered pair of candidate sets
	for(int lower = 0; lower < num_sets; ++lower)
	{
		for(int upper = lower + 1; upper < num_sets; ++upper)
		{
			const set<int> & lower_set = sets[lower];
			const set<int> & upper_set = sets[upper];

			// only disjoint sets can be joined by an edge
			if(!sets_disjoint(lower_set, upper_set))
				continue;

			// the edge may run from the lower set to the upper set...
			const bool forwards = lower_set.count(first_vertex) != 0 &&
								  upper_set.count(second_vertex) != 0;
			// ...or the other way around
			const bool backwards = lower_set.count(second_vertex) != 0 &&
								   upper_set.count(first_vertex) != 0;

			if(forwards || backwards)
			{
				// report the joined pair and stop searching
				first_index = lower;
				second_index = upper;
				return true;
			}
		}
	}
	
	// the edge does not join any two sets
	return false;
}

// Function returns true if first_set and second_set have no member in
// common, and returns false otherwise.
bool nnsl_classifier::sets_disjoint(const set<int> & first_set,
									const set<int> & second_set)
{
	set<int>::const_iterator lhs = first_set.begin();
	set<int>::const_iterator rhs = second_set.begin();

	// both sets are iterated in increasing order, so walk them in step,
	// always advancing whichever side currently holds the smaller value
	while(lhs != first_set.end() && rhs != second_set.end())
	{
		if(*lhs < *rhs)
			++lhs;
		else if(*rhs < *lhs)
			++rhs;
		else
			// the same value is present in both sets
			return false;
	}

	// one set ran out without a common member being found
	return true;
}

///////////////////////

// Default constructor.  No initialisation is carried out here; the body is
// intentionally left empty.
non_mcst_classifier::non_mcst_classifier()
{
}

// Constructor taking a distance type.  The argument is passed straight on
// to the hierarchical_classifier base-class constructor via the initializer
// list; the body itself has nothing to do.
non_mcst_classifier::non_mcst_classifier(const distance_type & the_rep)
	: hierarchical_classifier(the_rep)
{
}

// Function to train the hierarchical (non-MCST) classifier given a dataset
// parameter.  Training starts from one singleton set per target class.  The
// two sets selected by find_best_merger() are merged repeatedly until only
// one set remains; each merger creates one classifier_node that is trained
// on the samples of the two sets.  The node created last becomes the root.
//
// Returns true once the model has been set up.  Returns false, leaving the
// object untouched, if the model has already been set up or if the dataset
// has fewer than two target classes.
bool non_mcst_classifier::train(const svm_dataset & the_dataset)
{
	int iteration = 1;
	// target class sets
	vector<set<int> > sets;	
	// temporary vector of node pointers used to create parent-child links
	// and set the root node pointer
	vector<classifier_node*> node_ptrs;

	// check that we are not attempting to train an already-trained model
	if(model_setup)
	{
		cout << "Warning:- attempted to train a MCST model that has already "
			 << "been setup, ignoring training operation...";
		
		return false;
	}
	
	// every node discriminates between two class sets, so with fewer than two
	// target classes no node would ever be created and there would be no
	// root node to assign below; refuse before any member state is modified
	if(the_dataset.get_targets().size() < 2)
	{
		cout << "Warning:- attempted to train a model on a dataset with fewer "
			 << "than two target classes, ignoring training operation..."
			 << endl;
		
		return false;
	}
	
	// store a vector of target classes
	targets = the_dataset.get_targets();
	// store the number of features per sample in the dataset
	num_features = the_dataset.get_num_features();
	// extract and store the scaling range used on the training dataset
	the_dataset.get_scale_range(scale_range_low, scale_range_high);
	// extract and store the observed feature limits of training dataset
	the_dataset.get_feature_limits(lower_limits, upper_limits);
	
	// for each of the target classes		
	for(size_t i = 0; i < targets.size(); i++)
	{
		// create and store a singleton set
		set<int> curr_target;
		curr_target.insert(targets[i]);
		sets.push_back(curr_target);
	}

	// while there are still sets to join
	while(sets.size() > 1)
	{
		// positions (within sets) of the two class sets to merge
		int first_index = 0, second_index = 0;
		// find the best two class sets to merge
		find_best_merger(sets, the_dataset, first_index, second_index);
		// form the data subset from these two sets
		svm_dataset node_dataset = get_data_subset(the_dataset,
												   sets[first_index],
												   sets[second_index]);
		
		cerr << "*-*-* Training node " << iteration << " of " 
			 << targets.size() - 1 << ", "
			 << sets[first_index].size() << " class size vs. "
			 << sets[second_index].size() << " class size" << endl;
				 		
		// create a new tree node for this merger
		classifier_node* new_node = new classifier_node;
		// add the two sets that this node will discriminate between
		new_node->add_class_set(sets[first_index]);
		new_node->add_class_set(sets[second_index]);
		// update the parent-child links between existing nodes
		new_node->set_child(classifier_node::left, 
							find_child(node_ptrs, sets[first_index]));
		new_node->set_child(classifier_node::right, 
							find_child(node_ptrs, sets[second_index]));		

		// train the node and store a pointer to this node
		new_node->train(node_dataset);	
		node_ptrs.push_back(new_node);
		
		// merge the two sets that the new node discriminates between
		merge_sets(sets, first_index, second_index);
		iteration++;
//		cout << "==> Updated vector of sets now contains:" << endl;
//		print_sets(sets);
	}
	// set the root - this will always be the last node created (at least one
	// node exists here because there were two or more target classes)
	root = node_ptrs.back();
	// the model is now successfully setup and can be used to classify
	model_setup = true;			
	// setup probability distributions in the tree leaves
	setup_leaf_probabilities(the_dataset);
	return true;
}

// Function to return a vector of samples containing all samples belonging to
// the classes whose IDs are listed in the parameter the_set.  Classes are
// visited in the iteration order of the_set (ascending class ID) and the
// samples of each class keep the order in which get_samples() supplies them.
// An empty the_set yields an empty vector.
vector<vector<double> > non_mcst_classifier::set_to_samples(const svm_dataset & whole_dataset,
															const set<int> & the_set)
{
	// the resultant subset of samples
	vector<vector<double> > sample_subset;
	
	// for each member (class ID) of the set
	for(set<int>::const_iterator iter = the_set.begin(); 
		iter != the_set.end(); ++iter)
	{
		// bind by const reference so that no additional copy of the class's
		// samples is made before they are appended
		const vector<vector<double> > & curr_class = 
											whole_dataset.get_samples(*iter);
		// append all samples of this class in one operation
		sample_subset.insert(sample_subset.end(), 
							 curr_class.begin(), curr_class.end());
	}
	
	return sample_subset;
}

///////////////////////

// Default constructor.  No initialisation is carried out here; the body is
// intentionally left empty.
nnal_classifier::nnal_classifier()
{
}

// Constructor taking a distance type.  The argument is passed straight on
// to the non_mcst_classifier base-class constructor via the initializer
// list; the body itself has nothing to do.
nnal_classifier::nnal_classifier(const distance_type & the_rep)
	: non_mcst_classifier(the_rep)
{
}

// Function to find the two closest sets for merging using an average-link
// approach.  For every pair of sets, the samples of all classes in each set
// are pooled with set_to_samples() and the distance between the two pools is
// obtained from class_distance(); the pair with the minimum distance is the
// one to merge at this iteration.  The indices of these sets are stored in
// first_pos and second_pos (first_pos < second_pos).  On ties the pair
// encountered first in index order is kept.
//
// At least two sets must be supplied (checked by assertion).
void nnal_classifier::find_best_merger(const vector<set<int> > & sets,
									   const svm_dataset & whole_dataset,
									   int & first_pos, int & second_pos)
{
	// is this the first pair of sets compared?
	bool first_iteration = true;
	// smallest distance observed; the first pair compared always replaces
	// this initial value
	double smallest_distance = BIG_NUM;
	// should always have at least 2 sets when calling this function
	assert(sets.size() > 1);
	
	const size_t num_sets = sets.size();
	
	// for every set that still has a later partner
	for(size_t i = 0; i + 1 < num_sets; i++)
	{
		// the pooled samples of the first set do not depend on the partner,
		// so they are gathered once here rather than once per pair
		// NOTE(review): relies on class_distance() not modifying its
		// arguments - confirm against its declaration
		vector<vector<double> > first_subset = set_to_samples(whole_dataset, sets[i]);
		
		// and every other set
		for(size_t j = i + 1; j < num_sets; j++)
		{
			// get the second set of samples
			vector<vector<double> > second_subset = set_to_samples(whole_dataset, sets[j]);
			// calculate the distance between the two sample sets
			const double curr_distance = class_distance(first_subset, second_subset);
//			cout << "set " << i << "vs.\t" << j << " :\t" << curr_distance << endl;
			// if this is the first comparison or a new smallest distance has been found
			if(first_iteration || (curr_distance < smallest_distance))
			{					 
				// update the smallest distance record and store the indices of the pair
				smallest_distance = curr_distance;
				first_pos = static_cast<int>(i);
				second_pos = static_cast<int>(j);
				
//				cout << "*-*-* new smallest distance found at position " 
//					 << first_pos << " and " << second_pos << endl;
			}
			
			first_iteration = false;
		}
	}
}

///////////////////////////

// Default constructor.  No initialisation is carried out here; the body is
// intentionally left empty.
nncl_classifier::nncl_classifier()
{
}

// Constructor taking a distance type.  The argument is passed straight on
// to the non_mcst_classifier base-class constructor via the initializer
// list; the body itself has nothing to do.
nncl_classifier::nncl_classifier(const distance_type & the_rep)
	: non_mcst_classifier(the_rep)
{
}

// Function to find the two closest sets for merging using a complete-link
// approach.  The largest distance between members of two given
// sets is found; the minimum of these distances determines which two clusters
// to merge at each iteration.  The indices of these sets are stored in 
//first_post and second_pos.
void nncl_classifier::find_best_merger(const vector<set<int> > & sets,
									   const svm_dataset & whole_dataset,
									   int & first_pos, int & second_pos)
{
	int i, j;
	// is this the first pair of sets compared?
	bool first_iteration = true;
	// distance between sets under examination, smallest distance observed
	double curr_distance, smallest_distance = BIG_NUM;
	// should always have at least 2 sets when calling this function
	assert(sets.size() > 1);
	
	// for every set
	for(i = 0; i < sets.size(); i++)
	{
		// and every other set
		for(j = i + 1; j < sets.size(); j++)
		{
			// calculate the Euclidean distance between the two sample sets
			curr_distance = maximum_distance(whole_dataset, sets[i], sets[j]);
//			cout << "set " << i << "vs.\t" << j << " :\t" << curr_distance << endl;
			// if this is the first comparison or a new smallest distance has been found
			if(first_iteration || (curr_distance < smallest_distance))
			{					 
				// update the smallest distance record and store the indices of the pair
				smallest_distance = curr_distance;
				first_pos = i;				
				second_pos = j;
			}
			
			first_iteration = false;
		}
	}
}

// Function to find the largest distance between between two sets containing
// target class IDs.
double nncl_classifier::maximum_distance(const svm_dataset & whole_dataset,
										 const set<int> & first_set, 
										 const set<int> & second_set)
{
	// current pairing distance and maximum distance discovered thus far
	double curr_distance, max_distance = -1;
	set<int>::const_iterator first_iter, second_iter;
	
	// for each target class in the first set
	for(first_iter = first_set.begin(); first_iter != first_set.end(); 
		first_iter++)
	{
		// and every target class in the second set
		for(second_iter = second_set.begin(); second_iter != second_set.end(); 
			second_iter++)
		{
			// extract the collections of samples for each target class
			vector<vector<double> > first_samples = 
											whole_dataset.get_samples(*first_iter);
			vector<vector<double> > second_samples = 
											whole_dataset.get_samples(*second_iter);
			// calculate the distance between the two collections of samples
			curr_distance = class_distance(first_samples, second_samples);
			// if a new largest distance is found, update the result
			if(curr_distance > max_distance)
				max_distance = curr_distance;
		}		
	}
	
	return max_distance;
}
