#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <math.h>
#include <memory.h>

#if !defined(plainRandom_Def)
#define plainRandom_Def
#define plainRandom(min, max) ((min) + (double) rand() / RAND_MAX * ((max) - (min)))
#endif

void main()	{
	// local subs to be called
	void getLong(char*, long&);
	void getDouble(char*, double&);
	void getPoint(char*, double&, double&);
	double gaussRandom(double m, double v);
	void banner();

	banner();

	// variable to be read from the user
	long nDataNum, nClusterNum, nVarianceLevels, nNoiseLevels;
	double fMinVariance, fMaxVariance, fMinNoise, fMaxNoise;
	double fMinX, fMaxX, fMinY, fMaxY;

	// cluster centroids to be:
	// either read from the user or randomly generated
	double** pptrClusterCentroids;

	long i;
	long nUserChoise;
	long n, v;
	char strFileName[1024];

	FILE* fptrOutput;


	unsigned long seed = time(0);
	printf("%d\n", seed);
	// initialize random seeds
	srand(seed);


	// get various parameters
	getLong("Input number of data points (long integer)", nDataNum);
	assert(nDataNum > 0);

	printf("Input the rectangular area of the data points\n");
	printf("  with x and y values separated by space, and followed by enter.\n");
	printf("  Example: 0.567 0.934<CR>\n");

	getPoint("Lower-Left Point", fMinX, fMinY);
	getPoint("Upper-Right Point", fMaxX, fMaxY);
	assert(fMaxX >= fMinX);
	assert(fMaxY >= fMinY);

	getLong("Input number of clusters (long integer)", nClusterNum);
	assert(nClusterNum > 1);
	assert(nDataNum > nClusterNum);

	getDouble("Input minimal noise rate (double float)", fMinNoise);
	assert(fMinNoise >= 0. && fMinNoise <= 1.);

	getDouble("Input maximal noise rate (double float)", fMaxNoise);
	assert(fMaxNoise >= 0. && fMaxNoise <= 1.);
	assert(fMaxNoise >= fMinNoise);

	getLong("Input noise levels (long integer)", nNoiseLevels);
	assert(nNoiseLevels > 0);

	getDouble("Input minimal gaussian variance for clean data (double float)", fMinVariance);
	assert(fMinVariance >= 0.);

	getDouble("Input maximal gaussian variance for clean data (double float)", fMaxVariance);
	assert(fMaxVariance >= 0.);
	assert(fMaxVariance >= fMinVariance);

	getLong("Input variance levels (long integer)", nVarianceLevels);
	assert(nVarianceLevels > 0);
	
	// allocate mem for cluster centroids
	pptrClusterCentroids = new double*[nClusterNum];
	assert(pptrClusterCentroids != NULL);
	for (i = 0; i < nClusterNum; i++)	{
		pptrClusterCentroids[i] = new double[2];
		assert(pptrClusterCentroids[i] != NULL);
	}

	// assign cluster centroids values
	getLong("Do you want to ...\n\t1. Input cluster centroids manually\n\t2. Generate cluster centroids randomly\nInput 1 or 2", nUserChoise);

	if (nUserChoise == 1)	{
		printf("Input cluster centroids below,");
		printf("  with x and y values separated by space, and followed by enter.\n");
		printf("  Example: 0.567 0.934<CR>\n");
		char strQuestion[128];
		for (i = 0; i < nClusterNum; i++)	{
			memset(strQuestion, 0, 128);
			sprintf(strQuestion, "#%4d", i + 1);
			getPoint(strQuestion, pptrClusterCentroids[i][0], pptrClusterCentroids[i][1]);
		}
	} else	{
		for (i = 0; i < nClusterNum; i++)	{
			pptrClusterCentroids[i][0] = plainRandom(fMinX, fMaxX);
			pptrClusterCentroids[i][1] = plainRandom(fMinY, fMaxY);
		}
	}
	
	// now start random generalization

	for (v = 0; v < nVarianceLevels; v++)	{
		for (n = 0; n < nNoiseLevels; n++)	{
			// generate file name
			memset(strFileName, 0, 1024);
			sprintf(strFileName, "%02d-%02d.txt", v, n);

			// get current variance and noise rate for the current file
			double fV = fMinVariance + (fMaxVariance - fMinVariance) / (nVarianceLevels - 1) * v;
			double fR = fMinNoise + (fMaxNoise - fMinNoise) / (nNoiseLevels - 1) * n;

			printf("Generating %s, r = %7.6f, v = %7.6f ...",
				strFileName, fR, fV);

			// open for write
			fptrOutput = fopen(strFileName, "w");
			assert(fptrOutput != NULL);

			// write the two comment lines
			fprintf(fptrOutput, "# 0 %d 2 0\n", nDataNum);
			fprintf(fptrOutput, "# %d %7.6f %7.6f %d %7.6f %7.6f %d\n", 
				nClusterNum, fMinNoise, fMaxNoise, nNoiseLevels, fMinVariance, fMaxVariance, nVarianceLevels);
			fprintf(fptrOutput, "# %7.6f %7.6f\n", fR, fV);

			// write the cluster centroids
			for (i = 0; i < nClusterNum; i++)	{
				fprintf(fptrOutput, "0:%7.6f 1:%7.6f\n", pptrClusterCentroids[i][0], pptrClusterCentroids[i][1]);
			}

			// get integer statistics
			long nNoiseData = (long) (nDataNum * fR);
			long nCleanData = nDataNum - nNoiseData;
			
			// generate random clean data one by one
			for (i = 0; i < nCleanData - nClusterNum; i++)	{
				// evenly distribute clean data into existing clusters
				// long nClusterID = i % nClusterNum;
				// modification: use randomly assigned cluster id
				// so that the input order are shuffled
				// while the distribution are nearly even
				long nClusterID = (long) plainRandom(0, nClusterNum - 0.000001);

				assert(nClusterID >= 0 && nClusterID < nClusterNum);

				long ok = 0;
				double fNorm, fAngle, fX, fY;
				
				while (!ok)	{
					// distance to the cluster centroid follows gaussian distribution
					fNorm = gaussRandom(0, fV);
					// a plain random angle
					fAngle = plainRandom(0, 2 * 3.14159);
					fX = pptrClusterCentroids[nClusterID][0] + fNorm * cos(fAngle);
					fY = pptrClusterCentroids[nClusterID][1] + fNorm * sin(fAngle);

					// only random point in the specified rectangular space is considered valid
					ok = (fX >= fMinX && fX <= fMaxX && fY >= fMinY && fY <= fMaxY) ? 1 : 0;
				}

				fprintf(fptrOutput, "0:%7.6f 1:%7.6f\n", fX, fY);
			}

			// generate random white noises one by one
			for (i = 0; i < nNoiseData; i++)	{
				fprintf(fptrOutput, "0:%7.6f 1:%7.6f\n",
					plainRandom(fMinX, fMaxX), plainRandom(fMinY, fMaxY));
			}

			fclose(fptrOutput);
			
			printf("OK\n");
			fflush(stdout);

		}
	}



	// release mem for cluster centroids
	for (i = 0; i < nClusterNum; i++)	{
		delete pptrClusterCentroids[i];
	}
	delete pptrClusterCentroids;
}

void getLong(char* strPrompt, long& input)	{
	printf("%s: ", strPrompt);
	scanf("%d", &input);
}

void getDouble(char* strPrompt, double& input)	{
	printf("%s: ", strPrompt);
	scanf("%lf", &input);
}

void getPoint(char* strPrompt, double& x, double& y)	{
	printf("%s: ", strPrompt);
	scanf("%lf %lf", &x, &y);
}


// Returns a normally (Gaussian) distributed deviate 
// with mean     m
// and  variance v
// Codes based on NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING
// Chapter 7.3
double gaussRandom(double m, double v) {
	static long iset = 0;
	static double gset;
	double fac, rsq, v1, v2;

	if (iset == 0) { 							// We don't have an extra deviate handy, so
		do {
			v1 = 2.0 * plainRandom(0, 1) - 1.0; 	// pick two uniform numbers in the square extending
								   			    // from -1 to +1 in each direction, 
			v2 = 2.0 * plainRandom(0, 1) - 1.0;
			rsq = v1 * v1 + v2 * v2; 			// see if they are in the unit circle,
		} while (rsq >= 1.0 || rsq == 0.0);

		fac = sqrt(-2.0 * log(rsq) / rsq);
							// save the other for next time.
		gset = m + v * v1 * fac;
		iset = 1; 			// Set flag.
		return m + v * v2 * fac;
	} else { 				// We have an extra deviate handy,
		iset = 0; 			// so unset the flag,
		return gset; 		// and return it.
	}
}

void banner()	{
	printf("**************************************************\n");
	printf("      Gaussian Random 2D Data Generator\n");
	printf("      (Data Sets for Clustering Systems)\n");
	printf("                (c) 2003\n");
	printf("            Ji He (mail@jihe.net)\n");
	printf("      Man Lan (lanman@i2r.a-star.edu.sg)\n");
	printf("**************************************************\n\n");
}
