/*
	Copyright (C) the University of Jyväskylä
	The Laboratory of Data Analysis

	Example Visual C++ project that uses the NDA DLL from an
	external program; it also includes TS-SOM edit and
	imputation examples.

	NOTE:
	The compiled executable also requires the files
	edit.cmd, impute.cmd and nda.dll to be placed
	in the executable's directory. These files
	can be found in the \bin directory.
*/

#include "stdafx.h"
#include <conio.h>
#include <stdlib.h>
#include <math.h>
#include "nda.h"						/* NDA DLL functions + helper functions, and errors */

/*
	Forward declarations of the example steps defined later in this file.
	Every declaration uses an explicit (void) parameter list so that it is
	a real prototype and the compiler rejects calls that pass arguments.
*/
int generate_data(void);				/* generates incomplete and erroneous data of 1000 records */
int do_edit_and_imputation(void);		/* does both TS-SOM edit and imputation */
int do_imputation(void);				/* does only TS-SOM imputation */
int do_edit(void);						/* does only TS-SOM edit */
int display_output(void);				/* displays output (= changed) data */
int display_error_probabilities(void);	/* displays error probabilities */

int main(int argc, char* argv[])
/*
	main program: loads nda.dll, generates the example data, then runs
	imputation, edit, and combined edit and imputation one after another,
	and finally releases the DLL.

	A failing run does not stop the remaining runs; its error code is
	printed and the next run is still attempted.

	Returns 0 when every step succeeded, and -1 when the DLL could not be
	loaded, the data could not be created, or at least one of the three
	runs reported an error.
*/
{
	int stat;					/* error code of the latest run */
	int failed = 0;				/* set to 1 as soon as any run reports an error */

	/* command line arguments are not used by this example */
	(void)argc;
	(void)argv;

	/* initialize NDA dll */
	if(init_nda_dll()){
		printf ("ERROR: Unable to load nda.dll library\n");
		free_nda_dll();
		return -1;
	}
	printf("nda.dll loaded\n\n");


	/* generate data to NDA's namespace */
	printf("generating incomplete and erroneous data\n\n");

	if (generate_data()){
		printf("Failed to create data into NDA's namespace\n");
		free_nda_dll();
		printf("nda.dll released\n");
		return -1;
	}


	/* do imputation (ONLY) */
	stat = do_imputation();
	if(stat){
		printf("do_imputation() failed, error %d\n", stat);
		failed = 1;
	}
	printf("\n");

	/* do edit (ONLY) */
	stat = do_edit();
	if(stat){
		printf("do_edit() failed, error %d\n", stat);
		failed = 1;
	}
	printf("\n");

	/* do edit and imputation */
	stat = do_edit_and_imputation();
	if(stat){
		printf("do_edit_and_imputation() failed, error %d\n", stat);
		failed = 1;
	}
	printf("\n");


	/* release NDA DLL */
	free_nda_dll();
	printf("nda.dll released\n");

	/* report failure to the caller if any of the runs failed */
	return failed ? -1 : 0;
}

int generate_data(void)
/*
	creates the data frame "data" in NDA's namespace and fills it with
	two float variables, var1 and var2, of 1000 records each. var2 is
	deliberately given five missing values and five outliers.

	Returns 0 on success and -1 if the frame cannot be created or a
	variable cannot be inserted.

	NOTE: a) this code can be modified to transfer any external data
		     into NDA's namespace
		  b) data frame names in NDA's namespace must be unique, so an
		     earlier frame must be removed before a new one with the
		     same name is created. The data frame "data" can be
		     removed by calling:
			 run_nda_command("rm data");
*/
{
	enum { RECORD_COUNT = 1000, MARKED_ROWS = 5 };

	/* rows of var2 that are flagged as missing */
	static const int missing_rows[MARKED_ROWS] = { 5, 105, 210, 315, 420 };
	/* rows of var2 that receive an obvious outlier, and the outlier values */
	static const int outlier_rows[MARKED_ROWS] = { 0, 100, 200, 300, 400 };
	static const float outlier_values[MARKED_ROWS] = { 0.0f, 0.9f, 0.15f, 0.91f, 0.05f };

	float first_var[RECORD_COUNT];		/* data of var1 */
	float second_var[RECORD_COUNT];		/* data of var2 */
	int row;							/* record counter */
	int k;								/* counter over the marked rows */

	/* create new data frame named "data" */
	if (nda_create_new_frame("data") != OPERATION_OK)
		return -1;

	/* build the two variables, record by record */
	for (row = 0; row < RECORD_COUNT; ++row) {
		first_var[row] = (float)(cos((double)row/1000.0f)+1.0f) / 2.0f;
		second_var[row] = 0.25f + 0.5f*((float)(rand()%10000))/10000.0f;
	}

	/*
		NOTE: missing data values must be marked with a special value,
		      here the value -10000 is used
	*/
	for (k = 0; k < MARKED_ROWS; ++k)
		second_var[missing_rows[k]] = -10000.0f;

	/* these are obvious outliers */
	for (k = 0; k < MARKED_ROWS; ++k)
		second_var[outlier_rows[k]] = outlier_values[k];

	/*
		append the variables to data frame "data"
		NOTE: a variable's name must not contain special characters
		      like space and "/", therefore it is best to keep
		      variable names simple like var1, ..., varN.
	*/
	if (nda_insert_ffield("data", "var1", RECORD_COUNT, &first_var[0]) != OPERATION_OK)
		return -1;
	if (nda_insert_ffield("data", "var2", RECORD_COUNT, &second_var[0]) != OPERATION_OK)
		return -1;

	/*
		NOTE: nda_insert_ifield can be used to insert 32-bit signed integer
		      data into NDA's namespace, whereas nda_insert_sfield can
		      be used to insert 8-bit string data into NDA's namespace.
	*/
	return 0;
}

int display_output(void)
/*
	displays output (imputed, edited or imputed & edited data) by
	printing, for every record in the "output" data frame, its row index
	and the values of the two data variables.

	Returns 0 on success and -1 if the "output" frame or one of its
	fields cannot be read, or if a data field holds fewer records than
	the row index field.
*/
{
	int Status;					/* status */
	long items, data_items;		/* number of records */
	long *vect;					/* integer vector (row indices) */
	float *fvect;				/* float vector (data) */
	long i;						/* record counter */
	int dim;					/* variable counter */

	/*
		read edited/imputed data
		NOTE: row index data is always integer type (32-bit) and
		      changed data is float (32-bit).
		NOTE(review): the row indices are read through a long pointer,
		      which matches 32 bits only where long is 32-bit (as in
		      Visual C++) - confirm before building elsewhere.
	*/
	Status = nda_get_field_ptr("output", 0, (void **)&vect, &items);
	if (Status != OPERATION_OK){
		printf("output data frame is missing from NDA's namespace?\n");
		return -1;
	}

	/* items and the row indices are signed long, so they are printed with %ld */
	printf("%ld records have been changed, the records are:\n", items);
	for(i=0;i<items;i++){
		printf("row index=%ld, data:\t", vect[i]);

		/*
			get 2 variables' edited/imputed data
			NOTE: in practice it is best to transfer edited/imputed data
			      variable by variable to eliminate multiple nda_get_field_ptr
			      calls (which can be slow). Here multiple calls are done for
			      easy result display.
		*/
		for(dim=0;dim<2;dim++){
			Status = nda_get_field_ptr("output", 1+dim, (void **)&fvect, &data_items);
			if (Status != OPERATION_OK) return -1;
			/* data_items is expected to equal items; never read past the field */
			if (i >= data_items) return -1;
			printf("var%d=%f\t", dim+1, fvect[i]);
		}
		printf("\n");
	}

	return 0;
}

int display_error_probabilities(void)
/*
	displays error probabilities by printing, for every record in the
	"errors" data frame, its row index and the error probabilities of
	the two variables.

	Returns 0 on success and -1 if the "errors" frame or one of its
	fields cannot be read, or if a probability field holds fewer records
	than the row index field.
*/
{
	int Status;					/* status */
	long items, data_items;		/* number of records */
	long *vect;					/* integer vector (row indices) */
	float *fvect;				/* float vector (error probability) */
	long i;						/* record counter */
	int dim;					/* variable counter */

	/*
		read error probability data
		NOTE: a) row index data is always integer type (32-bit) and
		         error probability data is always float (32-bit).
		      b) row index data is 0 based, thus 0 is the first record
		         in the data
		NOTE(review): the row indices are read through a long pointer,
		      which matches 32 bits only where long is 32-bit (as in
		      Visual C++) - confirm before building elsewhere.
	*/
	Status = nda_get_field_ptr("errors", 0, (void **)&vect, &items);
	if (Status != OPERATION_OK){
		printf("errors data frame is missing from NDA's namespace?\n");
		return -1;
	}

	/* items and the row indices are signed long, so they are printed with %ld */
	printf("There are error probabilities for %ld records, which are:\n", items);
	for(i=0;i<items;i++){
		printf("row index=%ld, error probabilities:\t", vect[i]);

		/*
			get 2 variables' error probability data
			NOTE: in practice it is best to transfer error probability data
			      variable by variable to eliminate multiple nda_get_field_ptr
			      calls (which can be slow). Here multiple calls are done for
			      easy result display.
		*/
		for(dim=0;dim<2;dim++){
			Status = nda_get_field_ptr("errors", 1+dim, (void **)&fvect, &data_items);
			if (Status != OPERATION_OK) return -1;
			/* data_items is expected to equal items; never read past the field */
			if (i >= data_items) return -1;
			printf("var%d=%f\t", dim+1, fvect[i]);
		}
		printf("\n");
	}

	return 0;
}

int do_edit_and_imputation(void)
/*
	does TS-SOM edit and imputation: builds the per-variable parameter
	data (F0, F1) and the selection sfr, runs the edit.cmd macro with
	outlier imputation enabled, displays the changed data and the error
	probabilities, and removes the temporary structures again.

	Returns 0 on success. On failure returns the negative number of the
	step that failed first: -1...-8 parameter setup, -9 select, -10 macro
	run, -11 output display, -12 error probability display, -13...-15
	removal of sfr, F0 and F1.

	The temporary structures are removed on every exit path, so a failed
	run does not leave sfr, F0 or F1 behind for the next run. After an
	earlier failure the removal is best effort and its status is ignored.
	NOTE(review): this assumes "rm" of a name that was never created
	only reports an error status - confirm against the NDA documentation.
*/
{
	int Status;					/* status of the latest call */
	int Result = 0;				/* code of the first failed step, 0 = none */

	printf("EDIT AND IMPUTATION\n");

	/*
		build fstat (the data which defines variables' train,
		edit and impute parameters)
	*/
	Status = run_nda_command("setdata -f F0 -len 4 -t string -vals 0=IMP_NONE ;"); /* no imputation */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -1; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 1=EDIT_NONE ;"); /* no edits */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -2; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 2=0.5 ;");	/* edit cut probability */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -3; goto cleanup; }
	/* NOTE(review): do_imputation and do_edit use 3.0 here - confirm 150.0 is intended */
	Status = run_nda_command("setdata -f F0 -t string -vals 3=150.0 ;");	/* sigma1, training robustness parameter */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -4; goto cleanup; }

	Status = run_nda_command("setdata -f F1 -len 4 -t string -vals 0=IMP_NRAND ;");	/* gaussian random imputation */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -5; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 1=EDIT_CONTINUOUS ;");	/* continuous */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -6; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 2=0.5 ;");	/* edit cut probability */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -7; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 3=1.75 ;");	/* sigma1, training robustness parameter */
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -8; goto cleanup; }

	Status = run_nda_command("select sfr -f F0 F1");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -9; goto cleanup; }

	/*
		run TS-SOM edit and imputation (macro edit.cmd), with parameters:
		2 = TS-SOM dimension
		4 = TS-SOM layer
		0 = TS-SOM topology
		-10000 = missing data value
		5 = lower limit for cluster record count when classifying incomplete records
		0.5 = weighting of neighbours
		0.001 = stopping criteria
		20 = maximum number of iterations
		3 = number of corrected lookup layers
		0 = training rule
		1 = use lookup table
		0 = use fullsearch
		1 = use Huber estimator
		1 = do outlier imputation [NOTE: imputation!]
		1.0 = Sigma2, affects continuous variables' error probabilities
		3.0 = Sigma1, training robustness parameter [IGNORE], overridden by sfr
	*/
	Status = run_nda_command("runcmd edit.cmd 2 4 0 -10000 5 0.5 0.001 20 3 0 1 0 1 1 1.0 3.0");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -10; goto cleanup; }

	/* display output and error probabilities */
	Status = display_output();
	if (Status != OPERATION_OK) { Result = -11; goto cleanup; }
	Status = display_error_probabilities();
	if (Status != OPERATION_OK) { Result = -12; goto cleanup; }

cleanup:
	/*
		clean NDA namespace (from temporary data structures); all three
		removals are always attempted, and a removal failure is reported
		only when no earlier step has failed
	*/
	Status = run_nda_command("rm sfr");
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -13;
	Status = run_nda_command("rm F0");	/* 1. variable parameter data (do this for each variable FI, I=0...N-1) */
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -14;
	Status = run_nda_command("rm F1");	/* 2. variable parameter data */
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -15;
	return Result;
}

int do_imputation(void)
/*
	does TS-SOM imputation: builds the per-variable parameter data
	(F0, F1) and the selection sfr, runs the impute.cmd macro, displays
	the changed data, and removes the temporary structures again.

	Returns 0 on success. On failure returns the negative number of the
	step that failed first: -1...-8 parameter setup, -9 select, -10 macro
	run, -11 output display, -12...-14 removal of sfr, F0 and F1.

	The temporary structures are removed on every exit path, so a failed
	run does not leave sfr, F0 or F1 behind for the next run. After an
	earlier failure the removal is best effort and its status is ignored.
	NOTE(review): this assumes "rm" of a name that was never created
	only reports an error status - confirm against the NDA documentation.
*/
{
	int Status;					/* status of the latest call */
	int Result = 0;				/* code of the first failed step, 0 = none */

	printf("IMPUTATION\n");

	/*
		build fstat (the data which defines variables' train,
		edit and impute parameters)
	*/
	Status = run_nda_command("setdata -f F0 -len 4 -t string -vals 0=IMP_NONE ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -1; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 1=EDIT_NONE ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -2; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 2=0.5 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -3; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 3=3.0 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -4; goto cleanup; }

	Status = run_nda_command("setdata -f F1 -len 4 -t string -vals 0=IMP_NRAND ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -5; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 1=EDIT_CONTINUOUS ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -6; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 2=0.5 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -7; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 3=1.75 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -8; goto cleanup; }

	Status = run_nda_command("select sfr -f F0 F1");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -9; goto cleanup; }

	/*
		run TS-SOM imputation (macro impute.cmd), with parameters:
		2 = TS-SOM dimension
		4 = TS-SOM layer
		0 = TS-SOM topology
		-10000 = missing data value
		5 = lower limit for cluster record count when classifying incomplete records
		0.5 = weighting of neighbours
		0.001 = stopping criteria
		20 = maximum number of iterations
		3 = number of corrected lookup layers
		0 = training rule
		1 = use lookup table
		0 = use fullsearch
		0 = use Huber estimator
	*/
	Status = run_nda_command("runcmd impute.cmd 2 4 0 -10000 5 0.5 0.001 20 3 0 1 0 0");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -10; goto cleanup; }

	/* display output */
	Status = display_output();
	if (Status != OPERATION_OK) { Result = -11; goto cleanup; }

cleanup:
	/*
		clean NDA namespace (from temporary data structures); all three
		removals are always attempted, and a removal failure is reported
		only when no earlier step has failed
	*/
	Status = run_nda_command("rm sfr");
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -12;
	Status = run_nda_command("rm F0");	/* 1. variable parameter data */
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -13;
	Status = run_nda_command("rm F1");	/* 2. variable parameter data */
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -14;
	return Result;
}

int do_edit(void)
/*
	does TS-SOM edit: builds the per-variable parameter data (F0, F1)
	and the selection sfr, runs the edit.cmd macro with outlier
	imputation disabled, displays the error probabilities, and removes
	the temporary structures again.

	Returns 0 on success. On failure returns the negative number of the
	step that failed first: -1...-8 parameter setup, -9 select, -10 macro
	run, -12 error probability display, -13...-15 removal of sfr, F0 and
	F1 (-11 is not used by this function).

	The temporary structures are removed on every exit path, so a failed
	run does not leave sfr, F0 or F1 behind for the next run. After an
	earlier failure the removal is best effort and its status is ignored.
	NOTE(review): this assumes "rm" of a name that was never created
	only reports an error status - confirm against the NDA documentation.
*/
{
	int Status;					/* status of the latest call */
	int Result = 0;				/* code of the first failed step, 0 = none */

	printf("EDIT\n");

	/*
		build fstat (the data which defines variables' train,
		edit and impute parameters)
	*/
	Status = run_nda_command("setdata -f F0 -len 4 -t string -vals 0=IMP_NONE ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -1; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 1=EDIT_NONE ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -2; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 2=0.5 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -3; goto cleanup; }
	Status = run_nda_command("setdata -f F0 -t string -vals 3=3.0 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -4; goto cleanup; }

	Status = run_nda_command("setdata -f F1 -len 4 -t string -vals 0=IMP_NONE ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -5; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 1=EDIT_CONTINUOUS ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -6; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 2=0.5 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -7; goto cleanup; }
	Status = run_nda_command("setdata -f F1 -t string -vals 3=1.75 ;");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -8; goto cleanup; }

	Status = run_nda_command("select sfr -f F0 F1");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -9; goto cleanup; }

	/*
		run TS-SOM edit (macro edit.cmd), with parameters:
		2 = TS-SOM dimension
		4 = TS-SOM layer
		0 = TS-SOM topology
		-10000 = missing data value
		5 = lower limit for cluster record count when classifying incomplete records
		0.5 = weighting of neighbours
		0.001 = stopping criteria
		20 = maximum number of iterations
		3 = number of corrected lookup layers
		0 = training rule
		1 = use lookup table
		0 = use fullsearch
		1 = use Huber estimator
		0 = do outlier imputation [NOTE: no imputation!]
		1.0 = Sigma2, affects continuous variables' error probabilities
		3.0 = Sigma1, training robustness parameter [IGNORE], overridden by sfr
	*/
	Status = run_nda_command("runcmd edit.cmd 2 4 0 -10000 5 0.5 0.001 20 3 0 1 0 1 0 1.0 3.0");
	if (Status != OPERATION_OK && Status != IGNORE) { Result = -10; goto cleanup; }

	/* display error probabilities */
	Status = display_error_probabilities();
	if (Status != OPERATION_OK) { Result = -12; goto cleanup; }

cleanup:
	/*
		clean NDA namespace (from temporary data structures); all three
		removals are always attempted, and a removal failure is reported
		only when no earlier step has failed
	*/
	Status = run_nda_command("rm sfr");
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -13;
	Status = run_nda_command("rm F0");	/* 1. variable parameter data */
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -14;
	Status = run_nda_command("rm F1");	/* 2. variable parameter data */
	if (Result == 0 && Status != OPERATION_OK && Status != IGNORE) Result = -15;
	return Result;
}
