/*
	Copyright (C) the University of Jyväskylä
	The Laboratory of Data Analysis
*/

/*********************************************************************
. file: hdr.h
. purpose: Constants, helper macros and data structures for the TS-SOM
.
**********************************************************************/

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>

/* Boolean constants. */
#define FALSE 0
#define TRUE  1

/* F_* status flags; every value in this group must be unique. */
#define F_OBS 0
#define F_MIS 1
#define F_IMP 2
#define F_ERR 3 /* hard error */
#define F_OUT 4 /* outlier    */


/*
 * Status codes.  OK is zero; each ERR_* code is a distinct negative
 * number.  Values in this group must be unique.
 */
#define OK                       0
#define ERR_CREATE_NEURON        -901
#define ERR_CREATE_NEIBS         -902
#define ERR_CREATE_WEIGHTS       -903
#define ERR_CREATE_SEARCH_SET    -904
#define ERR_CREATE_NEURON_LIST   -905
#define ERR_CREATE_SOMDATA       -906
#define ERR_TRAIN_MODE           -907
#define ERR_INIT_SONS            -908
#define ERR_ACCESS_DATA          -909
#define ERR_SOM_PARSER           -910
#define ERR_CREATE_LOOKUP        -911
#define ERR_NAME_EXISTS          -912
#define ERR_INVALID_FIELD_TYPE   -913

/* TOPO_* codes; values must be unique. */
#define TOPO_LATTICE 0
#define TOPO_RING    1
#define TOPO_TSVQ    2

/* TRAIN_* codes; values must be unique. */
#define TRAIN_VQ     0
#define TRAIN_SPREAD 1

/*
 * BMU_* codes.  The values are deliberately shared pairwise:
 * BMU_INIT and BMU_FOUND must both be 1, BMU_TEST and BMU_SKIP
 * must both be 0.
 */
#define BMU_INIT  1
#define BMU_FOUND 1
#define BMU_TEST  0
#define BMU_SKIP  0

/* RUN_* codes; values must be unique. */
#define RUN_INIT     0
#define RUN_CONTINUE 1
#define RUN_STOP     2
#define RUN_ABORT    3


/* Numeric identifiers (their meaning is not documented in this header). */
#define TS_PARMS 50
#define N_ID     52
#define L_ID     53
#define P_ID     54

/*
 * IMP_* and EDIT_* codes.  Both families share a single numbering, so
 * every value in this group must be unique.  The Assign_istat and
 * Assign_estat macros translate the same-named strings into these codes.
 */
#define IMP_NONE        1
#define IMP_MEAN        2
#define IMP_URAND       3
#define IMP_NRAND       4
#define EDIT_CATEGORIAL 5
#define EDIT_CONTINUOUS 6
#define EDIT_NONE       7

/*
 * Transform codes.  Assign_tstat maps the strings "TRAN_NONE" and
 * "TRAN_LN" onto NO_TRANSFORM and LN_TRANSFORM respectively.
 */
#define NO_TRANSFORM 1
#define LN_TRANSFORM 2

/* SOM_* codes; values must be unique. */
#define SOM_ALL   0
#define SOM_BUILD 1
#define SOM_TRAIN 2
#define SOM_EXIT  3


/* Numeric settings (their meaning is not documented in this header). */
#define INITIAL_EPOCH 2
#define GVIS_SIZE     300

/*
 * Assign_estat(stats, statn)
 *
 * Translate the editing-method name held in the string `stats` into its
 * numeric code and store the code in `statn` (a modifiable lvalue):
 *
 *     "EDIT_CATEGORIAL" -> EDIT_CATEGORIAL
 *     "EDIT_NONE"       -> EDIT_NONE
 *     "EDIT_CONTINUOUS" -> EDIT_CONTINUOUS
 *
 * WARNING: for any other string the macro executes `return(-9990)` in the
 * CALLING function, so it can only be used inside a function that is able
 * to return that value.
 *
 * `stats` is evaluated exactly once (it is copied into a block-local
 * pointer), and `statn` is parenthesized.  The expansion stays a plain
 * brace block so that call sites written with or without a trailing
 * semicolon keep compiling.
 */
#define Assign_estat(stats, statn ) \
  { const char *assign_estat_name_ = (stats); \
    if      (strcmp(assign_estat_name_, "EDIT_CATEGORIAL") == 0) (statn) = EDIT_CATEGORIAL; \
    else if (strcmp(assign_estat_name_, "EDIT_NONE")       == 0) (statn) = EDIT_NONE;       \
    else if (strcmp(assign_estat_name_, "EDIT_CONTINUOUS") == 0) (statn) = EDIT_CONTINUOUS; \
    else return(-9990); }

/*
 * Assign_istat(stats, statn)
 *
 * Translate the imputation-method name held in the string `stats` into
 * its numeric code and store the code in `statn` (a modifiable lvalue):
 *
 *     "IMP_NONE"  -> IMP_NONE
 *     "IMP_MEAN"  -> IMP_MEAN
 *     "IMP_URAND" -> IMP_URAND
 *     "IMP_NRAND" -> IMP_NRAND
 *
 * WARNING: for any other string the macro executes `return(-99900)` in
 * the CALLING function, so it can only be used inside a function that is
 * able to return that value.
 *
 * `stats` is evaluated exactly once (it is copied into a block-local
 * pointer), and `statn` is parenthesized.  The expansion stays a plain
 * brace block so that call sites written with or without a trailing
 * semicolon keep compiling.
 */
#define Assign_istat(stats, statn ) \
  { const char *assign_istat_name_ = (stats); \
    if      (strcmp(assign_istat_name_, "IMP_NONE")  == 0) (statn) = IMP_NONE;  \
    else if (strcmp(assign_istat_name_, "IMP_MEAN")  == 0) (statn) = IMP_MEAN;  \
    else if (strcmp(assign_istat_name_, "IMP_URAND") == 0) (statn) = IMP_URAND; \
    else if (strcmp(assign_istat_name_, "IMP_NRAND") == 0) (statn) = IMP_NRAND; \
    else return(-99900); }

/*
 * Assign_tstat(stats, statn)
 *
 * Translate the transform name held in the string `stats` into its
 * numeric code and store the code in `statn` (a modifiable lvalue):
 *
 *     "TRAN_NONE" -> NO_TRANSFORM
 *     "TRAN_LN"   -> LN_TRANSFORM
 *
 * WARNING: for any other string the macro executes `return(-999000)` in
 * the CALLING function, so it can only be used inside a function that is
 * able to return that value.
 *
 * `stats` is evaluated exactly once (it is copied into a block-local
 * pointer), and `statn` is parenthesized.  The expansion stays a plain
 * brace block so that call sites written with or without a trailing
 * semicolon keep compiling.
 */
#define Assign_tstat(stats, statn ) \
  { const char *assign_tstat_name_ = (stats); \
    if      (strcmp(assign_tstat_name_, "TRAN_NONE") == 0) (statn) = NO_TRANSFORM; \
    else if (strcmp(assign_tstat_name_, "TRAN_LN")   == 0) (statn) = LN_TRANSFORM; \
    else return(-999000); }

/*
 * Chk(rval)
 *
 * Evaluate `rval` once and convert it to int.  If the result is negative,
 * return it from the CALLING function; otherwise fall through.
 *
 * The block-local variable has a deliberately unusual name: with the
 * former name `returnValue`, a caller writing Chk(returnValue) would have
 * made the macro read its own uninitialized local instead of the caller's
 * variable.  The expansion stays a plain brace block so that call sites
 * written with or without a trailing semicolon keep compiling.
 */
#define Chk(rval) \
  { const int chk_status_ = (rval); \
    if (chk_status_ < 0) return(chk_status_); }

/*--------------------------------------------------------------
.
.
----------------------------------------------------------------*/


/*
 * train_str
 *
 * Scalars, counters and tables of mean/max error and mean/max move.
 * Presumably the training-time statistics record (som_str points to one
 * through its `tinfo` member) -- confirm against the training code.
 * Where a field has no description below, this header does not document it.
 */
typedef struct {
    int    view_points;   /* number of data points to view */

    float  g_maxmean;     /* "max" (not documented further) */
    float  g_maxmax;
    float  g_amax;
    float  g_bmax;

    long  *np1;
    long  *np2;
    long  *np3;
    long  *np4;

    float *gmean_error;   /* table of mean errors */
    float *gmax_error;    /* table of max errors  */
    float *a_adapt;       /* mean move            */
    float *b_adapt;       /* max move             */
} train_str;

/*
 * neuron (struct n_str)
 *
 * One node of the network.  A node carries its own vectors and links to
 * other nodes through three pointer tables: sons, neighbours and the
 * search set, each paired with an element count.
 */
typedef struct n_str {
    int     layer;                /* layer index */
    int     index;                /* node index  */

    float  *var;                  /* variances */
    float  *var_old;              /* variances (presumably the previous ones) */
    float  *w_ptr;                /* weights   */
    float  *w_cen;                /* centroid  */
    float  *wprior;               /* flagged as a new field; not documented further */
    float   prior;                /* flagged as a new field; not documented further */

    int     samples;              /* number of samples per node */
    float   output;               /* output value (if any) */
    float   error;                /* error value (if any)  */

    int             n_sons;       /* number of sons per node */
    struct n_str  **s_ptr;        /* sons */
    int             n_neibs;      /* number of neighbours per node */
    struct n_str  **n_ptr;        /* neighbours */
    int             n_search;     /* search set size */
    struct n_str  **search_set;   /* search set */
} neuron;

/*
 * net_str
 *
 * The network itself: the root node, the node table with its per-layer
 * bookkeeping, a lookup vector for samples, and the error/move figures
 * gathered during training.
 */
typedef struct {
    /* --- parameters --- */
    neuron   *root;             /* root of the TS-SOM structure           */
    neuron  **neurons;          /* neuron table                           */
    int      *layer_list;       /* index of the first node on each layer  */
    int      *layer_sizes;      /* list of layer sizes                    */
    int       next_free_node;   /* number of nodes during build           */
    int       nodes;            /* number of nodes                        */
    neuron  **lookup;           /* lookup vector for samples              */
    float    *xErr;             /* used for error calculation             */
    float    *x3;               /* RESERVED                               */

    float     mean_error;       /* training-time mean error               */
    float     max_error;        /* training-time max error                */
    float     mean_move;        /* presumably training-time mean move; the
                                   original comment repeated "mean error" */
    float     max_move;         /* presumably training-time max move; the
                                   original comment repeated "max error"  */
    int       last_index;       /* max node index number                  */
} net_str;

/* EDITING AND IMPUTATION INFORMATION */

/*
 * e_str
 *
 * Settings and working storage for editing (error localization) and
 * imputation: option flags, output switches, frame names, per-field
 * tables, per-sample scratch state, sampling weights, data transforms,
 * external-data handling and robust training.
 * Where a field has no description below, this header does not document it.
 */
typedef struct e_str {

    /* ---- constant fields ---- */

    int   impute;          /* flag for imputation */
    int   missing;         /* flag for missing data items */
    int   rerrors;         /* flag for already found data errors (rule errors) */
    int   erroneous;       /* flag for data errors */
    int   Nbest;           /* find the N best matching units */
    float m_val;           /* missing value in float format */
    float e_val;           /* pre hard error value in float format */
    int   m_flag;          /* missing value found from input flag */
    int   e_flag;          /* error value found from input flag */

    int   smethod;         /* 1 = random selection
                              2 = multipath */
    int   imethod;         /* 1 = weighted multiple imputation
                              2 = mean value
                              3 = urandom sample
                              4 = nrandom sample
                              5 = donator */
    int   emethod;         /* 1 = robust 1
                              2 = mean value
                              3 = random sample
                              4 = donator */

    int   var_out;         /* print variances: 1 = true, 0 = false */
    int   er_dout;         /* print error data: 1 = true, 0 = false */
    int   er_rprobs;       /* print error record probabilities: true/false */
    int   er_fprobs;       /* print error field probabilities: true/false */
    int   ref_numbers;     /* print error record numbers */

    int   impute_outliers; /* impute outliers: TRUE/FALSE */

    int   write_variances;
    int   write_imputations;
    int   write_edits;

    int   write_imput_data;
    int   write_im_classes;
    int   write_cldata;
    int   write_convergence_data;

    int   incremental_training;
    int   start_incremental_training;
    int   stop_incremental_training;
    int   stop_at_layer;   /* stop training at this layer */
    int   start_at_layer;  /* presumably: start training at this layer (the
                              original comment said "stop", which looks
                              like a copy-paste slip -- confirm) */
    int   current_layer;   /* current layer after training is stopped */

    /* ---- missing data handling ---- */

    char  id_frname[80];   /* imputed data frame name */
    char  eri_frname[80];  /* located errors data frame name */
    char  erd_frname[80];  /* located errors indicator frame name */
                           /* NOTE(review): the names suggest eri = indicator
                              and erd = data, i.e. these two descriptions
                              may be swapped -- confirm */
    char  var_frname[80];  /* variances frame name */
    char  ref_frname[80];  /* ref.num data frame name */
    char  fstatname[80];   /* field status frame */
    char  cldata_name[80]; /* cldata for imputed and errors */

    int     fstatset;      /* flag for external status */
    int    *imstatus;      /* imputing functions for fields */
    int    *estatus;       /* editing functions for fields */
    float  *Pr_edit;       /* cut probabilities per field (edit) */
    float  *Pr_tr_cut;     /* cut probabilities per field (train) */
    float  *scale_imput;   /* scale imputation variance per field */

    float   Pr_cut;        /* global cut probability */

    float  *imputed;       /* temp vector for imputed values */

    void   *im_node;       /* classes for error samples */
    void   *im_indexes;    /* missing (or pre-error) data record indexes */
    void   *er_indexes;    /* outlier data record indexes */

    void  **imfields;      /* imputed data fields */
    void  **efields;       /* error data fields */
    void  **er_rnum;       /* error reference fields */
    void  **im_rnum;       /* described as "error reference fields", same as
                              er_rnum; presumably the imputation
                              counterpart -- confirm */
    void  **er_fPr;        /* error probability fields */
    void   *er_rPr;        /* error probability record */
    void   *tmp_er_Pr;     /* error probability field */
    void  **var_f;         /* variance field */
    void   *var_r;         /* variance record */
    int     var_fractile;  /* fractile (for robust variance) */

    int     irecords;      /* number of incomplete records */
    int     num_outliers;  /* total of outlier records */
    int     num_edits;     /* total of edits */

    float   sigma1;        /* confidence interval for training */
    float   sigma2;        /* confidence interval for error localization */
    int     erecords;      /* erroneous records */
    int    *f_stat;        /* status of fields (missing or not) */
    neuron ***ml_lookup;   /* multi path lookup vector */
    int    *ml_size;       /* multi path sizes */

    /* ---- sampling weights ---- */

    char    sw_frname[80]; /* sampling weights dframe name */
    int     s_weights;     /* sampling weights: TRUE/FALSE */
    float  *sw;            /* sampling weights for ALL records */

    /* ---- variable fields (for one sample) ---- */

    neuron **path_ptr;     /* father neurons of search paths */
    neuron **path_tmp;     /* father neurons of search paths */
    int      path_size;    /* number of active search paths */
    float   *input;        /* pointer to input vector */
    int      isample;      /* incomplete sample number */
    int      incomplete;   /* there is missingness */
    int      mode;         /* mode (not documented further) */

    float    tmp1;
    float    tmp2;
    float    tmp3;

    /* ---- sample omit ---- */

    int      omit_small_neurons;
    int      sample_limit;

    /* ---- automatic data transformations ---- */

    int      transforms;
    int     *t_opt;

    /* ---- external data ---- */

    int      impute_external_data;
    int      external_data_statistics;

    int      ext_data_dim;
    char     ext_data_name[80];
    char     ext_stat_name[80];
    void   **ext_fields;
    int     *ext_types;    /* INT, FLOAT, etc. */

    float   *einput;       /* pointer to external data input vector */

    float    nweight;
    float    ext_stop;
    int      ext_max_iter;
    int      ext_m_flag;
    int     *t_stat;       /* field status vectors: missing or not missing */
    int     *ext_status;   /* robust training options */
    float  **ext_cen;
    float  **ext_std;
    float  **tmp_cen;
    float  **tmp_std;
    float  **ext_prior;
    float    sigma3;

    /* ---- robust training ---- */

    int      Huber;

} e_str;

/*
 * som_str
 *
 * Top-level SOM descriptor: structural parameters, the training data
 * description, training options, and pointers to the network (net_str),
 * the edit/imputation state (e_str) and a train_str record.
 */
typedef struct {

    /* ---- structure ---- */
    int         layers;            /* number of layers */
    int         som_dim;           /* SOM dimension */
    int         net_type;          /* net type: lattice, ring, vq (presumably
                                      one of the TOPO_* codes -- confirm) */

    /* ---- data ---- */
    long        records;           /* clean data records */
    int         data_dim;          /* data dimension */
    char        data_frname[80];   /* train data frame name */
    void      **fields;            /* data fields */
    int        *types;             /* types of fields */
    float     **weights;           /* SOM weights after training */

    /* ---- training ---- */
    int         is_lookup;         /* lookup used: 1 = yes, 0 = no */
    int         fs_lookup;         /* full search correction up to */
    int         opt_rule;          /* training rule: 0 = VQ, 1 = spread (the
                                      same values as TRAIN_VQ / TRAIN_SPREAD) */
    long        maxiter;           /* max number of iterations */
    float       stop_criteria;     /* stop criteria */
    float       neib_weight;       /* neighbor weighting */
    int         graphics;          /* visualize training progress */
    int         verify;            /* report training progress */
    int         full_search;       /* full search instead of TS/LU */

    /* ---- net ---- */
    net_str    *net;               /* the TS-SOM */
    e_str      *e;                 /* edit and imputation structure */
    train_str  *tinfo;

} som_str;

