%macro poem(data1=,var_list=,gew=,outlier=,alfa=,beta=,c=,data2=,out=,reweight=);

/********************************************************************************
  data1=        input sas data set                        (required)

  var_list=     variable list  (max 20)                   (required)
                full list of Var_Names, as : var_list=x1 x2 x3 .......

  gew=          name of sampling weight variable          (optional)
                if none a variable w=1 for every observation is generated

  outlier=      name of outlier variable                  (optional)
                if none a variable u=1 for every observation is generated

  alfa=         reduction factor in (0,1)                 (default : 0.5)

  beta=         donor severity parameter in (0,1]         (default : 0.3)

  c=            tuning constant                           (default : 3)

  data2=        input sas data set for the E Flags        (optional)
                if none a file _edit_ is generated with ei variables, i=1...p,
                and ei=1 for every observation

  out=          Output sas data set                       (default : _out_)

  reweight=     Redefintion of outliers (0:No / 1:Yes)    (default : 0)

example :
 %poem(data1=<libref>.bushfiretrc,var_list=v1 v2 v3 v4 v5,gew=w,outlier=u,
       alfa=0.5,beta=0.5,c=3,data2=<libref>.bushfiremwe,
       out=<libref>.bushfirepoem,reweight=0);

 --------------------------------------------------------------------------------
 Version 1.3 , Apr 2003
 SAS Modules : Base, IML / Version 8.2
 Copyright   : Swiss Federal Statistical Office, B. Hulliger & C. Beguin
               Swiss Federal Office of Information Technology, M. Eichenberger
 Reference   : Euredit D4-5.2.1-2.C, B. Hulliger & C. Beguin, SFSO
********************************************************************************/

%global dsexist varexist;
%if &out=   %then %let out=_out_;
%if &alfa= or &alfa <= 0 or &alfa >= 1  %then %let alfa=0.5;
%if &beta= or &beta <= 0 or &beta >  1  %then %let beta=0.3;
%if &c=     %then %let c=3;
%if &reweight=     %then %let reweight=0;
title2; title3; title4; title5; title6; title7;

%macro exist(data);
%let dsexist=no;
data _null_;
  if 0 then set &data;
  stop;
run;
%if &syserr=0 %then %let dsexist=yes;
%mend exist;

%macro varex(data,var);
%let varexist=no;
data _varex_;
  set &data;
  if _n_=2 then stop;
run;
data _null_;
  set _varex_(keep=&var obs=0);
run;
%if &syserr=0 %then %let varexist=yes;
%mend;

%exist(&data1);
%if &dsexist=no    %then %do;
    data _null_;
    file print;
    put #3 @2 " Error : No File &data1  ";
    put #4 @2 " Program terminated ";
    run;
    %goto ende_x;
%end;

%let nvar=0;
%let gewfl=1;
%let outlfl=1;

%if &gew ne %then %do;
 %varex(&data1,&gew);
   %if &varexist=no    %then %do;
     data _null_;
     file print;
     put #3 @2 " Error : No Weight_Var &gew in file &data1 ";
     put #4 @2 " Program terminated ";
     run;
     %goto ende_x;
   %end;
%end;

%if &gew eq %then %do;
 %let gewfl=0;
 %let gew=w;
%end;

%if &outlier ne %then %do;
 %varex(&data1,&outlier);
   %if &varexist=no    %then %do;
     data _null_;
     file print;
     put #3 @2 " Error : No Outlier_Flag_Var &outlier in file &data1 ";
     put #4 @2 " Program terminated ";
     run;
     %goto ende_x;
   %end;
%end;

%if &outlier eq %then %do;
 %let outlfl=0;
 %let outlier=u;
%end;

%do i=1  %to  20;
 %let s=%scan(&var_list,&i,' ');
  %if &s ne %then %do;
    %let var&i=&s;
    %varex(&data1,&&var&i);
      %if &varexist=no    %then %do;
       data _null_;
       file print;
       put #3 @2 " Error : No Var  &&var&i in file &data1 ";
       put #4 @2 " Program terminated ";
       run;
       %goto ende_x;
      %end;
    %let nvar=&i;
  %end;
%end;

data daten;
 set &data1  end=letzt;
 %if &gewfl=0  %then %do;
  w=1;
 %end;
 %if &outlfl=0 %then %do;
  u=1;
 %end;
 if letzt then call symput('nobs',left(put(_n_,8.)));
run;

%if &data2 ne  %then %do;
%exist(&data2);
%if &dsexist=no    %then %do;
    data _null_;
    file print;
    put #3 @2 " Error : No File &data2 ";
    put #4 @2 " Program terminated ";
    run;
    %goto ende_x;
%end;
%else %do;
data _edit_ ;
  set &data2 end=letzt;
 if letzt then call symput('nobsed',left(put(_n_,8.)));
run;
%end;
proc contents data=&data2 out=__tmp__(keep=varnum)  noprint;
data __tmp__;
 set __tmp__ end=letzt;
 if letzt then call symput('nvared',left(put(_n_,8.)));
run;
%end;

%if &nobs ne &nobsed  or &nvared ne &nvar %then %do;
    data _null_;
    file print;
    put #3 @2 " Error : File &data2 has not same number of obs or var as &data1 ";
    put #5 @2 " &data1 - Obs: &nobs var: &nvar / &data2 - Obs: &nobsed var: &nvared ";
    put #6 @2 " Program terminated ";
    run;
    %goto ende_x;
%end;

%if &data2 eq  %then %do;
 data _edit_;
  set daten;
  keep e1-e&nvar;
  %do i=1  %to  %eval(&nvar);
    e&i = 1;
  %end;
 run;
%end;

  /* elimination of obs with missing values for every var in var_list */
data daten;
 set daten;
  if
  %do i=1  %to  %eval(&nvar-1);
    &&var&i eq .  and
  %end;
  &&var&nvar eq . then delete ;
run;

proc iml  workspace=16384;
reset noname;
alpha = &alfa;
beta  = &beta;
c     = &c;
rw    = &reweight;
imlflag=1;
 varname1={
           %do i=1 %to &nvar;
             "&&var&i"
           %end;
           };
 varname2={
           %do i=1 %to &nvar;
             "&&var&i"
           %end;
             "&gew"
             "&outlier"
             "utilde"
           };
 varname3={ "Population size"
           %do i=1 %to &nvar;
             "&&var&i"
           %end;
           };
 varname4={ "imputed_Obs"  "Donor" "minimal_distance"};
 varname5={"imputand" "donor_obs_nr"};
 varname6={
           %do i=1 %to &nvar;
             "&&var&i"
           %end;
             "N_Obs"
           };

use daten;
read all var{&var_list} into dat;
 nrd=nrow(dat);
 ncd=ncol(dat);
use daten;
read all var{&gew} into w;
use daten;
read all var{&outlier} into u;

use _edit_;
read all into e;

rid=j(nrd,ncd,1);
if any(dat = .) then do;
miss = loc(dat = .);
rid[miss]=0;
end;

nbobsm=nrd-sum(int(rid[,+]/ncd));
bmiss=nrd-rid[+,] || nbobsm;

  /* Standardization */
a = j(nrd,ncd,.);
dstd = j(nrd,ncd,.);
a = (alpha**(1-e));
a = rid # a;
mu   = j(1,ncd,.);
sig2 = j(1,ncd,.);
do j=1 to ncd;
 t1=sum(u#w#a[,j]#dat[,j]);
 t2=sum(u#w#a[,j]);
 mu[j]=t1/t2;
 t3=sum(u#w#a[,j]#(dat[,j]-mu[j])##2);
 sig2[j]=t3/t2;
 if sig2[j] = 0 then do;
   imlflag=0;
   print "Variance of Variable_nr" j " is zero, Program terminated";
 end;
 else dstd[,j]=(dat[,j]-mu[j])/sqrt(sig2[j]);
end;
if imlflag = 1 then do;
dstd=dstd#rid;

 /* Covariance Matrix D  */
d=j(ncd,ncd,.);
do j=1 to ncd;
 do k=1 to j;
   t1=sum(u#w#a[,j]#a[,k]#dstd[,j]#dstd[,k]);
   t2=sum(u#w#a[,j]#a[,k]);
   d[j,k]=t1/t2;
   d[k,j]=d[j,k];
 end;
end;
dinv=inv(d);
eig=min(eigval(d));
if eig <= 0 then do;
 print "Covariance Matrix D is not positive definite, D' will be taken";

  /* Covariance Matrix D'  */
 d=j(ncd,ncd,.);
 do j=1 to ncd;
  do k=1 to j;
   t1=sum(u#w#a[,j]#a[,k]#dstd[,j]#dstd[,k]);
   t2=sum(u#w);
   d[j,k]=t1/t2;
   d[k,j]=d[j,k];
  end;
 end;
 dinv=inv(d);
end;

 /* mahalanobis distances, second outlier indicator */
md = j(nrd,1,.);
ut = u;
nc2=ncd#ncd;
do i=1 to nrd;
  ad=a[i,]; x=dstd[i,];
  s1=sum(shape((ad#x),ncd,ncd)#(shape((ad#x),ncd,ncd))`#dinv`);
  s2=sum(shape(ad,ncd,ncd)#(shape(ad,ncd,ncd))`);
  md[i] = nc2#(s1/s2);
  if eig <= 0 then md[i] = s1;
  if rw = 1 then do;
     g=c#cinv(0.683,ncd);
     if md[i] > g then ut[i] = 0; else ut[i] = 1;
  end;
end;

/* imputation process, nearest neighbor */
donor= j(nrd,2,0);
mdmin= j(nrd,1,.);
do i=1 to nrd;
if (any(rid[i,]#e[i,] = 0) | ut[i] = 0)  then do;
 donor[i,1] = 1;
 md = j(nrd,1,.);
 ai=a[i,];
  do h=1 to nrd;
   if h ^= i then do;
        /* Case 1 */
    if ut[i] = 0 | beta = 1 then do;
      c0 = ut[h]#sum(rid[h,]#e[h,]);
      if c0 = ncd then do;
         ah=a[h,];  dx=dstd[i,]-dstd[h,];
         s1=sum(shape((ai#ah#dx),ncd,ncd)#(shape((ai#ah#dx),ncd,ncd))`#dinv`);
         s2=sum(shape(ai#ah,ncd,ncd)#(shape(ai#ah,ncd,ncd))`);
         md[h] = (nc2#s1)/s2;
         if eig <= 0 then md[h] = s1;
      end;
    end;
        /* Case 2 */
    if ut[i] = 1 &  beta < 1 then do;
      c1 = ut[h]#sum(rid[i,]#e[i,]#rid[h,]#e[h,]);
      c2 = sum((1-rid[i,])#rid[h,]#e[h,]);
      c3 = sum((1-e[i,])#rid[h,]#e[h,]);
      sa = sum((1-rid[i,]));
      sb = sum((1-e[i,]));
      if c1 >= beta#ncd & c2 = sa & c3 = sb then do;
         ah=a[h,];  dx=dstd[i,]-dstd[h,];
         s1=sum(shape((ai#ah#dx),ncd,ncd)#(shape((ai#ah#dx),ncd,ncd))`#dinv`);
         s2=sum(shape(ai#ah,ncd,ncd)#(shape(ai#ah,ncd,ncd))`);
         md[h] = (nc2#s1)/s2;
         if eig <= 0 then md[h] = s1;
      end;
    end;
   end;
  end;
          /* nearest neighbor */
   if any(md ^= .)  then do;
     minim=min(md); h0=loc(md=minim);
     donor[i,2] = h0; mdmin[i]=minim;
     if ut[i] = 1 then do;
       do j=1 to ncd;
         if rid[i,j]#e[i,j] = 0 then dat[i,j] = dat[h0,j];
       end;
     end;
     if ut[i] < 1 then do;
         dat[i,] = dat[h0,];
     end;
   end;
end;
end;

impu=donor || mdmin;
data=dat || w || u || ut;

ridi=j(nrd,ncd,1);
if any(dat = .) then do;
miss = loc(dat = .);
ridi[miss]=0;
end;

nrobsm=nrd-sum(int(ridi[,+]/ncd));
rmiss=nrd-ridi[+,] || nrobsm;

ridi[,]=a[,]#w;
sw=w[+];
good=sw || ridi[+,];

msig2=j(4,ncd,.);
do j=1 to ncd;
 msig2[1,j]=mu[j];
 msig2[2,j]=sig2[j];
 t=sum(w);
 msig2[3,j]=(sum(w#dat[,j]))/t;
 msig2[4,j]=(sum(w#(dat[,j]-msig2[3,j])##2))/t;
end;

outl=j(1,2,.);
outl[1,1]=(u=0)[+]; outl[1,2]=(ut<1)[+];

robu=j(1,2,.);
robu[1,1]=sum(u#w); robu[1,2]=sum(ut#w);

create &out     from data  [colname=varname2];
append from data;

create covar    from d     [colname=varname1];
append from d;
create covarinv from dinv  [colname=varname1];
append from dinv;
create msig2    from msig2 [colname=varname1];
append from msig2;
create good     from good  [colname=varname3];
append from good;
create impu from impu      [colname=varname4];
append from impu;
create donor from donor    [colname=varname5];
append from donor;
create rmiss    from rmiss [colname=varname6];
append from rmiss;
create bmiss    from bmiss [colname=varname6];
append from bmiss;
create outl     from outl;
append from outl;
create robu     from robu;
append from robu;
end;
create imlflag    from imlflag;
append from imlflag;
run; quit;

data imlflag;
set imlflag;
call symput('imlflag',left(put(col1,1.)));
run;
%if &imlflag = 0 %then  %goto ende_x;
data txt1;
 length txt $ 18;  label txt='00'x;
 txt="mean before       ";  output;
 txt="variance before   ";  output;
 txt="mean after        ";  output;
 txt="variance after    ";  output;
run;
data txt2;
 length txt $ 7;  label txt='00'x;
 txt="before ";  output;
 txt="after  ";  output;
run;
data donor;
 set donor;
 obs_nr=_n_;
run;
data  miss;
 set bmiss  rmiss;
run;
data  miss;
 merge txt2 miss;
run;
data msig2;
 merge txt1 msig2;
run;
data empdon ;
 set donor end=letzt;
 keep n_empty;
 retain n_empty 0;
 if imputand =1  and  donor_obs_nr = 0 then n_empty+1;
 if letzt then output;
run;
data impu;
 set impu;
 obs_nr=_n_;
run;
data impu;
 set impu;
 drop imputed_obs;
 label obs_nr="Observation Nr"
       donor="Donor Observation"
       minimal_distance="Minimal Malahanobis Distance";
 if imputed_obs = 0 and donor = 0 then delete;
run;
proc freq data=donor noprint;
     where donor_obs_nr > 0;
     tables donor_obs_nr / out=tab(drop=percent) nocol nocum;
run;
ods listing close;
ods output extremeObs=ext;
proc univariate data=tab ;
     id    donor_obs_nr;
     var   count;
run;
ods output close;
ods listing;

options nodate pageno=1 nonumber  ps=50 ls=96;
title1 "weighted imPutation for Outliers, Edit failures and Missing values : POEM ";
title2 "  ";
title3 "Data File: &data1   Edit File: &data2   Output File: &out";
title4 "alfa=&alfa  beta=&beta c=&c  reweight=&reweight ";
title5 "  ";
title6 "Number of  missing values per variable before and after processing";
title7 ">> if number of missing values 'after' are not all 0 then beta perhaps too severe";
proc print data=miss noobs uniform label;
     format &var_list  6.;
run;
title6 "Number of good values per variable";
title7 " ";
proc print data=good noobs uniform;
     format _numeric_   6.;
run;
title6 "Mean and Variance per variable before and after imputation";
title7 " ";
proc print data=msig2 noobs uniform label;
     format &var_list   12.6;
run;
title6 "Covariance Matrix";
title7 " ";
proc print data=covar noobs uniform;
     format _all_   12.4;
run;
title6 "Inverse Covariance Matrix";
title7 " ";
proc print data=covarinv noobs uniform;
     format _all_   12.4;
run;
title6 "Number of outliers";
title7 " ";
proc print data=outl  noobs uniform label;
     label col1="u < 1" col2="utilde < 1";
     format _all_   12.;
run;
title6 "Total Robustness Weights";
title7 " ";
proc print data=robu  noobs uniform label;
     label col1="Sum(w * u)" col2="Sum(w * utilde)";
     format _all_   18.4;
run;
title6 "Number of empty donor sets";
title7 " ";
proc print data=empdon noobs uniform ;
     format _all_   6.;
run;
title6 "Observations with higher frequencies as donors";
title7 " ";
proc print data=ext noobs uniform label split='*';
     where high > 1;
     label high="was used as donor*frequency"
           donor_obs_nr_high="obs nr of the donor";
     var   donor_obs_nr_high high ;
run;
title6 "Imputation History";
title7 " ";
proc print data=impu noobs uniform label;
     var    obs_nr donor minimal_distance;
     format obs_nr donor 6. minimal_distance 12.4 ;
run;

%ende_x:
%mend;




%poem(data1=mev7.bushfiretrc,var_list=v1 v2 v3 v4 v5,gew=w,outlier=u,
      alfa=0.5,beta=0.2,c=5,data2=mev7.bushfiremwe,out=bushfirepoem,reweight=1);

                              /*
%poem(data1=mev7.philipstrc,var_list=v1 v2 v3 v4 v5 v6 v7 v8,gew=w,outlier=u,
      alfa=0.5,beta=0.3,c=3,data2=mev7.philipsmwe,out=philipspoem,reweight=0);
                              */
