Input values
n number of observations
m number of variables
From perturbed differences file
trow - row index
tcol column index
tval true value
pval perturbed value
From imputed differences file
irow row index
icol column index
oval original value in perturbed data file (may be
true or
perturbed value)
ival imputed value
sumwts -
sum of weights
t1[j] - weighted sum of true values
&t2[j] -
weighted sum of true values squared
wt_mean[j] -
weighted mean
wt_var[j]) -
weighted variance
varD[j] - jacknife variance (Vw(Y))
iqr[j] -
interquartile range
Information on the categories for categorical
variables is also input from the summary statistics file.
for j=0
to j = m-1 Definition
{
a[j] = 0 number accept/correct
b[j] = 0 number reject/correct
c[j] = 0 number accept/incorrect d[j]
= 0 number reject/incorrect
nimp[j] = 0 number imputed
D[j] = 0.0 yhat - ystar
D2[j] = 0.0 (yhat-ystar)^2
Dmin[j] = Big min(yhat-ystar)
Dmax[j] = -Big max(yhat-ystar)
Dcat[j] = 0.0 weight*(cat(y)-cat(ystar) [c] only
EY1[j] = 0.0 - weight*ystar (+ weight*y in [c])
EY2[j] = 0.0 - weight*ystar^2 (+ weight*y*y in [c])
saw[j] = 0.0 weights for [a]
sbw[j] = 0.0 weights for [b]
scw[j] = 0.0 weights for [c]
sdw[j] = 0.0 weights for [d]
DL1[j] = 0.0 weight*abs(yhat-ystar)
DL2[j] = 0.0 weight*(yhat-ymin)^2
DLi[j] = 0.0 max(weight*(yhat-ymin)
m1[j] = 0.0 weight*(yhat-ystar)
m2[j] = 0.0 weight*(yhat^2-ystar^2)
Dcount[j] = 0.0 category(yhat)==category(ystar)
Dgen[j] = 0.0 abs(category(yhat)-category(ystar))
wt_sum[j] = sumwts sum of weights for yhat=ystar
}
Note in the algorithm ++
means increment value by 1 i.e., if the value of d[2] = 20 the d[2]++ changes
the value to 21.
Distance(x,y) is block
matrix distance. If a returned category
is not valid the observation is skipped and the number of such errors reported.
Get perturbed and impute values
for i=0
to i = n-1
{
get weight for ith observation: wts
(set
to 1.0 for unweighted)
for j = 1 to m-1
{
if
no changes to i,j
{
Correctly accept [a]
a[j]++
saw[j]
= saw[j] + wts
G
= G + probi,j
}
else if perturbed and imputed both report
change
{
if
pval = miss_val
{
if
no imputation SKIP VALUE
}
else
{
d[j]++
E1
= 0
YY
= 0
G
= G + (1.0-probi,j)
if
continuous and applicable
{
EY1[j] = EY1[j] - wts*tval
EY2[j] = EY2[j] - wts*tval*tval
}
}
if
imputations and ival=miss_val
{
nmm++
SKIP
VALUE
}
if
tval not applicable or ival not applicable
{
goto
SKIP_VALUE
}
if
imputations and variable continuous
{
nimp[j]++
sdw[j]
= sdw[j] + wts
Store values tval,ival,wts as (y,x,wt)
m1[j]
= m1[j] + wts*(tval-ival)
m2[j]
= m2[j] +
wts*(tval*tval-ival*ival)
if
ival != tval
{
tempR = abs(ival-tval)
DL1[j]
= DL1[j] + wts*tempR
DL2[j]
= DL2[j] +
wts*tempR*tempR
if
tempR > DLi[j] then
DLi[j]
= tempR
wt_sum[j] = wt_sum[j] - wts
Drop tval
W = -wts/(sumwts-wts)
tempR = (tval-wt_mean[j])
wt_mean[j] = wt_mean[j] +
tempR*W
wt_var[j]
= wt_var[j]+
tempR*tempR*W*sumwts
Add ival
W = wts/sumwts
tempR = (ival-wt_mean[j])
wt_mean[j] = wt_mean[j] +
tempR*W
wt_var[j]
= wt_var[j] +
tempR*tempR*W*(sumwts-wts)
}
}
if
imputations and categorical variable
{
if
tval = ival
{
Dcount[j] = Dcount[j] + 1.0
nimp[j]++
}
else
{
increment
table count
if tval valid
{
Dgen[j] = Dgen[j] +
Distance(tval,ival)
}
}
}
get
next perturbed value and impute value
}
else if only imputed report change
{
Incorrectly reject (impute) [b]
b[j]++
sbw[j]
= sbw[j] + wts
E1
= 0
G
= G + probi,j
if
continuous variable
{
if
oval valid
{
EY1[j] = EY1[j] - wts*oval
EY2[j] = EY2[j] - wts*oval*oval
}
}
if
imputations and ival==miss_val
{
nmm++
SKIP
VALUE
}
if
oval not applicable or ival not applicable
{
SKIP
VALUE
}
if
imputations and continuous variable {
nimp[j]++
Store values oval, ival, wts as (y,x,wt)
m1[j]
= m1[j] + wts*(oval-ival)
m2[j]
= m2[j] +
wts*(oval*oval-ival*ival)
tempR
= abs(ival-oval)
DL1[j]
= DL1[j] + wts*tempR
DL2[j]
= DL2[j] + wts*tempR*tempR
if
(tempR>DLi[j]) then
DLi[j] = tempR
wt_sum[j]
= wt_sum[j] - wts
Drop oval
W
= -wts/(sumwts-wts)
tempR
= (oval-wt_mean[j])
wt_mean[j]
= wt_mean[j] + tempR*W
wt_var[j] = wt_var[j] +
tempR*tempR*W*sumwts
Add ival
W
= wts/sumwts
tempR
= (ival-wt_mean[j])
wt_mean[j]
= wt_mean[j] + tempR*W
wt_var[j] = wt_var[j] +
tempR*tempR*W*(sumwts-wts)
}
if
imputations and categorical variable
{
if
tval applicable
Dgen[j] = Dgen[j] +
Distance(oval,ival)
nimp[j]++
increment
table count
}
get
next imputed value
}
else if only perturbed change reported
{
if
pval = miss_val
{
if
imputation then nmm++
SKIP
VALUE
}
if
tval not applicable or pval not applicable
{
SKIP
VALUE
}
G
= G + (1.0-probi,j)
if
continuous variable
{
EY1[j] = EY1[j] - wts*(tval-pval)
EY2[j] = EY2[j] - wts*(tval*tval
pval*pval)
}
c[j]++
scw[j]
= scw[j] + wts
YY
= 0
if
continuous variable
{
tempR = pval-tval
D[j] = D[j] + wts*tempR
D2[j] = D2[j] + wts*tempR*tempR
if tempR < Dmin[j] then Dmin[j] = tempR
if
tempR > Dmax[j]then Dmax[j] = tempR
wt_sum[j]
= wt_sum[j] - wts
Drop tval
W
= -wts/(sumwts-wts)
tempR
= tval-wt_mean[j]
wt_mean[j]
= wt_mean[j] + tempR*W
wt_var[j] = wt_var[j] +
tempR*tempR*W*sumwts
Add pval
W
= wts/sumwts
tempR
= pval-wt_mean[j]
wt_mean[j]
= wt_mean[j] + tempR*W
wt_var[j] = wt_var[j] +
tempR*tempR*W*(sumwts-wts)
}
if
categorical variable
{
if
ordinal
{
Dcat[j] = Dcat[j] +
Distance(tval,pval)*wts
}
else
{
Dcat[j] = Dcat[j] + wts
}
get
next perturbed value
}
}
}
Case wide statistics
if E1==1 and YY==1
{
ra++
}
else if E1 = 1
{
rc++
}
else if YY = 1
{
rb++
}
else
{
rd++
}
}
for j = 1
to m-1
{
if (c[j]>0)
{
alpha = c[j]/(c[j]+d[j])
}
else
{
alpha = 0.0
}
if (b[j]>0)
{
beta = b[j]/(a[j]+b[j])
}
else
{
beta = 0.0
}
rn = a[j] + b[j] + c[j] + d[j]
delta = (b[j]+c[j])/rn
if continuous variable
{
RAE = D[j]/t1[j]
RRASE =
sqrt(D2[j])/t1[j]
RER =
(Dmax[j]-Dmin[j])/iqr[j]
tj =
D[j]/sqrt(varD[j])
R = sumwts/(saw[j]+scw[j])
EY1[j] = (R-1.0) + (EY1[j]/t1[j])*R
AREm1 = abs(EY1[j])
EY2[j] = (R-1.0) + (EY2[j]/t2[j])*R
AREm2 = abs(EY2[j])
}
else
{
Dcat = Dcat[j]/rn
tj = Dcat[j]/sqrt(varD[j])
}
if imputation and continuous
{
wimp = sbw[j] + sdw[j]
if (wimp>0)
{
dL1
= DL1[j]/wimp
dL2
= sqrt(DL2[j]/wimp)
dLinf
= nimp[j]*DLi[j]/wimp
}
if nimp[j] > 1
{
Retrieve
values (y,x,wt) for variable
Call
regression(y,x,wt)
Compute K-S statistics
sort(x)
sort(y)
ksa = 0.0
ks1
= 0.0
ks2
= 0.0
i
= 0
k
= 0
fnx
= 0.0
fny
= 0.0
if
x[0] < y[0]
{
t0
= floor(x[0])
}
else
{
t0 = floor(y[0])
}
if
( x[nimp[j]-1] > y[nimp[j]-1]
{
t2n = x[nimp[j]-1]
}
else
{
t2n = y[nimp[j]-1]
}
x[nimp[j]]
= Big
y[nimp[j]] = Big
point = t0
while i < nimp[j] or k
< nimp[j]
{
if
(x[i] < y[k])
{
fnx = (i+1)/nimp[j]
step = x[i] - point
point = x[i]
++i
}
else
if (y[k] < x[i])
{
fny = (k+1)/nimp[j]
step = y[k] - point
point = y[k]
++k
}
else
{
fnx = (i+1)/nimp[j]
++i
fny = (k+1)/nimp[j]
++k
while i<nimp[j] and x[i]=x[i-1])
{
fnx = (i+1)/nimp[j]
++i
}
while k<nimp[j] and y[k]=y[k-1])
{
fny = (k+1)/nimp[j]
++k
}
if x[i] <= y[k]
{
step = x[i] - point
point = x[i]
}
else
{
step = y[k] - point
point = y[k]
}
}
tempR = abs(fnx-fny)
if (tempR > ksa)
ksa = tempR
ks1
= ks1 + tempR*step
ks2
= ks2 + tempR*tempR*step
}
if
x[nimp[j]] > y[nimp[j]]
{
point = x[nimp[j]]
}
else
{
point = y[nimp[j]]
}
step
= t2n - t0
K-S
= ksa
if
(step>0.0)
{
if
ks1 > 0
{
K-S_1 = ks1/step
}
else
{
K-S_1 = -ks1/step
}
K-S_2
= ks2/step
}
m_1
= abs(m1[j])/wimp
m_2
= abs(m2[j])/wimp
}
if wt_sum[j] > 0.0
{
tempR
= wt_mean[j] - true_mean[j]
MSE
= wt_var[j]/sumwts/wt_sum[j] +
tempR*tempR
}
}
else if imputation and categorical
{
Extract
vectors R and S and matrix T from
values
stored in tables and forming vec = R-S and
matrix = diag(R+S)-T-Tt
remove
not applicable and empty rows and columns
calculate choleski of matrix (in situ)
solve matrix with vec to give vec (in
situ)
W = 0.0
for (i=0 i<k i++)
{
W = W + vec[i]*vec[i]
}
W = W
D = 1.0 - Dcount[j]/nimp[j]
tempR =
sqrt(Dcount[j]/(double)(nimp[j]*nimp[j]))
tempR = (1.0 - Dcount[j]/nimp[j]) -
2.0*tempR
if (tempR > 0.0)
{
Eps
= tempR
}
else
{
Eps
= 0.0
}
if variable is ordinal
{
tempR
= number of true applicable categories
-1
Dgen
= 0.5+0.5*(Dgen[j]/tempR-
count[j])/nimp[j]
}
}
}
}
if
probabilities supplies
{
G = G/(2*m*n)
}
if
(rc>0)
{
A = rc/(rc+rd)
}
else
{
A = 0.0
}
if
(rb>0)
{
B = rb/(ra+rb)
}
else
{
B = 0.0
}
C
= (rb+rc)/n
Input y, x, optional wt, n = length(y), c = 2.0, tol
= 0.00001
Note: this will return errors if x values are
identical or if scale is zero (see mad function)
scale =
0.0
beta =
0.0
while
iterations < 50
{
ssx = 0.0
cxy = 0.0
for i = 0 to n-1
{
res = scale*abs(y[i]-beta*x[i])
if (res > c)
{
wwt
= c/res
}
else
{
wwt
= 1.0
}
if weights
wwt
= wwt*wt[i]
xi = x[i]
yi = y[i]
ssx = ssx+wwt*xi*xi
cxy = cxy+wwt*xi*yi
}
if ssx = 0.0
{
return with error
}
beta = cxy/ssx
if iter = 1
{
beta0 = beta
}
else
{
if abs(beta-beta0) < tol then
exit
iterations
}
for i = 0 t n-1
{
r[i] = y[i] - beta*x[i]
}
call mad with input r return scale
if scale = 0.0
{
return with error
}
scale = 1.0/scale
}
Slope
= beta
t_val =
(beta-1.0)*scale*sqrt(ssx)
Compute R^2 values
ybar
= 0.0
xbar =
0.0
if
weights
{
wtsum = 0.0
for i = 0
to n-1
{
ybar = ybar + y[i]
xbar = xbar + x[i]
wtsum = wtsum + wt[i]
}
if wtsum <= 0.0
{
return with error
}
ybar = ybar/wtsum
xbar = xbar/wtsum
ssx = 0.0
ssy = 0.0
cxy = 0.0
rsum = 0.0
for i = 0
to n-1
{
xi = x[i] - xbar
yi = y[i] - ybar
ssx = ssx + wt[i]*xi*xi
ssy = ssy + wt[i]*yi*yi
cxy = cxy + wt[i]*xi*yi
res = y[i] - beta*x[i]
rsum = rsum + wt[i]*res*res
}
}
else
{
for i=0 to n-1
{
ybar = ybar + y[i]
xbar = xbar + x[i]
}
wtsum = n
ybar = ybar/wtsum
xbar = xbar/wtsum
ssx = 0.0
ssy = 0.0
cxy = 0.0
rsum = 0.0
for i = 0 to n-1
{
xi = x[i] - xbar
yi = y[i] - ybar
ssx = ssx + xi*xi
ssy
= ssy + yi*yi
cxy = cxy + xi*yi
res = y[i] - beta*x[i]
rsum = rsum + res*res
}
}
if ssy
non-zero and ssx non-zero
{
R^2 = cxy*cxy/(ssx*ssy)
}
mse =
rsum/(double)(n-1)
mad function
input y, n = length(y), return scale
Note if the (n+1)/2 middle observations are
identical this will return scale = zero
if (n ==
2)
{
xme = (y[0] + y[1]) / 2.0
if (y[0] > y[1])
{
xmd = y[0] - xme
}
else
{
xmd = y[1] - xme
}
scale = xmd / .6744897501962755
}
else if n
> 2
{
rank y into rank
convert rank into index
km = (n + 1) / 2 - 1
xme = y[rank[km]]
if n is even
{
xme
= 0.5*(xme + y[rank[km+1]])
}
k = -1
k1 = km
k2 = km
x1 = 0.0
x2 = 0.0
Loop:
if k < km
{
k++
if (x1 > x2)
{
k2++
if
k2 <= n
{
x2
= y[rank[k2]] - xme
goto
Loop
}
}
else
{
k1--
if
k1 >= 0
{
x1
= xme - y[rank[k1]]
goto
Loop
}
}
}
if x1 < x2
{
xmd = x1
}
else
{
xmd = x2
}
scale = xmd / .6744897501962755
}