Nothing
TrueTrait=function(datX, y,datXtest=NULL, corFnc = "bicor", corOptions = "use = 'pairwise.complete.obs'", LeaveOneOut.CV=FALSE,skipMissingVariables=TRUE,addLinearModel=FALSE){
datX=as.matrix(datX)
no.variables=dim(as.matrix(datX))[[2]]
datVariableInfo=data.frame(matrix(NA, nrow=no.variables, ncol=4))
names(datVariableInfo)=c("Variable","center", "scale", "weights.y.true2")
if ( is.null(colnames(datX))){datVariableInfo$Variable=1:no.variables} else { datVariableInfo$Variable=colnames(datX)}
no.observations=dim(as.matrix(datX))[[1]]
if (no.observations != length(y) ) {stop("The number of rows of datX does not correspond to the length of y. Consider transposing your input matrix or use a different input.") }
if (no.observations==1 ) {
warning("Only 1 observations, i.e. the length of y is 1. The function cannot be used. For your convenience, the estimated true values will be set to the input value of y.")
y.true1=y; y.true2=y; y.true3=y}
if (no.observations>1 ) {
y.true1=rep(NA, length(y) )
y.true2=rep(NA, length(y) )
y.true3=rep(NA, length(y) )
y.lm=rep(NA, length(y) )
restNonMissingY= !is.na(y)
r.characteristic=NA
SD.ytrue2=NA
SD.ytrue3=NA
SsquaredBE=NA
if (sum(restNonMissingY,na.rm=TRUE) >3 ){
corX = parse(text = paste(corFnc, "(datX,y ",prepComma(corOptions), ")"))
rVector= as.numeric(eval(corX))
datCoef=t(coef(lm(datX~ y,na.action="na.exclude")))
datVariableInfo$center=datCoef[,1] #intercept
datVariableInfo$scale=datCoef[,2] #slope
datXscaled=scale(datX,center=datCoef[,1],scale=datCoef[,2] )
weights0=rVector^2/((1-rVector^2)*var(y,na.rm=TRUE)) # Steve, this is where I made the one change
weights=weights0/sum(weights0)
datVariableInfo$weights.y.true2=weights
y.true1=as.numeric(apply(as.matrix(datXscaled),1,mean))
y.true2=as.numeric(as.matrix(datXscaled)%*%weights)
if (skipMissingVariables ) {
y.true1= as.numeric(apply(as.matrix(datXscaled),1,mean,na.rm=TRUE))
weightsMatrix=matrix(weights,byrow=TRUE,nrow=dim(as.matrix(datXscaled))[[1]],ncol=length(weights) )
weightsMatrix[is.na(datXscaled)]=0
rowsum.weightsMatrix=apply(as.matrix(weightsMatrix),1,sum)
weightsMatrix=t(scale(t(as.matrix(weightsMatrix)),center=F,scale= rowsum.weightsMatrix))
datXscaledweighted= as.matrix(datXscaled* weightsMatrix)
# this corresponds to formula 25 in Klemera et al 2006
y.true2=as.numeric(apply(datXscaledweighted,1,sum,na.rm=TRUE))
} #end of if (skipMissingVariables )
# the following is different from Klemera in that it has an absolute value
r.characteristic=sum(rVector^2/sqrt(1-rVector^2) )/sum(abs(rVector)/sqrt(1-rVector^2) )
no.missing=sum(apply( as.matrix( is.na(datX)),1,sum))
if (sum(no.missing)>0) {warning("The input datX contains missing values.\n
I recommend you impute missing values in datX before running this function. ")
} # end of if (sum(no.missing)>0)
# formula 37 from Klemera
SsquaredBE=var( y.true2-y,na.rm=TRUE) -(1- r.characteristic^2)/r.characteristic^2*var(y,na.rm=TRUE)/no.variables
# this corresponds to formula 34 in Klemera
y.true3=(as.numeric( as.matrix(datXscaled)%*% weights0)+y/SsquaredBE )/( sum(weights0)+ 1/SsquaredBE)
y.true3[is.na(y.true3) ]=y[is.na(y.true3)]
} # end of if (no.observations>1 )
SD.ytrue2=sqrt(1-r.characteristic^2)/r.characteristic*sqrt(var(y,na.rm=TRUE)/no.variables)
# now formula 42
SD.ytrue3=SD.ytrue2/sqrt(1+SD.ytrue2^2/SsquaredBE )
} # end of if (sum(restNonMissingY,na.rm=TRUE) >3 )
datEstimates=data.frame(y, y.true1,y.true2,y.true3)
if (!is.null(datXtest)){
datXtest=as.matrix(datXtest)
no.variablestest=dim(as.matrix(datXtest))[[2]]
if (no.variablestest != no.variables) {stop("the number of variables in the test data is not the same as in the training data")}
y.true1test=rep(NA, length(y) )
y.true2test=rep(NA, length(y) )
y.true3test=rep(NA, length(y) )
restNonMissingY= !is.na(y)
if (sum(restNonMissingY,na.rm=TRUE) >3 ){
datXtestscaled=scale(datXtest,center=datCoef[,1],scale=datCoef[,2] )
y.true1test=as.numeric(apply(as.matrix(datXtestscaled),1,mean) )
y.true2test=as.numeric( as.matrix(datXtestscaled)%*%weights)
if (skipMissingVariables ) {
y.true1test= as.numeric(apply(as.matrix(datXtestscaled),1,mean,na.rm=TRUE))
weightsMatrixtest=matrix(weights,byrow=TRUE,nrow=dim(as.matrix(datXtestscaled))[[1]],ncol=length(weights) )
weightsMatrixtest[is.na(datXtestscaled)]=0
rowsum.weightsMatrixtest=apply(as.matrix(weightsMatrixtest),1,sum)
weightsMatrixtest=t(scale(t(as.matrix(weightsMatrixtest)),center=F,scale= rowsum.weightsMatrixtest))
datXscaledweightedtest= as.matrix(datXtestscaled* weightsMatrixtest)
# this corresponds to formula 25 in Klemera et al 2006
y.true2test=as.numeric(apply(datXscaledweightedtest,1,sum,na.rm=TRUE))
} #end of if (skipMissingVariables )
} # end of if (sum(restNonMissingY,na.rm=TRUE) >3 )
datEstimatestest=data.frame(y.true1= y.true1test,y.true2= y.true2test)
} # end of if (!is.null(datXtest))
if ( LeaveOneOut.CV ) {
y.true1test.LOO=rep(NA,no.observations)
y.true2test.LOO=rep(NA,no.observations)
y.lmLOO= rep(NA,no.observations)
for ( i in 1:no.observations ){
rm(datCoef); rm(corX);
datX.LOO=datX[-i,]
datXtest.LOO= matrix(datX[i,],nrow=1)
y.LOO=y[-i]
no.variables=dim(as.matrix(datX.LOO))[[2]]
no.observations=dim(as.matrix(datX.LOO))[[1]]
if (no.observations==1 ) {
warning("When dealing with leave one out cross validation, there is only 1 observations in the training data")}
if (no.observations>1 ) {
if (addLinearModel) {
lmLOO=lm(y.LOO~., data=data.frame(datX.LOO),na.action=na.exclude)
y.lmLOO[i]= sum(datXtest.LOO*lmLOO$coeff[-1])+lmLOO$coeff[[1]]
}
corX = parse(text = paste(corFnc, "(datX.LOO,y.LOO ",
prepComma(corOptions), ")"))
rVector= as.numeric(eval(corX))
datCoef=t(coef(lm(datX.LOO~ y.LOO,na.action="na.exclude")))
datX.LOOscaled=scale(datX.LOO,center=datCoef[,1],scale=datCoef[,2] )
weights0=rVector^2/(1-rVector^2)
weights=weights0/sum(weights0)
datXtest.LOOscaled=(datXtest.LOO-datCoef[,1])/datCoef[,2]
y.true1test.LOO[i]= mean(datXtest.LOOscaled)
y.true2test.LOO[i]=sum(datXtest.LOOscaled*weights)
if (skipMissingVariables ) {
y.true1test.LOO[i]= mean(datXtest.LOOscaled,na.rm=TRUE)
weightsMatrixLOO= weights
weightsMatrixLOO[is.na(datXtest.LOOscaled)]=0
rowsum.weightsMatrixLOO=sum(weightsMatrixLOO)
weightsMatrixLOO=weightsMatrixLOO/rowsum.weightsMatrixLOO
datXscaledweightedLOO= datXtest.LOOscaled* weightsMatrixLOO
y.true2test.LOO[i]=sum(datXscaledweightedLOO,na.rm=TRUE)
} #end of if (skipMissingVariables )
} # end of for loop
} # end of if (no.observations>1 )
datEstimates.LeaveOneOut.CV=data.frame(y.true1= y.true1test.LOO,
y.true2= y.true2test.LOO)
} # end of if ( LeaveOneOut.CV )
if (addLinearModel) {
y.lmTest=rep(NA, dim(as.matrix(datXtest))[[1]] )
restNonMissingY= !is.na(y)
if (sum(restNonMissingY,na.rm=TRUE) >3 ){
lm1=lm(y~., data=data.frame(datX),na.action=na.exclude)
y.lmTraining=predict(lm1)
y.lmTraining=predict(lm1)
if( !is.null(datXtest)) {y.lmTest=predict(lm1,newdata=data.frame(datXtest))}
}
}
if ( !is.null(datXtest) & LeaveOneOut.CV & !addLinearModel ) {out= list( datEstimates=datEstimates, datEstimatestest= datEstimatestest,
datEstimates.LeaveOneOut.CV= datEstimates.LeaveOneOut.CV, SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( !is.null(datXtest) & !LeaveOneOut.CV & !addLinearModel) {out= list( datEstimates=datEstimates, datEstimatestest= datEstimatestest, SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( is.null(datXtest) & LeaveOneOut.CV & !addLinearModel) {out= list( datEstimates=datEstimates, datEstimates.LeaveOneOut.CV= datEstimates.LeaveOneOut.CV, SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( is.null(datXtest) & !LeaveOneOut.CV & !addLinearModel) {out= list( datEstimates=datEstimates, SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( !is.null(datXtest) & LeaveOneOut.CV & addLinearModel ) {out= list( datEstimates=data.frame(datEstimates, y.lm=y.lmTraining), datEstimatestest= data.frame(datEstimatestest, y.lm=y.lmTest),
datEstimates.LeaveOneOut.CV= data.frame(datEstimates.LeaveOneOut.CV,y.lm=y.lmLOO), SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( !is.null(datXtest) & !LeaveOneOut.CV & addLinearModel) {out= list( datEstimates=data.frame(datEstimates,y.lm=y.lmTraining), datEstimatestest= data.frame(datEstimatestest,y.lm=y.lmTest), SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( is.null(datXtest) & LeaveOneOut.CV & addLinearModel) {out= list( datEstimates=data.frame(datEstimates,y.lm=y.lmTraining), datEstimates.LeaveOneOut.CV= data.frame(datEstimates.LeaveOneOut.CV,y.lm=y.lmLOO),
SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
if ( is.null(datXtest) & !LeaveOneOut.CV & addLinearModel) {out= list( datEstimates=data.frame(datEstimates,y.lm=y.lmTraining), SD.ytrue2=SD.ytrue2, SD.ytrue3=SD.ytrue3, datVariableInfo= datVariableInfo ) }
out
} # end of function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.