#' @import stats
NULL
#' Estimating equation approach to the proportion of the explained variation,
#' and the explained variation.
#'
#' This function estimates:
#' (1). the proportion of the explained variation
#' (2). the explained variation
#' by covariates in a linear model assuming the covariates are independent.
#'
#' @param y outcome: a vector of length n.
#' @param x covariates: a matrix of nxp dimension.
#' @param lam parameter for altering the weighting matrix.
#' @param niter number of iterations for updating the lam parameter
#' @param alpha a vector of type I error for create the confidence intervals.
#'
#' @details Both point estimate and confidence intervals
#' are computed. Two set of confidence intervals under normal or non-normal error are computed.
#'
#' @return Estimate of the proportion of explained variation, variance estimates, and confidence intervals,
#' under normality and non-normality assumptions.
#'
#' @return Estimate of the explained variation, variance estimates, and confidence intervals
#' under normality and non-normality assumptions.
#'
#' @references Chen, H.Y. (2022). Statistical inference on explained variation in high-dimensional
#' linear model with dense effects. arXiv:2201.08723
#' @references Chen, H. Y., Li, H., Argos, M., Persky, V. W., and Turyk, M. (2022). Statistical Methods
#' for Assessing Explained Variation of a Health Outcome by Mixture of Exposures. International Journal
#' of Environmental Research and Public Health.
#'
#' @examples \dontrun{RVee(y,x)}
#'
#'@export
RVee=function(y,x,lam=1,niter=0,alpha=c(0.05)){
n=dim(x)[1]
p=dim(x)[2]
#1. Standardization
for(j in 1:p){
mu=mean(x[,j])
sdx=sd(x[,j])
x[,j]=(x[,j]-mu)/sdx
}
sdy=sd(y)
y=(y-mean(y))/sdy
#2. Sigular value decomposition
Xsvd=svd(x,nv=0) #nv=0 means not computing v matrix
# singular value decomposition
# $u%*%diag($d)%*%t($v)=X, t($u)%*%$u=I, t($v)%*%$v=I
uy=t(Xsvd$u)%*%y
Mev=Xsvd$d^2/p #Vector of eigenvalues of matrix XX'/p.
Wev=(Mev-1)/(1+lam*Mev)^2 #vector of eigenvalues of weight matrix
u1=t(Xsvd$u)%*%rep(1,n)
if(n>p){
com=sum(u1^2*(Wev+1))/n-1 #negligible
num=sum(uy^2*(Wev+1))-sum(y^2)-sum(Wev)+n-p
den=sum(Wev*(Mev-1))+n-p
}else{
com=sum(u1^2*Wev)/n #negligible
num=sum(uy^2*Wev)-sum(Wev)
den=sum(Wev*(Mev-1))
}
r2=(num+com)/(den+com)
r2=min(1,max(0,r2)) # initial value
#print(c(r2,num/den))
if(niter>0){
for(ii in 1:niter){ #iteration to update lambda
if(ii>0 & r2<1){lam=r2/(1-r2)}
Wev=(Mev-1)/(1+lam*Mev)^2 #vector of eigenvalues of weight matrix
#3. Compute the estimators
u1=t(Xsvd$u)%*%rep(1,n)
if(n>=p){
#com=sum(u1^2*(Wev+1))/n-1
num=sum(uy^2*(Wev+1))-sum(y^2)-sum(Wev)+n-p
den=sum(Wev*(Mev-1))+n-p
}else{
#com=sum(u1^2*Wev)/n
num=sum(uy^2*Wev)-sum(Wev)
den=sum(Wev*(Mev-1))
}
r2=(num+com)/(den+com)
r2=min(1,max(0,r2)) #The proportion of explained variation
}}
vy=sdy*sdy
s2=vy*r2 #The explained variation
#4. estimate variance of the estimators
#4(a). With normal random error assumption
Dev=Wev*Mev
D1=sum(Dev)/n
if(n>=p){
D2=(sum(Wev)-n+p)/n
A0=2*p*var(Dev)
A01=2*p*cov(Dev,Mev)
A1=2*p*var(Mev)
B=sum(Wev^2*Mev)
S=sum(Wev^2)+n-p
W=Xsvd$u%*%diag(Wev+1)%*%t(Xsvd$u)-diag(rep(1,n))
T=sum(diag(W^2))
}else{
D2=sum(Wev)/n
A0=2*n*(mean(Dev^2)-(mean(Dev))^2*n/p)
A01=2*n*(mean(Dev*Mev)-mean(Dev)*mean(Mev)*n/p)
A1=2*n*(mean(Mev^2)-(mean(Mev))^2*n/p)
B=sum(Wev^2*Mev)
S=sum(Wev^2)
W=Xsvd$u%*%diag(Wev)%*%t(Xsvd$u)
T=sum(diag(W^2))
}
DELTA=r2*D1+(1-r2)*D2
vestr2n=r2^2*(A0-2*DELTA*A01+DELTA^2*A1)
vestr2n=vestr2n+4*r2*(1-r2)*(B-2*DELTA*D1*n+n*DELTA^2)
vestr2n=vestr2n+2*(1-r2)^2*(S-2*DELTA*D2*n+n*DELTA^2)
vests2n=r2^2*(A0-2*D2*A01+D2^2*A1)
vests2n=vests2n+4*r2*(1-r2)*(B-2*n*D1*D2+n*D2^2)
vests2n=vests2n+2*(1-r2)^2*(S-D2^2*n)
vests2n=vests2n*vy^2
#4(b). Without normal random error assumption
M=Xsvd$u%*%diag(Mev)%*%t(Xsvd$u)
veps2=mean((y^2-1-(diag(M)-1)*r2)^2)-4*r2*(1-r2)-2*r2^2
vestr2=vestr2n+(T-2*DELTA*D2*n+n*DELTA^2)*(max(veps2,0)-2*(1-r2)^2)
vests2=vests2n+vy^2*(T-D2^2*n)*(max(veps2,0)-2*(1-r2)^2)
# proportion of explained variation
len=length(alpha)
vestr2=vestr2/den^2
cir2=r2+sqrt(vestr2)*qnorm(c(alpha/2,1-alpha/2))
cir2[c(1:len)]=cir2[c(1:len)]*(cir2[c(1:len)]>0)
cir2[len+c(1:len)]=cir2[len+c(1:len)]*(cir2[len+c(1:len)]<1)+1.0*(cir2[len+c(1:len)]>=1)
vestr2n=vestr2n/den^2 #under normal error
cir2n=r2+sqrt(vestr2n)*qnorm(c(alpha/2,1-alpha/2))
cir2n[c(1:len)]=cir2n[c(1:len)]*(cir2n[c(1:len)]>0)
cir2n[len+c(1:len)]=cir2n[len+c(1:len)]*(cir2n[len+c(1:len)]<1)+1.0*(cir2n[len+c(1:len)]>=1)
# explained variation
vests2=vests2/den^2
cis2=s2+sqrt(vests2)*qnorm(c(alpha/2,1-alpha/2))
cis2[c(1:len)]=cis2[c(1:len)]*(cis2[c(1:len)]>0)
vests2n=vests2n/den^2 #under normal error
cis2n=s2+sqrt(vests2n)*qnorm(c(alpha/2,1-alpha/2))
cis2n[c(1:len)]=cis2n[c(1:len)]*(cis2n[c(1:len)]>0)
#5. output result
ind=rep(0,2*len)
ind[2*c(1:len)-1]=c(1:len)
ind[2*c(1:len)]=len+c(1:len)
list(c(r2,vestr2,vestr2n),cir2[ind],cir2n[ind],
c(s2,vests2,vests2n),cis2[ind],cis2n[ind])
#[[1]]==> R2: estimate, estimated variance, estimated variance under normal error.
#[[2]]==>confidence interval for R2
#[[3]]==>confidence interval for R2 under normal error.
#[[4]]==> sigma_s^2: estimate, estimated variance, estimated variance under normal error.
#[[5]]==>confidence interval for sigma_s^2
#[[6]]==>confidence interval for sigma_s^2 under normal error.
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.