R/functions.R

Defines functions ZgMoments predZg CVOptNv predZgNvopt

ZgMoments = function(dta){
  y = dta$y
  x = dta$x
  n = length(dta$y)
  Sx = var(x)
  xbar = colMeans(x)
  sxy = cov(x,y)
  sy = sd(y)
  S = Sx-sxy%*%t(sxy)/sy^2
  Ds = sqrt(diag(S))
  R = S/(Ds%*%t(Ds))
  xc = (x-matrix(rep(xbar,n),nrow=n,byrow=TRUE))/matrix(rep(Ds,nrow(x)),nrow=nrow(x),byrow=TRUE)
  ev = suppressWarnings(eigs(R,k=min(c(n,ncol(x)))))
  ind = which(ev$values>10^(-16))
  ev$vectors = ev$vectors[,ind,drop=FALSE]
  ev$values = ev$values[ind]
  U = ev$vectors
  Z = t((t(U)%*%t(xc)))
  gamma = as.vector(t(U)%*%(sxy/Ds))
  Zg = matrix(rep(gamma,nrow(Z)),nrow=nrow(Z),byrow=TRUE)*Z
  gamma2 = gamma^2
  VarZg = diag(ev$values*gamma2)+(gamma2%*%t(gamma2))/sy^2
  CovYZg = gamma2
  return(list(Zg=Zg,xbar=xbar,xc=xc,S=S,sxy=sxy,U=U,L=ev$values,Ds=Ds,Z=Z,gamma=gamma,VarZg=VarZg,CovYZg=CovYZg,dta=dta))
}

predZg = function(res.ZgMoments,xtest,nv=NULL){
  if(is.vector(xtest)){
    xtest = matrix(xtest,nrow=1)
  }
  xbar = res.ZgMoments$xbar
  U = res.ZgMoments$U
  L = res.ZgMoments$L
  gamma = res.ZgMoments$gamma
  Ds = res.ZgMoments$Ds
  testxc = (xtest-matrix(rep(xbar,nrow(xtest)),nrow=nrow(xtest),byrow=TRUE))/matrix(rep(Ds,nrow(xtest)),nrow=nrow(xtest),byrow=TRUE)
  Z = t(t(U)%*%t(testxc))
  Zg = matrix(rep(gamma,nrow(Z)),nrow=nrow(Z),byrow=TRUE)*Z
  VarZg = res.ZgMoments$VarZg
  CovYZg = res.ZgMoments$CovYZg
  ev = eigen(VarZg)
  ev$vectors = Re(ev$vectors)
  ev$values = Re(ev$values)
  if(is.null(nv)){
    nv = 1:ncol(ev$vectors)
  }else{
    if(max(nv)>ncol(ev$vectors)){
      nv = 1:ncol(ev$vectors)
    }
  }
  res = matrix(0,nrow=nrow(xtest),ncol=max(nv))
  A = matrix(0,ncol=nrow(ev$vectors),nrow=nrow(ev$vectors))
  if(length(nv)>1){
    for(i in 1:max(nv)){
      tmp = (1/ev$values[i])*(matrix(ev$vectors[,i],ncol=1)%*%matrix(ev$vectors[,i],nrow=1))
      A = A+tmp
      h = A%*%CovYZg
      res[,i] = Zg%*%matrix(h,ncol=1)
    }
  }else{
    if(nv==1){
      A = (1/ev$values[1])*(ev$vectors[,1,drop=FALSE]%*%t(ev$vectors[,1,drop=FALSE]))
    }else{
      A = ev$vectors[,1:nv]%*%diag(1/ev$values[1:nv])%*%t(ev$vectors[,1:nv])
    }
    h = A%*%CovYZg
    res = matrix(Zg%*%matrix(h,ncol=1),ncol=1)
  }
  return(res)
}

CVOptNv = function(dta,nfolds,nvmax){
  indcv = split(sample(1:nrow(dta$x)),floor(((1:nrow(dta$x))-1)/(nrow(dta$x)/nfolds)))
  res = lapply(1:nfolds,function(i){
    dtai = list(x=dta$x[indcv[[i]],,drop=FALSE],y=dta$y[indcv[[i]]])
    dtai_ = list(x=dta$x[-indcv[[i]],],y=dta$y[-indcv[[i]]])
    tmp.Zgmoments = ZgMoments(dtai_)
    if(is.null(nvmax)||(nvmax>ncol(tmp.Zgmoments$VarZg))){
      nvmax = ncol(tmp.Zgmoments$VarZg)
    }
    predZgi = predZg(tmp.Zgmoments,dtai$x,nv=1:nvmax)
    return(predZgi)
  })
  y = dta$y[unlist(unname(indcv))]
  nc = min(unlist(lapply(res,ncol)))
  res = lapply(res,function(x){x[,1:nc,drop=FALSE]})
  res = do.call("rbind",res)
  R2 = round(as.vector(cor(y,res)^2),2)
  nv = which(R2==max(R2))
  if(length(nv)>1){
    nv = nv[1]
  }
  return(list(pred=res,nv=nv,R2=R2))
}

predZgNvopt = function(dta,test,nfolds=10){
  resCV = CVOptNv(dta,nfolds)
  res.ZgMoments = ZgMoments(dta)
  nv = resCV$nv
  res = predZg(res.ZgMoments,test$x,nv)
  return(res)
}
fhebert/AdaptivePrediction documentation built on Nov. 4, 2019, 12:40 p.m.