R/fitPCR.R
In gamlss.foreach: Parallel Computations for Distributional Regression

Documented in coef.PCR fitPCR fitted.PCR plot.PCR predict.PCR summary.PCR vcov.PCR

# this function is imitates the function svdpc.fit()
# of package pls
# it gets the coefficients, fitted values and residual 
# for the whole path of models 
# from the 1st PC to the M  principal componet 
############################################################
############################################################
# functions 
# 1) fitPCR(x,y,M,nu,nv) 
# 2) plot.PCR() (OK)
# 3) fitted.PCR() (OK)
# 4) coef.PCR(() (OK)
# 5) summary.PCR (OK)
# 6) print.PCR (OK)
# 7) predict (OK) does it se's?
############################################################
############################################################
# COMMENTS
#   IS THE AIC CALCULATED PROPERLY? 
#   IT SEEMS THAT IT IS SCALED RELATED 
#   FOR EXAPLE IF I CHANGED THE (CONTANT) WEIGHT 
#   THE RESULTS ARE DIFFERENT 
############################################################
############################################################
# TO DO
# i)    It needs testing on whether produce the right coefficients 
#       It looks that the coef fitted values and residuals are OK
# ii)   put stanard errors  OK 
# iii)  put local AIC       OK 
# iv)   add weights         OK needs testing
# v)    add supervised      Not yet
# vi)   add argument to do svd outside (NO maybe not appropriate) 
# vii)  add foreach        needs testing (I am not sute that spead up things) 
# viii) prediction         OK (need more checking)
#############################################################
############################################################# 
fitPCR<- function( x = NULL,      # the x matrix
                   y = NULL,      # the response
             weights = rep(1,n),  # prior weights
                   M = NULL,# maximum no of componets to fits
                  df = NULL,
      #           nu = min(n, p), # 
      #           nv = min(n, p),
          supervised = FALSE,
                   k = 2,         # For the AIC
                   r = 0.2,       # a value between 0 and 1 for supervised
                plot = TRUE)      # wheter to pplot the path
{
##############################################################
############################################################## 
weighted.sd <- function(x, w=rep(1,length(x)), na.rm = FALSE) {
    if (na.rm) {
      w <- w[i <- !is.na(x)]
      x <- x[i]
    }
    sum.w <- sum(w)
    sqrt((sum(w*x^2) * sum.w - sum(w*x)^2) / (sum.w^2 - sum(w^2)))
  }
##############################################################
##############################################################  
scall <- deparse(sys.call(), width.cutoff = 200L) 
  if (is.null(y)) stop("the y is needed") 
     n <- dim(x)[1] # no of observations
     p <- dim(x)[2] # no of variables
     M <- if(is.null(M)) min(n,p) else M
  if(!is.null(df)) M <- df
  if((n>=p)&&(M > p)){
       M <- p 
       warning(cat("M is reset to", p, '\n'))  
     }
  if((n <p)&&(M > n)){
       M <- n 
       warning(cat("M is reset to", n, '\n')) 
     } 
mean.y <- rep(weighted.mean(y, weights), n)
     Y <- as.matrix(sqrt(weights)*y)
 if (is.null(rownames(x))) colnames(x) <- paste0("X", seq(1:dim(x)[2]))
# IS SCALLING SHOULD BE DONE WITH WEIGHTED MEAN AND VARIANCE
# Is this  correcting SOME OF THE PROBLEMS in forecasting????
#############################################################
# (a) scale 
#   weights <- (weights/sum(weights))*n
#   always scale 
     x <- scale(x, center=apply(x,2,weighted.mean, na.rm=TRUE, w=weights),
                 scale=apply(x,2,weighted.sd, na.rm=TRUE, w=weights))
center <- attr(x,"scaled:center")   # we need this for prediction 
 scale <- attr(x,"scaled:scale")
############################################################# 
# (b) transform
     X <- sqrt(weights)*x 
############################################################# 
# (c) supervised
if (supervised) 
  {
     X <- which.yX.Corr(Y, X, r=r, plot=FALSE, print = FALSE)
cnames <- colnames(X)
     p <- dim(X)[2]
     M <- ifelse(n>=p, p, n)
  }
#############################################################       
#    to save 
      B <- matrix(0, nrow=p, ncol=M) # the coefficients of the M fits 
     Fi <- matrix(0,  nrow=n, ncol=M)# the fitted values of the M fits
    AiC <- gamMa <- seGamma <- cpT <- rep(0, M) 
 cNames <- colnames(X)               # the colunm names
############################################################
# (d) svd 
   Xsvd <- La.svd(X, nu=M, nv=M)     
      D <- Xsvd$d[1:M]
      T <- Xsvd$u[,1:M, drop = FALSE] %*% diag(D, nrow = M)# scores
      P <- t(Xsvd$vt[1:M, , drop = FALSE]) # loadings
  gamMa <- crossprod(T,Y)/D^2   # the gamma coef
for (m in 1:M)
  { 
  B[,m] <- P[, 1:m, drop = FALSE] %*% gamMa[1:m]  # the betas
 Fi[,m] <- if (supervised)  x[,cnames] %*% B[,m]+ mean.y
           else             x %*% B[,m]+ mean.y
#Res[,m] <- y-Fi[,m] 
  }
residuals <- c(y) - Fi                             # the residuals
    sig <- apply(residuals, 2, "weighted.sd", w=weights) # all estimated sigmas  
#  with foreach
if (is.null(df))
{
  AiC <- foreach(m = 1 : M, .combine=rbind)%dopar%{
    di <- -2 * dNO(y, mu=Fi[,m], sigma = sig[m], log=TRUE)
    AiC <- sum(di) + k * (m+2)}#  
  # AIC3 <- foreach(m = 1 : M, .combine=rbind)%dopar%{
  #     sigm <- sum(residuals[,m]^2) /n
  #  AIC3 <- n * log(sigm) + k * (m+2)}# this produce identical results to AiC
  }
# the se of gamma  at M  
    se.gamma <- sig[M] / D
 rownames(B) <- cNames
       gamMa <- as.vector(gamMa)
names(gamMa) <- paste0("PC", seq(1:M))
if (plot)
  {
    plot(B[1,]~seq(1,M), ylim=c(min(B),max(B)), 
         type="n", xlab="number of PC", ylab="coef")
    for (i in 1:M)
      lines(B[i,]~seq(1,M), col=i)
  if (is.null(df))  abline(v=which.min(AiC), col="gray")
  }
#  output  
out <- list(coefficients = B, 
                  scores = T, 
                loadings = P, 
                   gamma = gamMa, 
                se.gamma = se.gamma, 
                  center = center, 
                   scale = scale, 
           fitted.values = Fi,  
                    Xvar = D^2, 
                    gaic = AiC, 
                      pc = {if (is.null(df)) which.min(AiC) else df},
                       k = k,
                       M = M,
                  mean.y = mean.y[1],
                   dim.x = dim(x),
                   sigma = sig, 
               residuals = residuals,
                    call = scall)
  class(out) <- "PCR"
  out
}  
################################################################
################################################################
plot.PCR <- function(x, type=c("path", "gaic"), labels=TRUE, cex=0.8,...)
{
  type <- match.arg(type)  
if (type=="path")
{
M <- length(x$gamma)
plot( x$coefficients[1,]~seq(1,M), 
      ylim=c(min(x$coefficients),max(x$coefficients)),
      xlim=c(0, M+1), 
         type="n", xlab="number of PC", ylab="coef")
for (i in 1:M) 
  {lines(x$coefficients[i,]~seq(1,M), col=i)
  if (labels) text(x=M+1, y=x$coefficients[i,M], 
                   labels=rownames(x$coefficients)[i], cex=cex)} 
    abline(v=x$pc, col="gray")
}
else
{
plot(x$gaic, pch=20, xlab="number of componets", ylab="local AIC")
abline(v=x$pc, col="gray")  
}  
}
################################################################
################################################################
print.PCR <- function(x, digits = max(3, getOption("digits") - 3), ...) 
{
  cat("\n A fitted PCR model","\n")
  #cat("\nFamily: ", deparse(x$family), "\n")
  #cat("Fitting method:", deparse(x$method), "\n")
  cat("\nCall: ", deparse(x$call, width.cutoff = 50), "\n", 
      fill = TRUE)
  cat("With gamma coefficients")
  cat(":\n")
    print.default(format(coef(x), digits = digits), 
                  print.gap = 2, quote = FALSE)
  invisible(x)
  }
################################################################
################################################################
summary.PCR <- function(object, param = c("gamma", "beta"),
                                   pc = object$pc, ...)
{
  param <- match.arg(param)  
  if (param=="gamma")
  {
    cat("The gamma coefficients of the", object$M, "Principal Componets","\n")
    return(cbind(coef=coef(object), se=object$se.gamma, t.val=coef(object)/object$se.gamma))  
  }
  else  
  {
    cat("The beta coefficients at", pc, "Principal Componets","\n")
     beta <- coef(object, param="beta", pc=pc)
  se.beta <- vcov(object, type="se", pc=pc)# the se of beta
 return(cbind(beta, se=se.beta, t.val=beta/se.beta)) 
  }  
}
 
################################################################
################################################################
# fitted.PCR
fitted.PCR <- function(object, pc=object$pc, ...)
{
object$fitted.values[, pc=pc]  
}
################################################################
################################################################  
# fitted.PCR
residuals.PCR <- function(object, pc=object$pc, ...)
{
  object$residuals[, pc=pc]  
}
################################################################
################################################################  
# coef.PCR
coef.PCR <- function(object, param = c("gamma", "beta"),
                                pc = object$pc, ...)
{
  param <- match.arg(param)  
 if (param=="gamma")
 {
   return(object$gamma)  
 }
else
 {
  return(object$coefficients[,pc] )
 }
}
################################################################
################################################################
# vcov.PCR
vcov.PCR <- function(object, pc = object$pc, 
                      type = c("vcov", "cor", "se", "all"),
                     ...) 
{
      type <- match.arg(type)
         s <- object$sigma[pc] # sqrt(sum(object$residuals[,pc]^2)/(object$dim.x[1]))
     gamma <- object$gamma[1:pc]
  se.gamma <- s / sqrt(object$Xvar[1:pc])
  # sqrt(diag(crossprod(object$scores[,1:pc], 
  #                      object$scores[,1:pc])))# the se of gamma
m.se.gamma <- if (pc==1) matrix(se.gamma, ncol=1) else diag(se.gamma)
 vcoc.beta <- object$loading[,1:pc]%*%m.se.gamma%*%t(object$loading[,1:pc])
   se.beta <- sqrt(diag(vcoc.beta))
 corr.beta <- cov2cor(vcoc.beta)
switch(type, vcov = vcoc.beta, cor = corr.beta, se = se.beta,  
       all = list(se = se.beta, vcov = vcoc.beta, cor = corr.beta))
}  

################################################################
################################################################
# predict.PCR
predict.PCR <- function(object, newdata=NULL, pc=object$pc, ...)
{
  if (is.null(newdata))
  {
    fv <- fitted(object,  pc=pc)
    return(fv)
  } else
  {
   if (dim(newdata)[2]!=object$dim.x[2])
     stop("the dimensions of the new data is not the same with the old")
    #names <- names(coef(object,"beta"))
      nX  <- as.matrix(newdata) #as.matrix(newdata[,names])
    scale <- object$scale
   center <- object$center
     beta <- coef(object, param="beta",pc=pc)
     pred <- scale(nX, center=center,  scale=scale)%*%beta
     pred <- as.vector(pred) + object$mean.y
    return(pred)
  }  
}

################################################################
################################################################
################################################################
getSigTerms <- function(obj, x=NULL, val=1.96)
{
   if (is.null(x)) stop("the function need the original x variables")
     t.val <- coef(obj)/obj$se.gamma
  whichSig <- as.numeric(abs(t.val) > val) 
     gaMma <- coef(obj, param="gamma", pc=obj$M)*whichSig 
      beta <-  obj$loadings%*%gaMma 
     scale <- obj$scale
    center <- obj$center
        fv <- as.vector(scale(x, center=center,  scale=scale)%*%beta)+obj$mean.y
       out <- list(fitted.values=fv, coefficients=as.vector(beta), gamma=gaMma) 
       out
}
################################################################
################################################################
################################################################
# test <- function(X,M=NULL) 
# {
#   
#    n <- dim(X)[1] # no of observations
#    p <- dim(X)[2] # no of variables
#    M <- if(is.null(M)) min(n,p) else M
# if((n>=p)&&(M > p)){
#   M <- p 
#   warning("M is reset to p")  
# }
# if((n <p)&&(M > n)){
#   M <- n 
#   warning("M is reset to n") 
# } 
#    cat("M=",M,"\n")
#   ll <-  Xsvd <- La.svd(X, nu=M, nv=M)   
#   list(dim(ll$u), dim(ll$vt), length(ll$d))
# }
#   
# weighted.var <- function(x, w, na.rm = FALSE) {
#   if (na.rm) {
#     w <- w[i <- !is.na(x)]
#     x <- x[i]
#   }
#   sum.w <- sum(w)
#   sum.w2 <- sum(w^2)
#   mean.w <- sum(x * w) / sum(w)
#   (sum.w / (sum.w^2 - sum.w2)) * sum(w * (x - mean.w)^2, na.rm =
#                                        na.rm)
# }
# 
# weighted.var2 <- function(x, w, na.rm = FALSE) {
#   if (na.rm) {
#     w <- w[i <- !is.na(x)]
#     x <- x[i]
#   }
#   sum.w <- sum(w)
#   (sum(w*x^2) * sum.w - sum(w*x)^2) / (sum.w^2 - sum(w^2))
# }

# weighted.var <- function(x, w = NULL, na.rm = FALSE) {
#   if (na.rm) {
#     na <- is.na(x) | is.na(w)
#     x <- x[!na]
#     w <- w[!na]
#   }
#   
#   sum(w * (x - weighted.mean(x, w)) ^ 2) / (sum(w) - 1)
# }

# CASE 0
# sig <- sqrt(sum(residuals[,m]^2)/(n))
# di <- -2 * dNO(y, mu=Fi[,m], sigma = sig, log=TRUE) 
# AiC <- sum(di) + k * (m+2)}#constant plus sigma      
# CASE 1
# sig <- weighted.sd(y-Fi[,m], w=weights) #sqrt(sum(residuals[,m]^2)/(n))
#di <- -2 * dNO(y, mu=Fi[,m], sigma = sig[m], log=TRUE)
#AiC <- sum(weights*di) + k * (m+2)}#constant plus sigma
# CASE 2    
#           AiC <- sum(weights(y-Fi[,m])^2)+ k * (m+1)}
# CASE 3
#           sig2 <- sum(weights*residuals[,m]^2)/(n)}
#           AiC  <- n*log(sig2)+  k * (m+1)
# case 4
#           sig <- weighted.sd(residuals[,m], weights)            
#           AiC  <- n*log(sig^2)+  k * (m+1)}

Any scripts or data that you put into this service are public.

gamlss.foreach documentation built on Aug. 28, 2022, 5:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

gamlss.foreach
Parallel Computations for Distributional Regression

R/fitPCR.R
In gamlss.foreach: Parallel Computations for Distributional Regression

Defines functions getSigTerms predict.PCR vcov.PCR coef.PCR residuals.PCR fitted.PCR summary.PCR print.PCR plot.PCR fitPCR

Documented in coef.PCR fitPCR fitted.PCR plot.PCR predict.PCR summary.PCR vcov.PCR

Try the gamlss.foreach package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gamlss.foreach Parallel Computations for Distributional Regression

R/fitPCR.R In gamlss.foreach: Parallel Computations for Distributional Regression

Defines functions getSigTerms predict.PCR vcov.PCR coef.PCR residuals.PCR fitted.PCR summary.PCR print.PCR plot.PCR fitPCR

Documented in coef.PCR fitPCR fitted.PCR plot.PCR predict.PCR summary.PCR vcov.PCR

Try the gamlss.foreach package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gamlss.foreach
Parallel Computations for Distributional Regression

R/fitPCR.R
In gamlss.foreach: Parallel Computations for Distributional Regression