R/cv.pcplus.R

Defines functions cv.pcplus

Documented in cv.pcplus

cv.pcplus <- function(y, bandwidth, lambda, nbandwidth = 30L, nlambda = 30L, lambda.min.ratio = 0.01,
                      sd = NULL, thresh = 1e-7, maxit = 1e5L) {
  if (!is.numeric(y)) {
    stop("'y' must be a numeric vector")
  }
  
  if (!all(is.finite(y))) {
    stop("'y' must contain only finite values")
  }
  
  n <- length(y)
  n2 <- as.integer(n / 2 + 1e-10)
  
  if (missing(bandwidth)) {
    if (!is.numeric(nbandwidth) || length(nbandwidth) != 1 || !is.finite(nbandwidth) || nbandwidth <= 0) {
      stop("nbandwidth must be a single positive integer")
    }
    
    bandwidth <- c(exp(seq(log(2.01 / n2 / 2), log(0.25), length.out = nbandwidth)), Inf)
  } else {
    if (!is.numeric(bandwidth)) {
      stop("'bandwidth' must be a numeric")
    }
    
    if (!all(is.finite(bandwidth))) {
      test <- bandwidth[!is.finite(bandwidth)] == Inf
      if (any(is.na(test)) | any(!test)) {
        stop("all entries of 'bandwidth' must be finite or Inf")
      }
    }
    
    if (any(bandwidth <= 0)) {
      stop("'bandwidth' must contain only positive values")
    }
    
    if (any(bandwidth > 0.25 & bandwidth != Inf)) {
      bandwidth[bandwidth > 0.25 & bandwidth != Inf] <- Inf
      warning("'bandwidth' larger than 0.25 have been replaced by Inf")
    }
    
    if (any(bandwidth < 1 / n2)) {
      bandwidth[bandwidth < 1 / n2] <- 2.01 / n2 / 2
      warning("'bandwidth' smaller than 2 / length(y) have been replaced by 2 / length(y)")
    }
  }
  
  if (missing(lambda)) {
    missinglambda <- TRUE
    
    if (!is.numeric(nlambda) || length(nlambda) != 1 || !is.finite(nlambda) || nlambda <= 0) {
      stop("nlambda must be a single positive integer")
    }
    
    if (!is.numeric(lambda.min.ratio) || length(lambda.min.ratio) != 1 || !is.finite(lambda.min.ratio)) {
      stop("'lambda.min.ratio' must be a single finite numeric value")
    }
    
    if (lambda.min.ratio <= 0 || lambda.min.ratio >= 1) {
      stop("'lambda.min.ratio' must be larger than 0 and smaller than 1")
    }
  } else {
    missinglambda <- FALSE
    
    if (!is.numeric(lambda)) {
      stop("'lambda' must be a numeric")
    }
    
    if (!all(is.finite(lambda))) {
      stop("'lambda' must contain only finite values")
    }
    
    if (any(lambda <= 0)) {
      stop("'lambda' must contain only positive values")
    }
    
    if (any(diff(lambda) > 0)) {
      warning("lambda is not a decreasing sequence; we continue with the sorted sequence")
      lambda <- sort(lambda, decreasing = TRUE)
    }
  }
  
  loss <- function(test, est) mean(abs(test - est))
  
  if (is.null(sd)) {
    sd <- as.numeric(diff(quantile(diff(y), c(0.25, 0.75))) / diff(qnorm(c(0.25, 0.75))) / sqrt(2))
  } else {
    if (!is.numeric(sd) || length(sd) != 1 || !is.finite(sd)) {
      stop("'sd' must be a single finite numeric value or NULL")
    }
    
    if (sd <= 0) {
      stop("'sd' must be a positive value or NULL")
    }
  }
  
  if (!is.numeric(thresh) || length(thresh) != 1 || !is.finite(thresh)) {
    stop("'thresh' must be a single finite numeric value")
  }
  
  if (thresh <= 0) {
    stop("'thresh' must be a positive value")
  }
  
  if (!is.numeric(maxit) || length(maxit) != 1 || !is.finite(maxit)) {
    stop("'maxit' should be a single finite integer.")
  }
  
  if (maxit <= 0) {
    stop("'maxit' must be a positive integer")
  }
  
  if (!is.integer(maxit)) {
    maxit <- as.integer(maxit + 1e-6)
  }
  
  
  fold1 <- seq(1, 2 * n2, 2)
  y1 <- y[fold1]
  fold2 <- seq(2, 2 * n2, 2)
  y2 <- y[fold2]
  
  cv <- numeric(length(bandwidth))
  value <- list()
  
  for (indexBandwidth in seq(along = bandwidth)) {
    if (bandwidth[indexBandwidth] == Inf) {
      estRet <- numeric(n2 * 2)
      
      pelt <- .pelt(obs = y1, sd = sd)$est
      est <- numeric(n)
      est[1] <- pelt[1]
      est[fold2[-1]] <- diff(pelt)
      estRet[fold1] <- cumsum(est)[fold1]
      
      pelt <- .pelt(obs = y2, sd = sd)$est
      est <- numeric(n)
      est[1] <- pelt[1]
      est[fold1[-1]] <- diff(pelt)
      estRet[fold2] <- cumsum(est)[fold2]
      
      cv[indexBandwidth] <- loss(y[1:(2 * n2)], estRet)
      
      value[[indexBandwidth]] <- NA
    } else {
      bw <- bandwidth[indexBandwidth]
      
      if (missinglambda) {
        L <- max(0, min(as.integer(bw * n + 1e-14), n - 1))
        listKernel <- list(y = y, 
                           cusumKernel = numeric(2 * L + 1),
                           Xty = numeric(n - 1),
                           XtX = matrix(0, 2 * L - 1, 4 * L - 2),
                           isComputedXtX = matrix(FALSE, 2 * L - 1, 4 * L - 2), 
                           XtXgap = numeric(2 * L),
                           ImSX = matrix(0, 3 * L, 2 * L),
                           isComputedImSX = logical(2 * L))
        
        ImSy <- y - .kernelSmoothingEpanechnikov(y, bw)
        .initializeKernel(ImSy, bw, listKernel$cusumKernel, listKernel$Xty, listKernel$XtX, listKernel$isComputedXtX,
                          listKernel$XtXgap, listKernel$ImSX, listKernel$isComputedImSX)
        Xty <- .getXty(ImSy, bw, listKernel$cusumKernel, listKernel$Xty, listKernel$XtX, listKernel$isComputedXtX,
                       listKernel$XtXgap, listKernel$ImSX, listKernel$isComputedImSX)
        
        lambda_max <- max(abs(Xty))
        lambda_min <- lambda_max * lambda.min.ratio
        lambda <- exp(seq(from = log(lambda_max), to = log(lambda_min), length.out = nlambda))
      }
      
      vec <- numeric(length(lambda))
      estRet <- matrix(0, 2 * n2, length(lambda))
      
      L2 <- max(0, min(as.integer(bw * n2 + 1e-14), n2 - 1))
      listKernelCV <- list(list(fold = fold1,
                                cusumKernel = numeric(2 * L2 + 1),
                                Xty = numeric(n2 - 1),
                                XtX = matrix(0, 2 * L2 - 1, 4 * L2 - 2),
                                isComputedXtX = matrix(FALSE, 2 * L2 - 1, 4 * L2 - 2), 
                                XtXgap = numeric(2 * L2),
                                ImSX = matrix(0, 3 * L2, 2 * L2),
                                isComputedImSX = logical(2 * L2)),
                           list(fold = fold2,
                                cusumKernel = numeric(2 * L2 + 1),
                                Xty = numeric(n2 - 1),
                                XtX = matrix(0, 2 * L2 - 1, 4 * L2 - 2),
                                isComputedXtX = matrix(FALSE, 2 * L2 - 1, 4 * L2 - 2), 
                                XtXgap = numeric(2 * L2),
                                ImSX = matrix(0, 3 * L2, 2 * L2),
                                isComputedImSX = logical(2 * L2)),
                           list(fold = fold1))
      for (i in 1:2) {
        J <- listKernelCV[[i]]$fold
        mJ <- listKernelCV[[i + 1]]$fold
        y1 <- y[J]
        y2 <- y[mJ]
        
        ImSy <- y2 - .kernelSmoothingEpanechnikov(y2, bw)
        null_dev <- mean(ImSy^2)
        
        .initializeKernel(ImSy, bw, listKernelCV[[i]]$cusumKernel, listKernelCV[[i]]$Xty, listKernelCV[[i]]$XtX, listKernelCV[[i]]$isComputedXtX,
                          listKernelCV[[i]]$XtXgap, listKernelCV[[i]]$ImSX, listKernelCV[[i]]$isComputedImSX)
        
        beta <- .lassoImSX(ImSy, bw, listKernelCV[[i]]$cusumKernel, listKernelCV[[i]]$Xty, listKernelCV[[i]]$XtX, listKernelCV[[i]]$isComputedXtX,
                           listKernelCV[[i]]$XtXgap, listKernelCV[[i]]$ImSX, listKernelCV[[i]]$isComputedImSX,
                           lambda, thresh * null_dev, maxit)
        
        for (indexLambda in seq(along = lambda)) {
          if (any(!is.finite(beta[, indexLambda]))) {
            estRet[J, indexLambda] <- Inf
          } else {
            fit <- c(0, cumsum(beta[, indexLambda]))
            smooth <- .kernelSmoothingEpanechnikov(y2 - fit, bw)
            
            # restimate cps by PELT
            pelt <- .pelt(obs = as.numeric(y2 - smooth), sd = sd)
            
            # reestimate without penalty but condition on cps of PELT's estimate
            cps <- pelt$cps
            
            if (length(cps) > 0) {
              fitJ <- .postProcessing(cps - 2, ImSy, bw, # - 1 for C++ style, - 1 for dealing with extended ImSX matrix, ImSX <- cbind(rep(0, n), ImSX)
                                      listKernelCV[[i]]$cusumKernel, listKernelCV[[i]]$Xty, listKernelCV[[i]]$XtX, listKernelCV[[i]]$isComputedXtX,
                                      listKernelCV[[i]]$XtXgap, listKernelCV[[i]]$ImSX, listKernelCV[[i]]$isComputedImSX) 
              
              fit <- numeric(n2)
              fit[cps] <- fitJ
              fit <- cumsum(fit)
              
              if (i == 1) {
                estRet[J, indexLambda] <- c(0, fit[-length(fit)]) + .kernelSmoothingEpanechnikovCVleft(y2 - fit, bw)
              } else {
                estRet[J, indexLambda] <- fit + .kernelSmoothingEpanechnikovCVright(y2 - fit, bw)
              }
              
            } else {
              if (i == 1) {
                estRet[J, indexLambda] <- .kernelSmoothingEpanechnikovCVleft(y2, bw)
              } else {
                estRet[J, indexLambda] <- .kernelSmoothingEpanechnikovCVright(y2, bw)
              }
            }
          }
        }
      }
      
      for (indexLambda in seq(along = lambda)) {
        vec[indexLambda] <- loss(y[1:(2 * n2)], estRet[, indexLambda])
      }
      
      index <- which.min(vec)
      cv[indexBandwidth] <- vec[index]
      value[[indexBandwidth]] <- lambda[1:index]
    }
  }
  
  index <- which.min(cv)
  list(lambda = value[[index]], bandwidth = bandwidth[index], cv = cv[index], cvs = cv, bandwidths = bandwidth)
}

Try the PCpluS package in your browser

Any scripts or data that you put into this service are public.

PCpluS documentation built on April 4, 2025, 2:14 a.m.