##' Rcpp implementation of the HCP algorithm
##'
##' Objective:
##' This function solves the following problem:
##' argmin_{Z,B,U} ||Y-Z*B||_2 + lambda1*||Z-F*U||_2 + lambda2*||B||_2 +
##' lambda_3||U||_2
##'
##' To use the residual data: Residual = Y - Z*B
##'
##' Note: k>0, lambda1>0, lambda2>0, lambda3>0 must be set by the user based on
##' the data at hand. one can set these values using cross-validation, by
##' evaluating the "performance" of the resulting residual data on a desired
##' task. typically, if lambda>5, then hidden factors match the known covariates closely.
##' @title R/Rcpp implementation of the HCP algorithm
##' @param Z a matrix nxd of known covariates, where n is the number of
##' subjects and d is the number of known covariates. *must be standardize
##' (columns have 0 mean and constant SS).
##' @param Y a matrix of nxg of expression data (must be standardized (columns
##' scaled to have constant SS and mean 0). ** use standardize function to standardize F and Y.
##' @param k number of inferred hidden components (k is an integer)
##' @param lambda1 model parameter 1
##' @param lambda2 model parameter 2
##' @param lambda3 model parameter 3
##' @param iter (optional) iter: number of iterations (default = 100);
##' @param stand default standardize data TRUE
##' @param log default log-transformation TRUE
##' @param fast default use fast RcppArmadillo implementation
##' @param verbose default TRUE
##' @return list
##' Z: matrix of hidden components, dimensionality: nxk,
##' B: matrix of effects of hidden components, dimensionality: kxg,
##' o: value of objective function on consecutive iterations.
##' @author mvaniterson
##' @references \url{http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0068141}
##' @importFrom stats runif
##' @importFrom Rcpp evalCpp
##' @useDynLib Rhcpp
##' @export
##' @examples
##' data(rhcppdata)
##' F <- rhcppdata$F
##' Y <- rhcppdata$Y
##' k <- 10
##' lambda1 <- 20
##' lambda2 <- 1
##' lambda3 <- 1
##' iter <- 100
##' ##and run
##' ##Rres <- hcp(Z, Y, k, lambda1, lambda2, lambda3, iter)
hcp <- function(Z, Y, k, lambda1, lambda2, lambda3, iter=100, stand=TRUE, log=TRUE, fast=TRUE, verbose=TRUE)
{
t0 <- proc.time()
if(is.null(Z)) {
message("Assume only hidden components")
Z <- matrix(runif(2*nrow(Y), 1, 10), nrow=nrow(Y), ncol=2)
lambda3 <- 0 ##do not penalized the coefficients with an effect on the known covariates
}
## (0) check dimensions
if(nrow(Y) != nrow(Z))
stop("Rows represent the samples for both Y and Z!")
if(sum(is.na(Z)) > 0 | sum(is.na(Y)) > 0)
stop("NA's in input data are not allowed!")
## (1) take log of read counts
if(log)
{
message("Log-transformation data...")
Y <- log2(2+Y)
if(sum(is.na(Y)) > 0)
stop("NA's introduced by log-transformation these are not allowed!")
}
standardize <- function(x)
{
x <- x - outer(rep(1, nrow(x)), colMeans(x)) ##center
x <- x*outer(rep(1, nrow(x)), 1/sqrt(colSums(x^2))) ##scale SS
##x <- apply(x, 2, function(x) x - mean(x))
##x <- apply(x, 2, function(x) x/sqrt(sum(x^2)))
x
}
## (2) standardize the data
if(stand) {
message("Standardize data...")
Y <- standardize(Y)
Z <- standardize(Z)
}
if(sum(is.na(Z)) > 0 | sum(is.na(Y)) > 0) {
message(paste("Row(s) (Z):", paste(which(apply(Z, 1, function(x) any(is.na(x)))), collapse=", ")))
message(paste("Row(s) (Y):", paste(which(apply(Y, 1, function(x) any(is.na(x)))), collapse=", ")))
stop("NA's introduced by the standardization these are not allowed!")
}
## (3) HCP
if(fast) {
message("Run RcppArmadillo implemented HCP algorithm...")
res <- rcpparma_hcp(Z, Y, k, lambda1, lambda2, lambda3, iter)
W <- res$Z
B <- res$B
}
else
{
message("Run plain R implemented HCP algorithm...")
res <- r_hcp(Z, Y, k, lambda1, lambda2, lambda3, iter)
W <- res$W
B <- res$B
}
if(verbose)
message(paste("Finished after", res$iter, "iterations of", iter, "iterations."))
if(verbose)
message(paste("The batch correction took:", round((proc.time() - t0)[3], 2), "seconds."))
return(list(Res = Y - W%*%B, W=W, B=B, Y=Y, Z=Z))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.