R/Dtd.R

Defines functions Dtd

Documented in Dtd

#' DTD
#' @param scdata single data with genes in rows and cells in columns.
#' @param bulkdata A matrix with genes in rows and samples in columns.
#' @param label The cell type label of single data.
#' @import DTD
#' @importFrom MatrixGenerics rowSds
#' @return   A data frame of Mixed cellular fractions.
#' @export
#'
#' @examples
#'
#' #Bulk <- Bulk_GSE60424
#' #SC <- SC_GSE60424
#' #Label <- Label_GSE60424$Label
#' #res <- Dtd(bulkdata = Bulk,
#' #            scdata = SC,
#' #            label = Label)

Dtd <- function(scdata, bulkdata,label) {

  gene <- intersect(rownames(scdata),rownames(bulkdata))

  scdata <- scdata[gene,]

  bulkdata <- bulkdata[gene,]

  indicator.list <- label

  names(indicator.list) <- colnames(scdata)

  include.in.X <- names(table(label))

  ##making signature matrix X

  sample.X <- DTD::sample_random_X(included.in.X = include.in.X,
                                  pheno = indicator.list,
                                  expr.data = as.matrix(scdata),
                                  percentage.of.all.cells = 0.1,
                                  normalize.to.count = TRUE )
  #####signature
  X.matrix <- sample.X$X.matrix

  ##remove some unimportant
  if ( any(table(label)<4)){
     samples.to.remove <- NULL
     remaining.expr <- scdata
  }else{
       samples.to.remove <- sample.X$samples.to.remove

       remaining.expr <- scdata[, -which(colnames(scdata) %in% samples.to.remove)]

  }


  ##reducing features(genes)selecting top sd of 500 genes
  n.features <- 500

  sds.in.x <- MatrixGenerics::rowSds(X.matrix)

  names(sds.in.x) <- rownames(X.matrix)

  sorted.sds.in.x <- sort(sds.in.x, decreasing = TRUE)

  selected.feature <- names(sorted.sds.in.x)[1:n.features]

  X.matrix <- X.matrix[selected.feature, ]

  remaining.expr <- remaining.expr[selected.feature, ]

  #making test data in-silicio bulk
  # rule of thumb:

  n.samples <- 500

  # there are 4645 SC profiles, set 'n.per.mixture' 10% of that => ~400

   n.per.mixture <- round(ncol(scdata)*0.1)

  # split all profiles randomly into training and testing

  tyy <- indicator.list[-which(colnames(scdata) %in% samples.to.remove)]

  if( length(tyy)==0){

    tyy <- indicator.list

 }else{

     tyy <- indicator.list[-which(colnames(scdata) %in% samples.to.remove)]

 }

   train.samples <- NULL

   for ( ty in names(table(tyy))) {

    train <- sample(

      x = colnames(remaining.expr)[which(tyy==ty)],

      size = ceiling(length(which(tyy==ty)) / 2),

      replace = FALSE
    )

      train.samples <- c(train.samples ,train)
  }

      test.samples <- colnames(remaining.expr)[which(!colnames(remaining.expr) %in% train.samples)]

  # extract training data ...
      train.expr <- remaining.expr[, train.samples]

      indicator.train <- indicator.list[train.samples]

  # same for test:

  if (any(table(label)==1)){
      test.samples <- c(test.samples,colnames(SC)[which(Label==names(which(table(label)==1)))])

      test.expr <- remaining.expr[, test.samples]

      indicator.test <- indicator.list[test.samples] }else{

      test.expr <- remaining.expr[, test.samples]

      indicator.test <- indicator.list[test.samples]

    }

   test.expr <- remaining.expr[, test.samples]

   indicator.test <- indicator.list[test.samples]

  # apply the function:
   training.data <- DTD::mix_samples(expr.data = as.matrix(train.expr),
                                     pheno = indicator.train,
                                     included.in.X = include.in.X,
                                     n.samples = n.samples,
                                     n.per.mixture = n.per.mixture,
                                     verbose = FALSE )



   test.data <- DTD::mix_samples(expr.data = as.matrix(test.expr),
                                  pheno = indicator.test,
                                  included.in.X = include.in.X,
                                  n.samples = n.samples,
                                  n.per.mixture = n.per.mixture,
                                  verbose = FALSE )
  ##making estimating model data
   standard.model <- rep(1, n.features)

   names(standard.model) <- selected.feature

  #making train data
   start.tweak <- rep(1, n.features)

   names(start.tweak) <- selected.feature

   lambda.sequence <- 2^seq(0, -50, length.out = 20)

   model <- DTD::train_deconvolution_model(tweak = start.tweak,
                                           X.matrix = X.matrix,
                                           train.data.list = training.data,
                                           test.data.list = test.data,
                                           estimate.c.type = "direct",
                                           maxit = 50,
                                           lambda.seq = lambda.sequence,
                                           verbose = FALSE,
                                           cv.verbose = TRUE,
                                           warm.start = TRUE)

   bulk.data  <- bulkdata[intersect(rownames(X.matrix),rownames(bulkdata)),]

   estimated.c.bulk <- DTD::estimate_c(X.matrix = X.matrix,
                                        new.data = as.matrix(bulk.data),
                                        DTD.model = model)
   res_Dtd <- t(estimated.c.bulk)

   return(res_Dtd)

}
libcell/deconvBench documentation built on Sept. 24, 2022, 12:36 p.m.