R/IMA.R
In countland: Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq

Documented in IMA IMA_Compute_Init_Scaled IMA_init IMA_params IMA_Update_Factor

# This code was copied from the repository at
# github.com/shchurch/integer-matrix-approximation
# on July 11, 2022

#' Parameter class for IMA
#'
#' @param rank target number of features in final matrices
#' @param u_bounds upper bounds on integers
#' @param l_bounds lower bounds on integers (default = c(0,0))
#' @param maxiter maximum number of iterations (default = 1000000)
#' @param stop_crit criterion of difference at which to stop (default = 0.0001)
#'
#' @return parameter object
IMA_params <- function(rank,u_bounds,l_bounds=c(0,0),maxiter=1000000,stop_crit=0.0001){
  max_inner_iter <- 1
  alpha <- 0.01

  params <- list(rank,u_bounds,l_bounds,maxiter,stop_crit,max_inner_iter,alpha)
  names(params) <- c("rank","u_bounds","l_bounds","maxiter","stop_crit","max_inner_iter","alpha")
  return(params)
}

#' Update factor matrix - see SUSTain code
#'
#' @param M matrix to be updated (either U or V)
#' @param coeff matrix used in updating algorithm
#' @param mkrp matrix used in updating algorithm
#' @param mode whether update U or V
#' @param lambda_ scaling matrix
#' @param params parameter object
#'
#' @return updated matrix and scaling factors
IMA_Update_Factor <- function(M,coeff,mkrp,mode,lambda_,params){

  r <- ncol(M)
  for(k in seq(1:r)){
    core <- M %*% (lambda_ * coeff[,k])

    ## will be subtracted to update based on new lambda[k]
    core_k <- M[,k] * (lambda_[k]*coeff[k,k])

    ## update scalar weight of k-th component
    delta_lambda_k <- (t(M[,k]) %*% (mkrp[,k]-core)) / (coeff[k,k]*(norm(M[,k],"2")^2))
    lambda_[k] <- max(1,round(lambda_[k]+delta_lambda_k))

    ## adjust core with new lambda[k]
    core <- core - core_k + (M[,k] * (lambda_[k]*coeff[k,k]))

    ## get k-th column of factor matrix
    Mk <- M[,k] + ((mkrp[,k]-core) / (lambda_[k]*coeff[k,k]))

    ## project to integer constrained set
    Mk <- round(Mk)
    if (!is.infinite(params$l_bounds[mode])){
      Mk[Mk < params$l_bounds[mode]] <- params$l_bounds[mode]
    }
    if (!is.infinite(params$u_bounds[mode])){
      Mk[Mk > params$u_bounds[mode]] <- params$u_bounds[mode]
    }

    ## update k-th column of factor matrix
    M[,k] <- Mk

    ## avoid zero lock
    if(all(M[,k]==0)){
      stopifnot(params$u_bounds[mode] >= 1 && params$l_bounds[mode] >= 0)
      M[sample(seq(1:nrow(M)),1),k] <- 1 # add 1 to random value in k-th column
    }
  }

  return(list(M,lambda_))
}

#' rescale if max val is above upper bound
#'
#' @param h matrix to be rescaled
#' @param l_bound lower bound
#' @param u_bound upper bound
#'
#' @return rescaled matrix
IMA_Compute_Init_Scaled <- function(h,l_bound,u_bound){
  for(j in seq(1:ncol(h))){
    maxval <- max(h[,j])

    # if(maxval==0){
    #    stop("Caution, will divide by zero!")
    #}
    if (maxval>u_bound){
      mult_j <- u_bound / maxval
      h[,j] <- round(mult_j * h[,j])
    }
  }

  h[h < l_bound] <- l_bound
  return(h)
}

#' function to initialize U, V, and Lambda
#'
#' @param X observed data matrix
#' @param params parameter object
#'
#' @return initialized U, V, and Lambda matrices
IMA_init <- function(X,params){
  initU <- matrix(rep(0,nrow(X)*params$rank),ncol=params$rank)

  for(r in seq(1:params$rank)){
    p <- sample(nrow(X),nrow(X))
    nbig <- round((1 / params$rank) * nrow(X))
    initU[p[1:nbig],r] <- sample(seq(params$l_bounds[1]:params$u_bounds[1]),nbig,replace=T)
  }

  if(Matrix::rankMatrix(initU)!=params$rank){
    stop("initializing U failed to achieve target rank, try again")
  }

  rank_flag <- F
  max_rank_flag <- 0
  while(rank_flag == F){
    pat_picked <- sample(seq(1:nrow(X)),params$rank,replace=T)
    initV <- t(X[pat_picked,])
    initV <- IMA_Compute_Init_Scaled(initV,params$l_bounds[2],params$u_bounds[2])

    rank_flag <- (Matrix::rankMatrix(initV)==params$rank)
    max_rank_flag <- max_rank_flag + 1
    if(max_rank_flag == 1000){
      stop("cannot initialize V because rank is not achieved")
    }
  }

  initLambda <- rep(1,params$rank)
  return(list(initU,initV,initLambda))
}

#' run integer matrix approximation
#'
#' @param X observed data matrix
#' @param params parameter object
#'
#' @return U, V, and Lambda matrix factors
IMA <- function(X,params){
  # IMA init

  init_matrices <- IMA_init(X,params)
  U <- init_matrices[[1]]
  V <- init_matrices[[2]]
  lambda_ <- init_matrices[[3]]

  stopifnot(sum(lambda_ >= 1) == length(lambda_))

  nX <- norm(X,"F")^2

  iter_ <- 0

  ## convergence arrays
  e <- list(0)
  fit <- list(0)

  ## main loop
  while(iter_ <= params$maxiter){

    ## Update U and lambda
    A <- X %*% V
    B <- t(V) %*% V
    res1 <- IMA_Update_Factor(U,B,A,1,lambda_,params)
    U <- res1[[1]]
    lambda_ <- res1[[2]]

    ## Update V and lambda
    A <- t(X) %*% U
    B <- t(U) %*% U
    res2 <- IMA_Update_Factor(V,B,A,2,lambda_,params)
    V <- res2[[1]]
    lambda_ <- res2[[2]]

    ## Check convergence
    V_ <- V %*% diag(lambda_)
    e <- append(e,sqrt(max(nX - (2*sum(V_ * A)) + sum(sum( B * (t(V_) %*% V_) )),0)))
    fit <- append(fit,1-(e[[length(e)]]/sqrt(nX)))

    if(iter_ > 0){
      if(abs(fit[[length(fit)-1]] - fit[[length(fit)]]) < params$stop_crit){
        #print("SUSTain_M fit for each iteration")
        #print(fit)
        break
      }
    }

    iter_ <- iter_ + 1
  }

  return(list(U,V,diag(lambda_)))
}

Any scripts or data that you put into this service are public.

countland documentation built on May 29, 2024, 7:13 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

countland
Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq

R/IMA.R
In countland: Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq

Defines functions IMA IMA_init IMA_Compute_Init_Scaled IMA_Update_Factor IMA_params

Documented in IMA IMA_Compute_Init_Scaled IMA_init IMA_params IMA_Update_Factor

Try the countland package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

countland Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq

R/IMA.R In countland: Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq

Defines functions IMA IMA_init IMA_Compute_Init_Scaled IMA_Update_Factor IMA_params

Documented in IMA IMA_Compute_Init_Scaled IMA_init IMA_params IMA_Update_Factor

Try the countland package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

countland
Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq

R/IMA.R
In countland: Analysis of Biological Count Data, Especially from Single-Cell RNA-Seq