R/BSplineMI.R

Defines functions calcWeights entropy1d calcKnots scale0to1 calcSplineMI calcSplineMItoFile hist2d entropy2d

Documented in calcSplineMI calcSplineMItoFile calcWeights entropy1d entropy2d hist2d

#' Calculate bin weights using B-Spline
#'
#' Takes an expression matrix and calculates for each value the weights for each
#' bin using B-Splines of a given order.
#'
#' @param x Gene expression matrix with genes in rows and samples in columns
#' @param nBins Number of bins
#' @param splineOrder Spline order
#'
#' @return 3-dimensional array of weights with dimensions: nBins x samples x genes
#' @export
#'
#' @examples
#' x <- matrix(0:100,nrow=1)
#' w <- calcWeights(x,nBins = 5,splineOrder = 3)
#' plot(NULL,xlim=c(0,100),ylim=c(0,1),xlab="x",ylab="weight")
#' for(i in 1:dim(w)[1]){
#'   lines(x[1, ],w[i, ,1],col=rainbow(dim(w)[1])[i])
#' }
calcWeights <-function( x, nBins, splineOrder){

  # scales the expressions for each gene to between [0,1]*(nBins-splineOrder+1)
  z <- apply(x,1,scale0to1) * (nBins-splineOrder+1)
  # note z is transposed. i.e. [samples x genes]

  knots <- calcKnots(nBins, splineOrder)

  w <- SplineBlendAll(z, knots, splineOrder, nBins)
  # add some dimensions
  # should be [bins x samples x genes]
  dim(w) <- c(nBins,dim(z))
  dimnames(w) <- c(list(paste0("bin",1:nBins)),dimnames(z))

  return(w)
}


#' Calculate entropy per gene
#'
#' @param weights 3-dimensional array of weights with dimensions:
#' nBins x samples x genes. (see \code{\link{calcWeights}})
#'
#' @return vector of entropy per gene in bits
#' @export
#'
#' @examples
#' data( riceEx )
#' hist( entropy1d( calcWeights( riceEx, 7, 3) ) )
entropy1d <- function( weights ){
  # p is mean of weights for all samples per bin and gene
  P <- apply(weights, c(1,3),mean)
  # p: array [nBins x genes]

  # for each gene calculate entropy
  apply(P,2,function(p){
    -sum(p * log2(p), na.rm = T)
    # note: na.rm = T because when p=0 0*log2(0) sum returns NaN
  })
}


# Calculate B-Spline knots
#
# @param nBins Number of bins
# @param splineOrder Spline order
#
# @return vector of B-Spline knots
calcKnots <- function(nBins, splineOrder){
  c(rep(0,splineOrder),1:(nBins-splineOrder),rep(nBins-splineOrder+1,splineOrder))
}

# Scale a vector to between 0 and 1
#
# @param x numeric vector
#
# @return scaled numeric vector
scale0to1 <- function(x){
  Xrange <- range(x,na.rm = T)
  Xmin <- Xrange[1]
  Xfactor <- (Xrange[2]-Xrange[1])
  if( Xfactor == 0){
    # handle genes where all samples have 0 expression
    return( rep(0.0,length(x)) )
  } else{
    return( (x-Xmin)/Xfactor )
  }
}

#' Calculate mutual information
#'
#' Calculate mutual information for an expression matrix using B-Spline binning.
#'
#' @inheritParams calcWeights
#' @param threads if >1 then run in parallel
#'
#' @return matrix of mutual information (bits)
#' @export
#'
#' @examples
#' data( riceEx )
#' calcSplineMI( riceEx[1:5, ], 7, 3)
#'
#' # compare single CPU vs two CPU's
#' system.time( mi_1 <- calcSplineMI( riceEx, 7, 3) )
#' system.time( mi_2 <- calcSplineMI( riceEx, 7, 3, threads = 2) )
#' identical( mi_1, mi_2 )
calcSplineMI <- function(x, nBins, splineOrder, threads = 1){
  weights <- calcWeights(x,nBins,splineOrder)
  entropy <- entropy1d(weights)
  mi <- calcMIfromWeights(entropy, weights, threads)
  dimnames(mi) <- list(rownames(x),rownames(x))
  return(mi)
}


#' Calculates MI matrix and write directly to file
#'
#' Calculates specified rows of lower half of MI matrix and write directly to
#' file. This uses less memory as it doesn't need to hold the entire mi matrix
#' in memory. By setting fromRow and toRow it is possible to split up the job
#' to run independantly in parallel.
#'
#'
#' @inheritParams calcWeights
#' @param filename name of output file
#' @param fromRow first row of MI matrix to generate
#' @param toRow last row of MI matrix to generate
#'
#' @return Returns nothing. The results are written to file as text with values
#' space separated and one row per line. Gene IDs are discarded.
#'
#' @export
#'
#' @examples
#' tmp <- tempfile()
#' data(riceEx)
#' calcSplineMItoFile( riceEx, 7, 3, filename = tmp, fromRow = 4, toRow = 6 )
#' cat(readLines(tmp),sep="\n")
#' unlink(tmp)
calcSplineMItoFile <- function(x, nBins, splineOrder, filename,
                               fromRow=2, toRow=nrow(x)){

  # check arguments
  stopifnot( fromRow >= 1 && toRow <= nrow(x) && fromRow <= toRow )

  weights <- calcWeights(x[1:toRow,], nBins, splineOrder)

  entropy <- entropy1d(weights)

  # open output file
  outCon = file(filename, open = "wt")

  # note: 1st row is always skipped as it would be empty
  for( i in max(fromRow,2):toRow){
    sapply(1:(i-1), function(j){
      entropy[j] + entropy[i] - entropy2d_C(weights,i-1,j-1)
    }) -> miRow

    cat( paste(sprintf("%f",pmax(0,miRow)),collapse=" "), sep="\n", file = outCon )
  }
  close(outCon)
}

#' Calculate joint probability density matrix from weights
#'
#' @param weights 3-dimensional array of weights with dimensions:
#' nBins x samples x genes as generated by \code{\link{calcWeights}}
#' @param i1 index to 1st gene
#' @param i2 index to 2nd gene
#'
#' @return Returns joint probability density matrix [bins x bins]
#' @export
#'
#' @examples
#' m <- hist2d( calcWeights( riceEx, 7, 3), 1, 2 )
#' image(m)
hist2d <- function(weights, i1, i2){
  # check arguments
  stopifnot( length(dim(weights))==3 && length(weights) == prod(dim(weights)) )
  stopifnot( as.integer(i1) > 0 && as.integer(i1) <= dim(weights)[3] )
  stopifnot( as.integer(i2) > 0 && as.integer(i2) <= dim(weights)[3] )

  hist2d_C(weights, i1-1, i2-1)
}

#' Calculate joint entropy of two genes
#'
#' @inheritParams hist2d
#'
#' @return joint entropy in bits
#' @export
#'
#' @examples
#' entropy2d( calcWeights( riceEx, 7, 3), 1, 2 )
entropy2d <- function(weights, i1, i2){
  # check arguments
  stopifnot( length(dim(weights))==3 && length(weights) == prod(dim(weights)) )
  stopifnot( as.integer(i1) > 0 && as.integer(i1) <= dim(weights)[3] )
  stopifnot( as.integer(i2) > 0 && as.integer(i2) <= dim(weights)[3] )

  entropy2d_C(weights, i1-1, i2-1)
}
larsgr/BSplineMI documentation built on May 20, 2019, 7:57 p.m.