R/mle.getEncodingLength.r

Defines functions mle.getEncodingLength

Documented in mle.getEncodingLength

#' Minimum encoding length (MLE)
#'
#' This function calculates the mininmum encoding length associated with a subset of variables against a background knowledge graph.
#' @param ig - An igraph object of the background knowledge graph.
#' @param path - The filepath to a directory you will store patient-specific RData files in.
#' @param pvals - The matrix that gives the perturbation strength significance for all variables (columns) for each patient (rows)
#' @param ptID - The row name in data.pvals corresponding to the patient you specifically want encoding information for.
#' @export mle.getEncodingLength
#' @keywords minimum length encoding
#' @keywords algorithmic significance
#' @keywords kraft-mcmillian inequality
#' @keywords markov inequality
#' @examples
#' mle.getEncodingLength(int patientNum, matrix data.pvals, int kmax)
mle.getEncodingLength <- function(ig, path, pvals, ptID) {
  ptFiles <- list.files(path)
  load(sprintf("%s/%s-Bitstrings.RData", path, ptID))
  results <- data.frame(patientID=character(), optimalBS=character(), subsetSize=integer(), opt.T=integer(), varPvalue=numeric(),
                        fishers.Info=numeric(), IS.null=numeric(), IS.alt=numeric(), d.score=numeric(), stringsAsFactors = FALSE)
  row <- 1
  for (k in 2:kmax) {
    optBS <- optimalBitString[[k]]
    #print(sprintf("PatientID = %s, Mets perturbed: %s", ptIDs[patientNum], names(optBS)))
    mets.k <- unlist(strsplit(names(optBS), split="\\{"))[1]
    mets.k <- unlist(strsplit(mets.k, split="/"))
    mets.k <- gsub("\\*", "", mets.k)
    optBS <- as.numeric(unlist(strsplit(optBS, split="")))
    ind <- which(sapply(optBS, function(i) i==1))
    optBS <- optBS[1:ind[length(ind)]]
    p.k <- sum(optBS)

    # Entropy Metric. For bit string of first k out of S encoded nodes, do entropy metric. Then encode rest S-k nodes directly
    if (length(optBS)==1) {
      e = log2(length(igraphObjectG)) + iteratedLogarithm(length(optBS)) + log2(choose(length(igraphObjectG)-p.k, k-p.k))
    } else {
      e = log2(length(igraphObjectG)) + iteratedLogarithm(length(optBS)) + 1 + (length(optBS)-1)*entropyFunction(optBS[2:length(optBS)]) + log2(choose(length(igraphObjectG)-p.k, k-p.k))
    }

    optBS.tmp <- gsub("1", "T", paste(as.character(optBS), collapse=""))
    results[row, "patientID"] <- ptID
    results[row, "optimalBS"] <- optBS.tmp
    results[row, "subsetSize"] <- k
    results[row, "opt.T"] <- p.k
    results[row, "varPvalue"] <- paste(format(pvals[ptID, mets.k], digits=2, width=3), collapse="/")
    results[row, "fishers.Info"] <- -log2(fishersMethod(pvals[ptID,mets.k]))
    results[row, "IS.null"] <- log2(choose(length(V(ig)$name), k))
    results[row, "IS.alt"] <- e
    results[row, "d.score"] <- log2(choose(length(V(ig)$name), k)) - e
    row <- row + 1
  }
  return (results)
}
lashmore/MolEndoMatch documentation built on May 5, 2019, 8:02 p.m.