R/runLioness.R

Defines functions runLioness

Documented in runLioness

#' Run python implementation of LIONESS
#'
#' \strong{LIONESS} (Linear Interpolation to Obtain Network Estimates for Single Samples) is a method to estimate sample-specific regulatory networks.
#'  \href{https://arxiv.org/abs/1505.06440}{(LIONESS arXiv paper)}.
#'
#' @details The Python sources \code{panda.py} and \code{lioness.py} are fetched from GitHub and
#'          evaluated through \pkg{reticulate} on every call, so a working Python installation
#'          and network access are required.
#'
#' @param e Character string indicating the file path of the expression values file, with each gene as a row and each sample as a column. \emph{required}
#' @param m Character string indicating the file path of the pair file of motif edges;
#'          when not provided (or \code{NULL}), analysis continues with Pearson correlation matrix. \emph{optional}
#' @param ppi Character string indicating the file path of the pair file of the Protein-Protein interaction dataset.
#'          May be omitted or \code{NULL}. \emph{optional}
#' @param rm_missing Boolean indicating whether to remove missing values. If TRUE, removes missing values;
#'         if FALSE, keeps missing values. The default value is FALSE. \emph{optional}
#'
#' @return A data frame with columns representing each sample, rows representing the regulator-target pair in PANDA network generated by \code{\link{runPanda}}.
#'         Each cell filled with the related score, representing the estimated contribution of a sample to the aggregate network.
#'         The first two columns are taken from the PANDA result and the remaining columns from the LIONESS result.
#'
#' @examples
#' # refer to the input datasets files of control in inst/extdata as example
#' control_expression_file_path <- system.file("extdata", "expr10.txt", package = "netZoo", mustWork = TRUE)
#' motif_file_path <- system.file("extdata", "chip.txt", package = "netZoo", mustWork = TRUE)
#' ppi_file_path <- system.file("extdata", "ppi.txt", package = "netZoo", mustWork = TRUE)
#'
#' # Run LIONESS algorithm
#' control_lioness_result <- runLioness(e = control_expression_file_path, m = motif_file_path, ppi = ppi_file_path, rm_missing = TRUE )
#'
#' @import reticulate
#' @export
runLioness <- function(e = expression, m = motif, ppi = ppi, rm_missing = FALSE) {
  # NOTE: the defaults of `e`, `m` and `ppi` are placeholders kept for backward
  # compatibility. They are never evaluated: each argument is only touched
  # after missing() has reported that the caller supplied it.

  # Ensure a supplied path is exactly one non-NA character string, so that it
  # reaches Python as a single `str` argument.
  check_path <- function(path, option) {
    if (!is.character(path) || length(path) != 1L || is.na(path)) {
      stop("Option ", option, " must be a single character string giving a file path.",
           call. = FALSE)
    }
    path
  }

  if (missing(e)) {
    stop("Please provide the gene expression value with option e, e.g. e=\"expression.txt\"")
  }
  expression_path <- check_path(e, "e")

  # NULL is handed to Python as None, matching the original behaviour for an
  # omitted motif / PPI file.
  if (missing(m) || is.null(m)) {
    motif_path <- NULL
    message("Pair file of motif edges is not provided, analysis continues with Pearson correlation matrix.")
  } else {
    motif_path <- check_path(m, "m")
  }

  if (missing(ppi) || is.null(ppi)) {
    ppi_path <- NULL
    message("No PPI provided.")
  } else {
    ppi_path <- check_path(ppi, "ppi")
  }

  # Only report the fallback when the caller really omitted the option; an
  # explicit rm_missing = FALSE is a deliberate choice, not a missing value.
  if (missing(rm_missing)) {
    message("Miss the value of options rm_missing, using the default value FALSE, i.e. Not removing missing values ")
  }
  remove_missing <- as.logical(rm_missing)
  if (length(remove_missing) != 1L || is.na(remove_missing)) {
    stop("Option rm_missing must be a single TRUE or FALSE.", call. = FALSE)
  }

  # Source panda.py and lioness.py from the GitHub raw website.
  # NOTE(review): this downloads and executes remote code on every call;
  # consider bundling or pinning the scripts.
  reticulate::source_python("https://raw.githubusercontent.com/twangxxx/pypanda/master/pypanda/panda.py", convert = TRUE)
  reticulate::source_python("https://raw.githubusercontent.com/twangxxx/pypanda/master/pypanda/lioness.py", convert = TRUE)

  # Call the Python classes with real R values instead of pasting the paths
  # into Python source text: backslashes or quotes in a path can then no
  # longer corrupt (or inject into) the code that gets executed.
  py_main <- reticulate::import_main(convert = TRUE)

  # Create the Panda instance and fetch the aggregate PANDA network.
  panda_obj <- py_main$Panda(expression_path, motif_path, ppi_path, remove_missing)
  panda_net <- panda_obj$export_panda_results

  # Create the Lioness instance from the Panda instance and fetch its result.
  lioness_obj <- py_main$Lioness(panda_obj)
  lioness_net <- lioness_obj$export_lioness_results

  # Prefix the LIONESS output with the first two columns of the PANDA output.
  cbind(panda_net[, c(1, 2)], lioness_net)
}
twangxxx/netZoo documentation built on May 17, 2019, 1:02 p.m.