R/calcyt_mzrolldb.R

Defines functions calcyt_mzrolldb

Documented in calcyt_mzrolldb

#' mzrollDB to dataframe for Cytoscape
#'
#' Extracts the features, measurements, and samples tables from an mzrollDB
#' list, maps compound names to database IDs, and reshapes the measurements
#' to one column per sample so that the result can be merged with a dataframe
#' of nodes/edges from Cytoscape.
#'
#' @param x mzrollDB list that is already processed with
#'   claman::process_mzroll_multi. It must contain the elements `features`,
#'   `measurements` (exactly four columns, renamed here by position to
#'   `groupId`, `sampleId`, `log2`, `c_log2`) and `samples` (with columns
#'   `sampleId` and `tube label`).
#' @param y a character vector giving the desired plotting order. Each value
#'   is matched against a sample's `tube label` with its last three
#'   characters removed; samples that match no value are placed last.
#' @param z database with metabolites' names and IDs as a dataframe. Its
#'   `Query` column is matched against the `compoundName` column of the
#'   mzrollDB features.
#'
#' @return dataframe with experimental data ready to merge with dataframe of
#'   nodes/edges from Cytoscape: the features joined to `z`, followed by one
#'   column per `tube label` holding the `c_log2` value.
#' @export
#'
#' @examples
#' x <- mzrolldb_X0208
#' y <- c("control_1", "control_2", "control_3",
#'        "treatment_1", "treatment_2", "treatment_3")
#' z <- metabolites_database
#' calcyt_mzrolldb(x, y, z)
calcyt_mzrolldb <- function(x, y, z) {
  # Fail early with a readable message instead of an obscure join error.
  required_parts <- c("features", "measurements", "samples")
  missing_parts <- setdiff(required_parts, names(x))
  if (length(missing_parts) > 0) {
    stop(
      "`x` is missing mzrollDB component(s): ",
      paste(missing_parts, collapse = ", "),
      call. = FALSE
    )
  }

  # Map IDs between mzrollDB features and the database.
  features_ids <- dplyr::left_join(
    x[["features"]],
    z,
    by = c("compoundName" = "Query")
  )

  # Measurement columns are renamed by position, so the column count must
  # match the four names assigned below.
  measurements <- x[["measurements"]]
  if (ncol(measurements) != 4L) {
    stop(
      "`x$measurements` must have exactly 4 columns, not ",
      ncol(measurements),
      call. = FALSE
    )
  }
  colnames(measurements) <- c("groupId", "sampleId", "log2", "c_log2")

  # The condition of a sample is its tube label minus the last 3 characters.
  samples <- x[["samples"]]
  samples$conditions <- gsub('.{3}$', '', samples$`tube label`)

  # Attach tube label and condition to every measurement.
  sample_info <- dplyr::select(samples, "sampleId", "tube label", "conditions")
  measurements <- dplyr::left_join(measurements, sample_info, by = "sampleId")

  # Order rows by the position of their condition in `y`. `order()` is stable
  # and puts NA (conditions absent from `y`) last, so tube labels keep their
  # original relative order within a condition.
  condition_rank <- factor(measurements$conditions, levels = y)
  measurements <- measurements[order(condition_rank), , drop = FALSE]

  # Keep only what the wide table needs: id, column key, value.
  measurements <- measurements[, c("groupId", "tube label", "c_log2")]

  # A feature measured twice in the same sample cannot be spread into a
  # single cell; stop rather than let the reshape build list-columns.
  if (anyDuplicated(measurements[, c("groupId", "tube label")]) > 0) {
    stop(
      "Each groupId / tube label combination must occur only once in ",
      "`x$measurements`",
      call. = FALSE
    )
  }

  # Pivot from long to wide; new columns appear in order of first occurrence,
  # i.e. in the condition order established above.
  measurements_wide <- tidyr::pivot_wider(
    measurements,
    names_from = "tube label",
    values_from = "c_log2"
  )

  # Join the wide measurements onto the annotated features.
  dplyr::left_join(features_ids, measurements_wide, by = "groupId")
}
delfarahalireza/calcyt documentation built on April 28, 2022, 12:05 a.m.