#' @title geo_load_eset
#'
#' @description Helper function to load expression sets from GEO.
#'
#' @param name A character string. The name of the GEO study.
#' @param datadir A character string. The path to the directory to store
#' the downloaded GEO datasets.
#' @param targetcolname A character string. The name of the column in the
#' GEO expression set's pheno data, holding the variable of interest
#' (mapping information).
#' @param targetlevelname A character string. The name of the the targets in
#' `targetcolname` (mapping information).
#' @param controllevelname A character string. The name of the the controls in
#' `targetcolname` (mapping information).
#' @param targetcol A character string. The columname of `sample_metadata`
#' holding the targets (default: "target"; caution: this should not
#' be changed!).
#' @param targetname A character string. Name of the the targets,
#' specified in the 'target' column of `sample_metadata` (default: "Target";
#' caution: this should not be changed!).
#' @param controlname A character string. Name of the the controls,
#' specified in the 'target' column of `sample_metadata` (default: "Control";
#' caution: this should not be changed!).
#' @param use_rawdata A logical. Indicates, if raw the data is downloaded
#' in CEL file format, uncompressed and subsequently normalized with a GCRMA
#' normalization, or if the already normalized datasets from GEO are
#' downloaded (default: FALSE).
#' @param setid A integer. Indicates, which set should be used by the software
#' for subsequent analyses (default: 1).
#'
#' @export
geo_load_eset <- function(name,
datadir,
targetcolname,
targetlevelname,
controllevelname,
targetcol,
targetname,
controlname,
use_rawdata = FALSE,
setid = 1) {
# original GEO data
eset <- GEOquery::getGEO(
name,
destdir = datadir,
AnnotGPL = TRUE
)[[setid]]
# rename targetcol
colnames(Biobase::pData(eset))[which(
colnames(Biobase::pData(eset)) == targetcolname
)] <- targetcol
if (!is.null(targetlevelname)) {
# split levels, if more than one is provided for each targetlevelname
# and controllevelname (recognized by '|||')
if (grepl("(\\|){3}", targetlevelname)) { # nolint
targetlevelname <- strsplit(
targetlevelname, split = "|||", fixed = T
)[[1]]
}
if (!any(targetlevelname %in% unique(
as.character(Biobase::pData(eset)[[targetcol]])
))) {
stop(paste0("\nLevel(s) provided with targetlevelname are ",
"not present in your data.\n"))
}
}
if (!is.null(controllevelname)) {
if (grepl("(\\|){3}", controllevelname)) { # nolint
controllevelname <- strsplit(
controllevelname, split = "|||", fixed = T
)[[1]]
}
if (!any(controllevelname %in% unique(
as.character(Biobase::pData(eset)[[targetcol]])
))) {
stop(paste0("\nLevel(s) provided with controllevelname are ",
"not present in your data.\n"))
}
}
# reduce pheno data to hold only cases of which we have
# targetcolumname and controllevelname
Biobase::pData(eset) <- Biobase::pData(eset)[which(
Biobase::pData(eset)[[targetcol]] %in%
c(targetlevelname, controllevelname)), ]
# rename levels of targetcol
if (!is.null(targetlevelname)) {
t_levelnames <- rep(targetname, length(targetlevelname))
names(t_levelnames) <- targetlevelname
} else {
t_levelnames <- NULL
}
if (!is.null(controllevelname)) {
c_levelnames <- rep(controlname, length(controllevelname))
names(c_levelnames) <- controllevelname
} else {
c_levelnames <- NULL
}
levelnames <- c(t_levelnames, c_levelnames)
eset[[targetcol]] <- plyr::revalue(eset[[targetcol]], levelnames)
if (isTRUE(use_rawdata)) {
eset <- geo_use_raw_data(
eset = eset,
name = name,
datadir = datadir
)
}
# assign eset to the global env
global_env_hack(
key = name,
val = eset,
pos = 1L
)
# return eset as well
return(eset)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.