## All ----
#### subsetting (MultiAssayExperiment) ####
#' @rdname subsetting
#' @export
setMethod("subsetting", signature(x = "MultiAssayExperiment"),
function(x,
set.c = NULL,
genes.vc = "all",
sex.vc = "all",
tissues.vc = "all",
common_samples.l = FALSE,
na_thresh.n = 0.2,
var_thresh.n = .Machine$double.eps,
imputed_thresh.n = 0.2) {
if (length(genes.vc) == 1 && genes.vc == "all")
genes.vc <- c("WT", ProMetIS::genes.vc())
stopifnot(all(genes.vc %in% c("WT", ProMetIS::genes.vc())))
if (length(sex.vc) == 1 && sex.vc == "all")
sex.vc <- ProMetIS::sex.vc()
stopifnot(all(sex.vc %in% ProMetIS::sex.vc()))
stopifnot(length(tissues.vc) == 1)
if (tissues.vc != "all") {
stopifnot(tissues.vc %in% ProMetIS::tissues.vc())
x <- x[, , names(x)[names(x) %in% c("preclinical",
grep(tissues.vc,
ProMetIS::sets.vc(),
value = TRUE))]]
}
if (common_samples.l) {
x <- MultiAssayExperiment::intersectColumns(x)
}
samp.DF <- MultiAssayExperiment::colData(x)
samp_sel.vc <- rownames(samp.DF[samp.DF[, "gene"] %in% genes.vc &
samp.DF[, "sex"] %in% sex.vc, ])
sub.mae <- x[, samp_sel.vc, ]
sub.DF <- MultiAssayExperiment::colData(sub.mae)
sub_sets.vc <- names(sub.mae)
for (set.c in sub_sets.vc) {
se <- sub.mae[[set.c]]
SummarizedExperiment::colData(se)[, "gene"] <- sub.DF[colnames(se), "gene"]
SummarizedExperiment::colData(se)[, "sex"] <- sub.DF[colnames(se), "sex"]
se <- ProMetIS:::subsetting(x = se,
set.c = set.c,
genes.vc = genes.vc,
sex.vc = sex.vc,
tissues.vc = NULL,
common_samples.l = NULL,
na_thresh.n = na_thresh.n,
var_thresh.n = var_thresh.n,
imputed_thresh.n = imputed_thresh.n)
SummarizedExperiment::colData(se)[, "gene"] <- NULL
SummarizedExperiment::colData(se)[, "sex"] <- NULL
sub.mae[[set.c]] <- se
}
sub.mae <- sub.mae[, , sub_sets.vc]
invisible(sub.mae)
})
#### subsetting (SummarizedExperiment) ####
#' @rdname subsetting
#' @export
setMethod("subsetting", signature(x = "SummarizedExperiment"),
function(x,
set.c,
genes.vc = "all",
sex.vc = "all",
tissues.vc = NULL,
common_samples.l = NULL,
na_thresh.n = 0.2,
var_thresh.n = .Machine$double.eps,
imputed_thresh.n = 0.2) {
if (length(genes.vc) == 1 && genes.vc == "all")
genes.vc <- ProMetIS::wtgenes.vc()
stopifnot(all(genes.vc %in% ProMetIS::wtgenes.vc()))
if (length(sex.vc) == 1 && sex.vc == "all")
sex.vc <- ProMetIS::sex.vc()
stopifnot(all(sex.vc %in% ProMetIS::sex.vc()))
samples.vc <- colnames(x)
if (!("gene" %in% colnames(SummarizedExperiment::colData(x))))
stop("No 'gene' column found in the colData of the SummarizedExperiment")
if (!("sex" %in% colnames(SummarizedExperiment::colData(x))))
stop("No 'sex' column found in the colData of the SummarizedExperiment")
samples_sel.vl <- SummarizedExperiment::colData(x)[, "gene"] %in% genes.vc &
SummarizedExperiment::colData(x)[, "sex"] %in% sex.vc
x <- x[, samples.vc[samples_sel.vl]]
filter.vi <- c(nas_zerovar = NA_integer_,
overimputed = NA_integer_)
# NAs <= 20% and variance >= 1e-5
if (length(genes.vc) > 1) {
class.c <- "gene"
} else if (length(sex.vc) > 1) {
class.c <- "sex"
} else
stop("Only a single gene and sex selected.")
filtered.se <- phenomis::filtering(x,
class.c = class.c,
max_na_prop.n = na_thresh.n,
min_variance.n = var_thresh.n)
na_zerovar_sel.vl <- rownames(x) %in% rownames(filtered.se)
filter.vi["nas_zerovar"] <- sum(!na_zerovar_sel.vl)
# proteomics: observations >= 80% in at least one condition
if (grepl("proteomics", set.c)) {
overimputed_sel.vl <- ProMetIS:::filter_overimputed(x = x,
set.c = set.c,
genes.vc = genes.vc,
sex.vc = sex.vc,
imputed_thresh.n = imputed_thresh.n)
} else
overimputed_sel.vl <- rep(TRUE, dim(x)[1])
filter.vi["overimputed"] <- sum(!overimputed_sel.vl)
# intersection of both conditions
feat_sel.vl <- na_zerovar_sel.vl & overimputed_sel.vl
stopifnot(length(feat_sel.vl) == dim(x)[1] &&
!any(is.na(feat_sel.vl)))
x <- x[feat_sel.vl, ]
# if (sum(!feat_sel.vl))
message("Nb of discard. feat. in '", set.c, "': ",
paste(paste0(names(filter.vi), ": ", filter.vi), collapse = ", "))
invisible(x)
})
#### subsetting (MultiDataSet) ####
#' @rdname subsetting
#' @export
setMethod("subsetting", signature(x = "MultiDataSet"),
function(x,
set.c = NULL,
genes.vc = "all",
sex.vc = "all",
tissues.vc = "all",
common_samples.l = FALSE,
na_thresh.n = 0.2,
var_thresh.n = .Machine$double.eps,
imputed_thresh.n = 0.2) {
if (length(genes.vc) == 1 && genes.vc == "all")
genes.vc <- c("WT", ProMetIS::genes.vc())
stopifnot(all(genes.vc %in% c("WT", ProMetIS::genes.vc())))
if (length(sex.vc) == 1 && sex.vc == "all")
sex.vc <- ProMetIS::sex.vc()
stopifnot(all(sex.vc %in% ProMetIS::sex.vc()))
stopifnot(length(tissues.vc) == 1)
if (tissues.vc != "all") {
stopifnot(tissues.vc %in% ProMetIS::tissues.vc())
x <- x[, names(x)[names(x) %in% c("preclinical",
grep(tissues.vc,
ProMetIS::sets.vc(),
value = TRUE))]]
}
if (common_samples.l) {
x <- MultiDataSet::commonSamples(x)
ref.eset <- x[[names(x)[1]]]
} else if ("preclinical" %in% names(x)) {
ref.eset <- x[["preclinical"]]
} else {
sample_names.ls <- Biobase::sampleNames(x)
all_samples.vc <- Reduce("union", sample_names.ls)
ref_eset.i <- which.max(unlist(lapply(Biobase::sampleNames(x),
function(sample_names.vc)
sum(sample_names.vc %in% all_samples.vc))))[1]
ref.eset <- x[[names(x)[[ref_eset.i]]]]
}
samples.vc <- Biobase::sampleNames(ref.eset)
samples_sel.vl <- Biobase::pData(ref.eset)[, "gene"] %in% genes.vc &
Biobase::pData(ref.eset)[, "sex"] %in% sex.vc
sub.mset <- x[samples.vc[samples_sel.vl], ]
sub_sets.vc <- names(sub.mset)
for (set.c in sub_sets.vc) {
eset <- sub.mset[[set.c]]
eset <- subsetting(x = eset,
set.c = set.c,
genes.vc = genes.vc,
sex.vc = sex.vc,
tissues.vc = NULL,
common_samples.l = NULL,
na_thresh.n = na_thresh.n,
var_thresh.n = var_thresh.n,
imputed_thresh.n = imputed_thresh.n)
sub.mset <- MultiDataSet::add_eset(sub.mset,
eset,
dataset.type = set.c,
GRanges = NA,
overwrite = TRUE,
warnings = FALSE)
}
sub.mset <- sub.mset[, sub_sets.vc]
invisible(sub.mset)
})
#### subsetting (ExpressionSet) ####
#' @rdname subsetting
#' @export
setMethod("subsetting", signature(x = "ExpressionSet"),
function(x,
set.c,
genes.vc = "all",
sex.vc = "all",
tissues.vc = NULL,
common_samples.l = NULL,
na_thresh.n = 0.2,
var_thresh.n = .Machine$double.eps,
imputed_thresh.n = 0.2) {
if (length(genes.vc) == 1 && genes.vc == "all")
genes.vc <- ProMetIS::wtgenes.vc()
stopifnot(all(genes.vc %in% ProMetIS::wtgenes.vc()))
if (length(sex.vc) == 1 && sex.vc == "all")
sex.vc <- ProMetIS::sex.vc()
stopifnot(all(sex.vc %in% ProMetIS::sex.vc()))
samples.vc <- Biobase::sampleNames(x)
samples_sel.vl <- Biobase::pData(x)[, "gene"] %in% genes.vc &
Biobase::pData(x)[, "sex"] %in% sex.vc
x <- x[, samples.vc[samples_sel.vl]]
filter.vi <- c(nas_zerovar = NA_integer_,
overimputed = NA_integer_)
# NAs <= 20% and variance >= 1e-5
if (length(genes.vc) > 1) {
class.c <- "gene"
} else if (length(sex.vc) > 1) {
class.c <- "sex"
} else
stop("Only a single gene and sex selected.")
filtered.eset <- phenomis::filtering(x,
class.c = class.c,
max_na_prop.n = na_thresh.n,
min_variance.n = var_thresh.n)
na_zerovar_sel.vl <- Biobase::featureNames(x) %in% Biobase::featureNames(filtered.eset)
# na_zerovar_sel.vl <- ProMetIS:::.filter_na_zerovar(t(Biobase::exprs(x)),
# na_thresh.n = na_thresh.n,
# var_thresh.n = var_thresh.n)
filter.vi["nas_zerovar"] <- sum(!na_zerovar_sel.vl)
# proteomics: observations >= 80% in at least one condition
if (grepl("proteomics", set.c)) {
overimputed_sel.vl <- ProMetIS:::filter_overimputed(x = x,
set.c = set.c,
genes.vc = genes.vc,
sex.vc = sex.vc,
imputed_thresh.n = imputed_thresh.n)
} else
overimputed_sel.vl <- rep(TRUE, dim(x)["Features"])
filter.vi["overimputed"] <- sum(!overimputed_sel.vl)
# intersection of both conditions
feat_sel.vl <- na_zerovar_sel.vl & overimputed_sel.vl
stopifnot(length(feat_sel.vl) == dim(x)["Features"] &&
!any(is.na(feat_sel.vl)))
x <- x[feat_sel.vl, ]
# if (sum(!feat_sel.vl))
message("Nb of discard. feat. in '", set.c, "': ",
paste(paste0(names(filter.vi), ": ", filter.vi), collapse = ", "))
invisible(x)
})
#### filter_overimputed (SummarizedExperiment) ####
#' @rdname filter_overimputed
setMethod("filter_overimputed", signature(x = "SummarizedExperiment"),
function(x,
set.c,
genes.vc = "LAT",
sex.vc = c("M", "F"),
imputed_thresh.n = 0.2) {
imputed.mi <- ProMetIS:::imputation_info(x = x, set.c = set.c)
stopifnot(identical(colnames(x), colnames(imputed.mi)))
if (length(genes.vc) >= 2) {
# general case: no restriction about sex
# or e.g. LAT vs WT on males (or females) only
factor.fc <- factor(SummarizedExperiment::colData(x)[, "gene"],
levels = ProMetIS::wtgenes.vc()[ProMetIS::wtgenes.vc() %in% genes.vc])
} else if (length(genes.vc) == 1 && length(sex.vc) == 2) {
# males vs females on LAT (or WT) only
factor.fc <- factor(SummarizedExperiment::colData(x)[, "sex"],
levels = ProMetIS::sex.vc())
} else
stop("The corresponding imputation metric for this combination of genotype(s) and gender(s) is not currently available.",
call. = FALSE)
imputed_factor.mi <- t(apply(imputed.mi, 1, function(var.vn) {
tapply(var.vn, factor.fc, sum)
}))
imputed_prop.mn <- sweep(imputed_factor.mi, 2, table(factor.fc), "/")
imputed.ml <- imputed_prop.mn >= imputed_thresh.n
imputed.vn <- rowSums(imputed.ml, na.rm = TRUE)
feat_sel.vl <- imputed.vn <= 1
stopifnot(length(feat_sel.vl) == dim(x)[1] &&
!any(is.na(feat_sel.vl)))
return(invisible(feat_sel.vl))
})
#### filter_overimputed (ExpressionSet) ####
#' @rdname filter_overimputed
setMethod("filter_overimputed", signature(x = "ExpressionSet"),
function(x,
set.c,
genes.vc = "LAT",
sex.vc = c("M", "F"),
imputed_thresh.n = 0.2) {
imputed.mi <- ProMetIS:::imputation_info(x = x, set.c = set.c)
stopifnot(identical(Biobase::sampleNames(x), colnames(imputed.mi)))
if (length(genes.vc) >= 2) {
# general case: no restriction about sex
# or e.g. LAT vs WT on males (or females) only
factor.fc <- factor(Biobase::pData(x)[, "gene"],
levels = ProMetIS::wtgenes.vc()[ProMetIS::wtgenes.vc() %in% genes.vc])
} else if (length(genes.vc) == 1 && length(sex.vc) == 2) {
# males vs females on LAT (or WT) only
factor.fc <- factor(Biobase::pData(x)[, "sex"],
levels = ProMetIS::sex.vc())
} else
stop("The corresponding imputation metric for this combination of genotype(s) and gender(s) is not currently available.",
call. = FALSE)
imputed_factor.mi <- t(apply(imputed.mi, 1, function(var.vn) {
tapply(var.vn, factor.fc, sum)
}))
imputed_prop.mn <- sweep(imputed_factor.mi, 2, table(factor.fc), "/")
imputed.ml <- imputed_prop.mn >= imputed_thresh.n
imputed.vn <- rowSums(imputed.ml, na.rm = TRUE)
feat_sel.vl <- imputed.vn <= 1
stopifnot(length(feat_sel.vl) == dim(x)["Features"] &&
!any(is.na(feat_sel.vl)))
return(invisible(feat_sel.vl))
})
#### imputation_info (SummarizedExperiment) ####
#' @rdname imputation_info
setMethod("imputation_info", signature(x = "SummarizedExperiment"),
function(x,
set.c) {
prot_pda.df <- SummarizedExperiment::colData(x)
prot_fda.df <- SummarizedExperiment::rowData(x)
load(system.file("extdata/2_post_processed/metadata_supp.rdata", package = "ProMetIS"))
supp_pda.df <- metadata_supp.ls[[set.c]][["pdata"]]
supp_fda.df <- metadata_supp.ls[[set.c]][["fdata"]]
stopifnot(all(rownames(prot_pda.df) %in% rownames(supp_pda.df)))
stopifnot(all(rownames(prot_fda.df) %in% rownames(supp_fda.df)))
prot_pda.df <- cbind.data.frame(prot_pda.df, supp_pda.df[rownames(prot_pda.df), , drop = FALSE])
prot_fda.df <- cbind.data.frame(prot_fda.df, supp_fda.df[rownames(prot_fda.df), , drop = FALSE])
## checking that the sample names are ordered by increasing ID
# prot_samp.vi <- as.integer(substr(rownames(prot_pda.df), 2, 4))
# stopifnot(identical(prot_samp.vi, sort(prot_samp.vi)))
## getting imputation info
value_origin.vl <- vapply(colnames(prot_fda.df), function(colname.c) {
colname_split.vc <- unlist(strsplit(colname.c, split = "_"))
grepl("^OriginOfValueabundance", colname.c) &
colname_split.vc[length(colname_split.vc)] %in% rownames(prot_pda.df)
}, FUN.VALUE = logical(1))
value_origin.df <- prot_fda.df[, value_origin.vl]
colnames(value_origin.df) <- gsub("_run90methode30K",
"",
gsub("_mgf", "",
gsub("OriginOfValueabundance_", "",
colnames(value_origin.df))))
## re-ordering imputation info to match sample names
value_origin_samp.vc <- vapply(colnames(value_origin.df), function(colname.c) {
colname_split.vc <- unlist(strsplit(colname.c, split = "_"))
colname_split.vc[length(colname_split.vc)]}, FUN.VALUE = character(1))
temp <- value_origin_samp.vc
value_origin_samp.vc <- names(value_origin_samp.vc)
names(value_origin_samp.vc) <- temp
value_origin.df <- value_origin.df[, value_origin_samp.vc[rownames(prot_pda.df)]]
stopifnot(identical(names(value_origin_samp.vc[rownames(prot_pda.df)]), colnames(x)))
colnames(value_origin.df) <- colnames(x)
imputed.mi <- apply(value_origin.df, 2, DAPAR_is.MV)
mode(imputed.mi) <- "integer"
stopifnot(!any(is.na(c(imputed.mi))))
return(invisible(imputed.mi))
})
#### imputation_info (ExpressionSet) ####
#' @rdname imputation_info
setMethod("imputation_info", signature(x = "ExpressionSet"),
function(x,
set.c) {
prot_pda.df <- Biobase::pData(x)
prot_fda.df <- Biobase::fData(x)
load(file.path(ProMetIS::post_processed_dir.c(),
"metadata_supp.rdata"))
supp_pda.df <- metadata_supp.ls[[set.c]][["pdata"]]
supp_fda.df <- metadata_supp.ls[[set.c]][["fdata"]]
stopifnot(all(rownames(prot_pda.df) %in% rownames(supp_pda.df)))
stopifnot(all(rownames(prot_fda.df) %in% rownames(supp_fda.df)))
prot_pda.df <- cbind.data.frame(prot_pda.df, supp_pda.df[rownames(prot_pda.df), , drop = FALSE])
prot_fda.df <- cbind.data.frame(prot_fda.df, supp_fda.df[rownames(prot_fda.df), , drop = FALSE])
## checking that the sample names are ordered by increasing ID
# prot_samp.vi <- as.integer(substr(rownames(prot_pda.df), 2, 4))
# stopifnot(identical(prot_samp.vi, sort(prot_samp.vi)))
## getting imputation info
value_origin.vl <- vapply(colnames(prot_fda.df), function(colname.c) {
colname_split.vc <- unlist(strsplit(colname.c, split = "_"))
grepl("^OriginOfValueabundance", colname.c) &
colname_split.vc[length(colname_split.vc)] %in% rownames(prot_pda.df)
}, FUN.VALUE = logical(1))
value_origin.df <- prot_fda.df[, value_origin.vl]
colnames(value_origin.df) <- gsub("_run90methode30K",
"",
gsub("_mgf", "",
gsub("OriginOfValueabundance_", "",
colnames(value_origin.df))))
## re-ordering imputation info to match sample names
value_origin_samp.vc <- vapply(colnames(value_origin.df), function(colname.c) {
colname_split.vc <- unlist(strsplit(colname.c, split = "_"))
colname_split.vc[length(colname_split.vc)]}, FUN.VALUE = character(1))
temp <- value_origin_samp.vc
value_origin_samp.vc <- names(value_origin_samp.vc)
names(value_origin_samp.vc) <- temp
value_origin.df <- value_origin.df[, value_origin_samp.vc[rownames(prot_pda.df)]]
stopifnot(identical(names(value_origin_samp.vc[rownames(prot_pda.df)]), Biobase::sampleNames(x)))
colnames(value_origin.df) <- Biobase::sampleNames(x)
imputed.mi <- apply(value_origin.df, 2, DAPAR_is.MV)
mode(imputed.mi) <- "integer"
stopifnot(!any(is.na(c(imputed.mi))))
return(invisible(imputed.mi))
})
# select the main sample/feature metadata that will be kept exported in the .tsv files
# and the supplementary ones that will stored as a "metadata_supp.rdata" file
# See the details of the selected columns in ".sample_metadata_select" and
# ".variable_metadata_select" functions
metadata_select <- function(mset,
step.c) { # e.g. step.c = "2_post_processed"
if (is.null(names(mset))) { # preclinical expression set
preclinical.eset <- mset
mset <- MultiDataSet::createMultiDataSet()
mset <- MultiDataSet::add_eset(mset,
preclinical.eset,
dataset.type = "preclinical",
GRanges = NA,
overwrite = TRUE,
warnings = FALSE)
}
mset_names.vc <- names(mset)
metadata_supp_file.c <- paste0("../inst/extdata/", step.c, "/metadata_supp.rdata")
if (file.exists(metadata_supp_file.c)) {
load(metadata_supp_file.c)
} else {
metadata_supp.ls <- vector(mode = "list", length = length(ProMetIS::sets.vc()))
names(metadata_supp.ls) <- ProMetIS::sets.vc()
}
for (set.c in mset_names.vc) {
eset <- mset[[set.c]]
# sample metadata
pdata.df <- Biobase::pData(eset)
samplemeta.vc <- .sample_metadata_select(set.c)
samplemeta.vc <- samplemeta.vc[samplemeta.vc %in% colnames(pdata.df)]
samplemeta_supp.vc <- setdiff(colnames(pdata.df), samplemeta.vc)
if (length(samplemeta_supp.vc)) {
pdata_supp.df <- pdata.df[, samplemeta_supp.vc, drop = FALSE]
} else
pdata_supp.df <- data.frame()
Biobase::pData(eset) <- pdata.df[, samplemeta.vc]
# variable metadata
fdata.df <- Biobase::fData(eset)
variablemeta.vc <- .variable_metadata_select(fdata.df = fdata.df, set.c = set.c)
variablemeta.vc <- variablemeta.vc[variablemeta.vc %in% colnames(fdata.df)]
variablemeta_supp.vc <- setdiff(colnames(fdata.df), variablemeta.vc)
if (length(variablemeta_supp.vc)) {
fdata_supp.df <- fdata.df[, variablemeta_supp.vc, drop = FALSE]
} else
fdata_supp.df <- data.frame()
if (grepl("metabolomics", set.c)) {
annot_level.vc <- fdata.df[, "annot_level"]
annot_level.vc <- as.character(annot_level.vc)
annot_level_na.vi <- which(is.na(annot_level.vc) | annot_level.vc == "NA")
annot_level.vc[annot_level_na.vi] <- ""
fdata.df[, "annot_level"] <- annot_level.vc
monoiso_col.c <- "monoisotopic_mass"
if (monoiso_col.c %in% colnames(fdata.df)) {
fdata.df[, monoiso_col.c] <- as.character(fdata.df[, monoiso_col.c])
fdata.df[is.na(fdata.df[, monoiso_col.c]), monoiso_col.c] <- ""
}
}
Biobase::fData(eset) <- fdata.df[, variablemeta.vc]
stopifnot(methods::validObject(eset))
mset <- MultiDataSet::add_eset(mset,
eset,
dataset.type = set.c,
GRanges = NA,
overwrite = TRUE,
warnings = FALSE)
metadata_supp.ls[[set.c]] <- list(pdata = pdata_supp.df,
fdata = fdata_supp.df)
}
mset <- mset[, mset_names.vc]
save(metadata_supp.ls, file = metadata_supp_file.c)
message("Supplementary metadata written in:\n", metadata_supp_file.c)
return(invisible(mset))
}
.sample_metadata_select <- function(set.c) {
first.vc <- c("gene",
"mouse_id",
"sex")
if (set.c == "preclinical")
first.vc <- c(first.vc,
c("gene_name",
"mgi_id",
"variant",
"impc_id",
"impc_genotype",
"impc_phenotype"))
return(first.vc)
}
.variable_metadata_select <- function(fdata.df, set.c) {
# post-processing
if (set.c == "preclinical")
varmeta.vc <- c("category",
"measurement",
"category_full",
"transformation")
if (grepl("metabolomics", set.c))
varmeta.vc <- c("chromato",
"name",
"chebi_id",
"annot_level",
"annot_confidence")
if (grepl("proteomics", set.c))
varmeta.vc <- c("accession",
"description",
"uniprot_id")
# hypothesis testing
limma_col.vc <- grep("limma", colnames(fdata.df), value = TRUE)
if (length(limma_col.vc))
varmeta.vc <- c(varmeta.vc,
limma_col.vc)
# VIP
vip_col.vc <- grep("OPLSDA_VIP-pred", colnames(fdata.df), value = TRUE)
if (length(vip_col.vc))
varmeta.vc <- c(varmeta.vc,
vip_col.vc)
# Feature selection
biosign_col.vc <- grep("biosign_", colnames(fdata.df), value = TRUE)
if (length(biosign_col.vc))
varmeta.vc <- c(varmeta.vc,
biosign_col.vc)
# mixOmics
mixomics_col.vc <- grep("mixomics_", colnames(fdata.df), value = TRUE)
if (length(mixomics_col.vc))
varmeta.vc <- c(varmeta.vc,
mixomics_col.vc)
# complementary metabolomics annotation
if (grepl("metabolomics", set.c)) {
if (grepl("(hyper|hilic)", set.c))
varmeta.vc <- c(varmeta.vc,
"kegg_id",
"kegg_pathway_family",
"kegg_pathways",
"kegg_subpathways",
"hmdb_id",
"pubchem_id",
"formula",
"monoisotopic_mass",
"inchikey",
"inchi")
if (grepl("acquity", set.c))
varmeta.vc <- c(varmeta.vc,
"kegg_id",
"hmdb_id",
"id_comment")
varmeta.vc <- c(varmeta.vc,
"MT",
"mz",
"rt",
"isotopes",
"adduct",
"pcgroup",
"redund_group",
"redund_iso_add_frag")
}
return(varmeta.vc)
}
#' @export
abbrev_mset <- function(mset, full_to_short.l = TRUE) {
mset_names.vc <- names(mset)
abbrev.mset <- MultiDataSet::createMultiDataSet()
for (set.c in mset_names.vc) {
eset <- mset[[set.c]]
abbrev.c <- ProMetIS::sets_abbrev.vc(full_to_short.l = full_to_short.l)[set.c]
exp_data <- Biobase::experimentData(eset)
exp_data@title <- abbrev.c
Biobase::experimentData(eset) <- exp_data
stopifnot(methods::validObject(eset))
abbrev.mset <- MultiDataSet::add_eset(abbrev.mset,
eset,
dataset.type = abbrev.c,
GRanges = NA,
overwrite = TRUE,
warnings = FALSE)
}
abbrev.mset <- abbrev.mset[, ProMetIS::sets_abbrev.vc(full_to_short.l = full_to_short.l)[mset_names.vc]]
invisible(abbrev.mset)
}
.sets_abbrev.vc <- function(full_to_short.l = TRUE) {
abbrev.vc <- gsub("metabolomics", "met",
gsub("proteomics", "prot",
gsub("liver", "liv",
gsub("plasma", "plas",
gsub("hyper", "h",
gsub("acqui", "a",
gsub("hilic", "hil",
gsub("-pos", "-p",
gsub("-neg", "-n",
ProMetIS::sets.vc(), fixed = TRUE),
fixed = TRUE))))))))
full_to_short.vc <- abbrev.vc
names(full_to_short.vc) <- ProMetIS::sets.vc()
short_to_full.vc <- ProMetIS::sets.vc()
names(short_to_full.vc) <- abbrev.vc
if (full_to_short.l) {
return(full_to_short.vc)
} else {
return(short_to_full.vc)
}
}
## Proteomics ----
# Functions from the DAPAR R/Bioconductor package
DAPAR_is.OfType <- function(data, type)
{
return(type == data)
}
DAPAR_is.MV <- function(data)
{
POV = DAPAR_is.OfType(data, "POV")
MEC = DAPAR_is.OfType(data, "MEC")
isNA = is.na(data)
df <- POV | MEC | isNA
return(df)
}
## Metabolomics ----
.standards <- function() {
standard_file.c <- "//10.0.238.33/Data/Phenostore/data/ProMetIS/cs1_phenomin/0_raw/metabolomics_standards.xlsx"
standard.df <- as.data.frame(readxl::read_excel(standard_file.c,
sheet = 1,
skip = 4,
n_max = 17),
stringsAsFactors = FALSE)
colnames(standard.df) <- gsub("Maximum shift (mn)", "max_shift_mn",
gsub("Mass accuracy (ppm)", "mass_accur_ppm",
gsub("CV* (%)", "cv_percent",
gsub("Rt (min)", "rt_min",
colnames(standard.df),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE)
standard_colname_comp.df <- as.data.frame(readxl::read_excel(standard_file.c,
sheet = 1,
skip = 3,
n_max = 2),
stringsAsFactors = FALSE)
standard_colname_comp.vc <- gsub("Compound", "compound",
gsub("Elemental formula", "formula",
gsub("Exact Mass", "mass_exact",
gsub("EI+", "EI_pos",
gsub("EI-", "EI_neg",
gsub("Concentration", "concentration",
gsub("Retention time", "rt",
gsub("Mass measurement", "mass_measured",
colnames(standard_colname_comp.df),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE),
fixed = TRUE)
colnames(standard.df) <- paste0(vapply(standard_colname_comp.vc,
function(x) {
unlist(strsplit(x, split = "...", fixed = TRUE))[1]
}, FUN.VALUE = character(1), USE.NAMES = FALSE),
vapply(colnames(standard.df),
function(x) {
name.c <- unlist(strsplit(x, split = "...", fixed = TRUE))[1]
if (name.c != "")
name.c <- paste0("@", name.c)
if (name.c %in% c("@max_shift_mn", "@cv_percent"))
name.c <- paste0("rt", name.c)
return(name.c)
}, FUN.VALUE = character(1), USE.NAMES = FALSE))
colnames(standard.df)[1] <- "standard"
standard.df[, "ei_pos_mz"] <- as.numeric(vapply(standard.df[, "EI_pos"],
function(x) unlist(strsplit(x, split = " "))[1],
FUN.VALUE = character(1)))
standard.df[, "ei_pos_c8_rt"] <- as.numeric(vapply(standard.df[, 8],
function(x) unlist(strsplit(x, split = " "))[1],
FUN.VALUE = character(1))) * 60
standard.df[, "ei_neg_c8_rt"] <- as.numeric(vapply(standard.df[, 12],
function(x) unlist(strsplit(x, split = " "))[1],
FUN.VALUE = character(1))) * 60
standard.df[, "ei_neg_mz"] <- as.numeric(vapply(standard.df[, "EI_neg"],
function(x) unlist(strsplit(x, split = " "))[1],
FUN.VALUE = character(1)))
standard.df[, "ei_pos_hilic_rt"] <- as.numeric(vapply(standard.df[, 24],
function(x) unlist(strsplit(x, split = " "))[1],
FUN.VALUE = character(1))) * 60
standard.df[, "ei_neg_hilic_rt"] <- as.numeric(vapply(standard.df[, 28],
function(x) unlist(strsplit(x, split = " "))[1],
FUN.VALUE = character(1))) * 60
standard.df <- standard.df[, c("standard",
"compound",
"formula",
"concentration",
"mass_exact",
"ei_pos_mz",
"ei_pos_c8_rt",
"ei_pos_hilic_rt",
"ei_neg_mz",
"ei_neg_c8_rt",
"ei_neg_hilic_rt")]
colnames(standard.df)[5:11] <- c("mass",
"mz_pos",
"rt_pos_c8",
"rt_pos_hilic",
"mz_neg",
"rt_neg_c8",
"rt_neg_hilic")
rownames(standard.df) <- standard.df[, "compound"]
standard.df <- standard.df[order(standard.df[, "standard"], standard.df[, "compound"]), ]
standard.df[, "color"] <- rev(rainbow(nrow(standard.df), end = 4/6))
rownames(standard.df) <- gsub("AMPA 2-amino-3-(3-hydroxy-5-methyl-isoxazol-4-yl)propanoic acid",
"AMPA",
gsub("MCPA 2-methyl-4-chlorophenoxyacetic acid", "MCPA",
rownames(standard.df), fixed = TRUE), fixed = TRUE)
standard.df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.