# Supplemental functions not-exported
#' Matching between unq_ids and mblid for metabolomics to match with 16S data
#' @description Matching unq_ids and mblids for data. First it searches for missing mblids in the
#' data set and capture their unq_ids. Then it subsets the reference according to the unq_ids
#' found. Then it selects the time period based upon the timelabel. For every row of data we check
#' if their unq_ids is in the subsetted reference's unq_id, then we grab the indices of those ones.
#' If there are more than one unq_id, we select the one with the highest read count. If not, we
#' grab the mblid and inputted it in.
#' @param reference The data frame containing references
#' @param data The data frame of the metabolomic data frame of interest
#' @param timelabel The label of time point of interest
missing.imputation <- function(reference,data,timelabel){
message("Imputing ids based on reference...")
# what indexes where missing
missing.index <- which(data$mblid == "")
no.mblid.index <- data$unq_id[missing.index]
#find entries with unq_ids identified prior, with the appropriate time period label.
subsetted.reference <- subset(reference, rlang::.data$unq_id %in% no.mblid.index)
subsetted.reference <- subset(subsetted.reference, rlang::.data$TimePeriod == timelabel)
subsetted.reference <- subset(subsetted.reference, rlang::.data$specimen_type == "Stool")
# extracting appropriate mblid and assign it to the data
for (i in 1:nrow(data)){
if (data$unq_id[i] %in% subsetted.reference$unq_id){
index <- which(subsetted.reference$unq_id == data$unq_id[i])
if (length(index) > 1){
index <- index[which.max(subsetted.reference$SequenceCount[index])]
}
data$mblid[i] <- subsetted.reference$mblid[index]
}
}
message("Check if there are remaining samples with missing ids...")
missing <- data$unq_id[which(data$mblid == "")]
message(paste("Number of samples removed due to inability to find corresponding mblids", length(missing)))
data <- data[-which(data$unq_id %in% missing),]
return(data)
}
#' @title Matching functions between two data sets
#' @description A function to match between two data sets (either metabolomics and taxonomic or
#' between metabolomic data sets). For matching between metabolomics and taxonomic data, translated
#' mblids are used. For matching between complimentary metabolomics data sets, the unq_ids are used.
#' @param data.1 The first data frame to be inputted, what it is depends on the variable `type`
#' @param data.2 The second data frame to be inputted, what it is depends on the variable `type`
#' @param type Which matching to be done. If `type == "cross"` then it is matching between metabolomics
#' (`data.1`)and taxonomic (`data.2`) data sets. If `type == "same"` it's between two metabolomics data
#' sets.
mctools.matching <- function(data.1, data.2, type){
if (type == "cross"){
metabo.data <- data.1
tax.data <- data.2
mblid <- metabo.data$mblid
index <- match(mblid, rownames(tax.data))
unmatch <- which(is.na(index))
message(paste("The number of unmatched samples is", length(unmatch)))
tax.index <- as.vector(stats::na.omit(index))
list <- list(tax.index, unmatch)
names(list) <- c("tax.index","metab.unmatch")
} else if (type == "same"){
metabo.data.1 <- data.1
metabo.data.2 <- data.2
id.1 <- unique(metabo.data.1$unq_id)
id.2 <- unique(metabo.data.2$unq_id)
# getting matches by unq_id
match_unq_id <- match(id.1, id.2)
match_unq_1 <- as.numeric(stats::na.omit(match_unq_id))
match_unq_2 <- which(!is.na(match_unq_id))
# extracting appropriate metabolomic profiles
list <- list(match_unq_1, match_unq_2)
names(list) <- c("match.index.1", "match.index.2")
}
return(list)
}
#' @title Extracting data sets from index after matching
#' @description A function to simplify the extraction process for multiple time points after
#' getting the extraction indices from `mctools.matching`.
#' @param metabo Metabolic data set
#' @param tax Taxonomic data set
extraction <- function(metabo, tax){
index <- mctools.matching(data.1 = metabo, data.2 = tax, type = "cross")
tax.index <- index$tax.index
metab.index <- index$metab.unmatch
metabo <- metabo[-metab.index,]
tax <- tax[tax.index,]
list <- list(metabo, tax)
return(list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.