##' Read raw MAVEN output
##'
##' Reads a MAVEN peak table and its sample key, reshapes the peak table to
##' long format (one row per peak per sample), drops missing ion counts, and
##' joins the sample key onto the result.
##'
##' @param data.fn Filename (including path) of the data file
##' @param key.fn Filename (including path) of the key file
##' @param id.cols Columns of the data file that identify a peak rather than
##'   hold a sample's ion counts, given as quosures (e.g. \code{rlang::quos()})
##'   or as a character vector. Every other column of the data file is treated
##'   as a sample column. Names that are absent from the data file are ignored.
##' @param cols Currently unused.
##' @param numeric.sample.names CURRENTLY NOT IMPLEMENTED (unused). Sample
##'   names in the key are always compared to the data file's column names as
##'   character strings.
##' @param sample.name Name of the sample column in the sample key (quosure or
##'   string). The long-format sample column is given the same name so that it
##'   can be used as the join key.
##' @param rep.name Name of the "replicate" variable in the sample key, if it
##'   exists (quosure or string). # IMPLEMENT REP FINDER
##' @return A list with elements \code{raw_data} (the long-format data joined
##'   to the sample key) and \code{exp.var} (the value returned by
##'   \code{exp_var_finder()} for the sample key).
##' @export
read_MAVEN <- function(data.fn,
                       key.fn,
                       id.cols = rlang::quos(
                         label, metaGroupId, groupId, goodPeakCount, medMz,
                         medRt, maxQuality, note, compound, compoundId,
                         expectedRtDiff, ppmDiff, parent
                       ),
                       cols = rlang::quos(mpg, cyl),
                       numeric.sample.names = TRUE,
                       sample.name = rlang::quo(sample),
                       rep.name = rlang::quo(replicate)) {
  # Resolve the column-name arguments (quosures or strings) to plain strings
  sample.col <- rlang::as_name(sample.name)
  rep.col <- rlang::as_name(rep.name)
  id.names <- vapply(id.cols, rlang::as_name, character(1), USE.NAMES = FALSE)

  # Read the raw data
  # In the future this should ensure that numeric columns are numeric
  d <- readr::read_csv(data.fn, na = "")

  # Read the sample key
  key <- readr::read_csv(key.fn)
  if (!sample.col %in% names(key)) {
    stop("The sample key has no column named '", sample.col, "'.",
         call. = FALSE)
  }

  # Get the 'experimental variables' from the key (before blank rows are
  # stripped, as in the original ordering)
  exp.vars <- exp_var_finder(key)

  # Sample names are matched against column names of the data file, which are
  # always character; coerce so that numeric sample names still join.
  key[[sample.col]] <- as.character(key[[sample.col]])

  # Strip out blank lines (and "X" placeholder rows) from the sample key
  keep <- !is.na(key[[sample.col]]) & key[[sample.col]] != "X"
  key <- key[keep, , drop = FALSE]

  # Melt the data frame: every column that is NOT an id column is a sample
  sample.cols <- setdiff(names(d), id.names)
  if (length(sample.cols) == 0) {
    # gather() with an empty selection would silently gather every column
    stop("No sample columns found in '", data.fn,
         "': every column is listed in id.cols.", call. = FALSE)
  }
  dm <- tidyr::gather(d, key = !!sample.col, value = "ion.count",
                      dplyr::one_of(sample.cols))

  # Remove the rows with no ion count
  dm <- dm[!is.na(dm[["ion.count"]]), , drop = FALSE]

  # Check for mismatch between key names and sample names, in both directions
  data.samples <- unique(dm[[sample.col]])
  key.samples <- unique(key[[sample.col]])
  missing.from.key <- setdiff(data.samples, key.samples)
  missing.from.data <- setdiff(key.samples, data.samples)
  if (length(missing.from.key) > 0 || length(missing.from.data) > 0) {
    warning(
      "Sample names in the raw dataset and the sample key do not match.\n",
      "In the dataset but missing from the sample key: ",
      paste(missing.from.key, collapse = ", "), "\n",
      "In the sample key but missing from the dataset: ",
      paste(missing.from.data, collapse = ", "),
      call. = FALSE
    )
  }

  # Merge key with data
  d_merge <- dplyr::full_join(dm, key, by = sample.col)

  # Set replicate value to factor (only that column; keep the rest intact)
  if (rep.col %in% exp.vars && rep.col %in% names(d_merge)) {
    d_merge[[rep.col]] <- as.factor(d_merge[[rep.col]])
  }

  list(raw_data = d_merge, exp.var = exp.vars)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.