#' Description: Calculate species summaries and possibly update d.oligo2
#'
#' Arguments:
#' @param d.oligo2 d.oligo2
#' @param bgc.method background correction method
#' Returns:
#' @return Background-corrected data matrix
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords internal
oligo.bg.correction <- function (d.oligo2, bgc.method) {
if ( bgc.method == "6*sd bkg intensity" ){ bgth <- 6 }
d.oligo2 <- threshold.data(d.oligo2, bgth)
d.oligo2 <- apply(d.oligo2, c(1,2), function(x) max(0, x))
d.oligo2
}
# Database utilities for package-internal use only
#' Tests whether the database connection is a phyloarray connection.
#' Expands one element (one "field", "value" pair list) from a list
#' of "field", "value" pair lists
#'
#' @param con a MySQL database connection.
#' @return TRUE when the test succeeds. Otherwise a program halt.
#' @references See citation("microbiome")
#' @author Douwe Molenaar. Maintainer: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
phyloarrayConnection <- function (con) {
# microbiome::InstallMarginal("RMySQL")
if (!(class(con)=='MySQLConnection')) {
stop('Input must be a DBI connection to a phyloarray database')
}
essential <- c("array",
"arraydesign",
"arrayfeature",
"arrayhybridisations",
"featureextraction",
"featuremeasurement",
"hybridisation",
"image",
"oligo",
"oligoclass",
"oligotargetpair",
"phylogeny",
"project",
"sample",
"slide",
"target",
"taxon",
"taxonlevel",
"taxtotax")
if (!(length(intersect(dbListTables(con),essential))==length(essential))) {
stop('Essential tables missing in the connected database. Not a phyloarray database?')
}
return(TRUE)
}
#' Expands one element (one "field", "value" pair list) from a list of "field", "value" pair lists
#' @param elm TBA
#'
#' @return TBA
#' @references See citation("microbiome")
#' @author Douwe Molenaar. Maintainer: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
expandElement <- function (elm) {
if (is.list(elm)) {
if (is.null(elm$field)) {
stop("Missing 'field' field in condition")
}
if (!is.vector(elm$field)) {
stop("Field 'field' must be vector with length 1")
}
if (length(elm$field)>1) {
stop("Field 'field' must be vector with length 1")
}
if (!is.vector(elm$value)) {
stop("'value' field must be vector")
}
if (is.null(elm$value)) {
stop("Missing 'value' field in condition")
}
if (is.character(elm$value)) {
elm$value <- paste("'",elm$value,"'",sep='')
}
return(paste("(",paste(elm$field,elm$value,sep='=',collapse=' OR '),")",sep=''))
}
else {
stop("Argument 'condition' must be a list of lists with 'field' and 'value' pairs")
}
}
#' Expands a list of "field", "value" pair lists into an SQL condition
#'
#' Arguments:
#'@param condition condition
#'
#' Returns:
#' @return TBA
#'
#' @references See citation("microbiome")
#' @author Douwe Molenaar. Maintainer: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @examples # TBA
#' @keywords utilities
expandCondition <- function (condition) {
if (is.null(condition)) {
return(NULL)
} else {
if (is.list(condition)) {
return(paste(" WHERE",paste(lapply(condition, expandElement),collapse=' AND ')))
} else {
stop("Argument 'condition' must be a list of lists with 'field' and 'value' pairs")
}
}
}
#' Description: populate radiobuttons
#'
#' Arguments:
#' @param tt TBA
#' @param title TBA
#' @param var.names TBA
#' @param var.values TBA
#' @param var.init TBA
#'
#' Returns:
#' @return A list.
#'
#' @references
#' See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
populate.radiobuttons <- function(tt, title, var.names, var.values, var.init) {
title.font <- tkfont.create(weight = "bold", size = 10)
frm <- tkframe(tt, relief = "groove", borderwidth = 2)
label.widget <- tklabel(frm, text = title,font = title.font)
tkpack(label.widget, side = "top")
for (i in 1:length(var.values)){
button.widget <- tkradiobutton(frm, text = var.names[i],
variable = var.init, value = var.values[i])
tkpack(button.widget,side = "top")
}
tkpack(frm,side = "top")
return(list(frame = frm, var = var.init))
}
#' Description: Select projects to analyze
#'
#' Arguments:
#' @param con valid MySQL connection
#' @param multi enable selection of multiple options
#' @param condition TBA
#' Returns:
#' @return vector of project names
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
choose.projects <- function (con, multi = TRUE, condition = NULL) {
prjs <- fetch.projects(con, condition = condition)
projects <- select.list(sort(prjs$projectName), multiple = multi, title = "Select studies:")
prjs <- fetch.projects(con, condition = list(list(field = 'projectName', value = projects)))
return(prjs)
}
#' choose.samples
#'
#' @param con MySQL connection
#' @param multi multiple selections allowed
#' @param title title
#' @param condition TBA
#'
#' @return sample vector
#' @export
#' @references See citation("microbiome")
#' @author Douwe Molenaar. Maintainer: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @examples # TBA
#' @keywords utilities
choose.samples <- function (con, multi = TRUE, title = 'Select samples:', condition = NULL) {
smps <- fetch.samples(con, condition = condition)
samples <- select.list(smps$sampleID, multiple = multi, title = title)
smps <- fetch.samples(con, condition = list(list(field = 'sampleID', value = samples)))
return(smps)
}
#' Description: Calculate d.oligo2
#'
#' Arguments:
#' @param featuretab featuretab
#' @param d.scaled d.scaled
#' @param oligo.ids oligo.ids
#' Returns:
#' @return d.oligo2
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
get.doligo2 <- function (featuretab, d.scaled, oligo.ids) {
d.oSplit <- split(cbind(featuretab[,1:3],d.scaled), featuretab$oligoID)
d.oSplit.pruned <- d.oSplit[oligo.ids]
d.oligo <- t(sapply(d.oSplit, function(x) apply((x[,4:dim(x)[2]]), 2, mean, na.rm=T))) # hybs separate
sampleID <- get.sampleid(d.oligo)
d.oligo2 <- t(sapply(d.oSplit,
function(x){
temp <- apply((x[,4:dim(x)[2]]), 2, mean, na.rm=T)
temp2 <- sapply(split(temp, sampleID), mean, na.rm=T)
return(temp2)
}
))# hybs averaged
d.oligo2
}
#' Description: Pick sampleIDs from d.oligo column names
#'
#' Arguments:
#' @param d.oligo d.oligo matrix
#' Returns:
#' @return sampleID vector
#'
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
get.sampleid <- function (d.oligo) {
sampleID <- sapply(colnames(d.oligo), function(z) { # edit by S.Tims
# allows samples with "." in the samplename
s <- strsplit(z, split="\\.")[[1]]
if(length(s) > 4){
s <- head(s,-3)
s <- paste(s, collapse = ".")
} else { s <- s[1] }
return(s)})
sampleID
}
#' Description: Minmax scaling.
#'
#' Arguments:
#' @param dat data matrix in original absolute scale
#' @param quantile.points quantiles for minmax
#' @param minmax.points pre-calculated quantiles for minmax in log10 scale; overrides quantile.points argument
#' @param robust Select minmax version.
#'
#' Returns:
#' @return normalized data matrix in absolute scale
#'
#' @note With robust = FALSE, the standard minmax is carried out. This
#' shifts and scales each array such that their min and max values are
#' identical across arrays. The robust = TRUE will perform the scaling
#' such that the upper quantiles and minimum values of the data match
#' (instead of maximum values).
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
scaling.minmax <- function (dat, quantile.points = NULL, minmax.points = NULL, robust = FALSE) {
if ( is.null(minmax.points) ) {
maxabs <- mean(apply(dat, 2, quantile, max(quantile.points), na.rm = TRUE))
minabs <- mean(apply(dat, 2, quantile, min(quantile.points), na.rm = TRUE))
} else {
maxabs <- max(minmax.points)
minabs <- min(minmax.points)
}
if (!robust) {
r <- apply(dat, 2, function (x) {
x = (((x - min(x, na.rm = T))/(max(x, na.rm = T) - min(x, na.rm = T)))*(maxabs - minabs)) + minabs;
return(x)})
} else {
r <- apply(dat, 2, function (x) {
# Shift data to start from zero
xz <- x - min(x, na.rm = T);
# Check the quantile points
maxq <- quantile(xz, max(quantile.points), na.rm = TRUE);
# Determine the scaling factor such that the max quantiles will
# match between arrays
k <- maxabs/maxq;
# Scale the data to match max quantiles
xs <- k * xz + min(dat, na.rm = TRUE);
xs})
}
r
}
#' Description: Write log file
#'
#' Arguments:
#' @param naHybs hybridisation that were removed due to NAs
#' @param params parameters
#'
#' Returns:
#' @return List of scaling methods
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
WriteLog <- function (naHybs, params) {
scriptVersion1 <- sessionInfo()$otherPkgs$microbiome$Version # microbiome package number
scriptVersion2 <- sessionInfo()$otherPkgs$HITChipDB$Version # microbiome package number
## Write log of parameters used in profiling in to the file
tmpTime <- strsplit(as.character(Sys.time()), split=" ")[[1]]
tmpDate <- tmpTime[1]
tmpTime <- paste(strsplit(tmpTime[2], split=":")[[1]], collapse=".")
profTime <- paste(tmpDate,tmpTime,sep="_")
logfilename <- paste(params$wdir,"/",profTime,"_profiling_log.txt", sep="")
cat("Log of profiling script\n", "\n", file=logfilename)
cat("profiling date: ",profTime, "\n", file=logfilename, append=T)
cat("script version microbiome: ", scriptVersion1, "\n",file=logfilename, append=T)
cat("script version HITChipDB: ", scriptVersion2, "\n",file=logfilename, append=T)
cat("data retrieved from db: ",params$useDB, "\n", file=logfilename, append=T)
cat("project IDs: ",params$prj$projectID, "\n", file=logfilename, append=T)
cat("sample IDs: ",params$samples$sampleID, "\n", file=logfilename, append=T)
cat("excluded oligos: ",params$rm.phylotypes$oligos, "\n", file=logfilename, append=T)
cat("excluded species: ",params$rm.phylotypes[["species"]], "\n", file=logfilename, append=T)
cat("excluded level 1: ",params$rm.phylotypes[["L1"]], "\n", file=logfilename, append=T)
cat("excluded level 2: ",params$rm.phylotypes[["L2"]], "\n", file=logfilename, append=T)
cat("excluded hybridisations: ",naHybs, "\n", file=logfilename, append=T)
cat("remove non-specific oligos: ",params$remove.nonspecific.oligos, "\n",file=logfilename, append=T)
cat("phylogeny: ",params$phylogeny, "\n", file=logfilename, append=T)
cat("scaling: ",params$scal, "\n", file=logfilename, append=T)
cat("data in directory: ",params$wdir, "\n",file=logfilename, append=T)
## Save profiling parameters
paramfilename <- paste(params$wdir,"/",profTime,"_profiling_params.Rdata", sep="")
save(logfilename, profTime, scriptVersion1, scriptVersion2, params, naHybs, file = paramfilename)
list(log.file = logfilename, parameter.file = paramfilename)
}
#' Description: Writed data into the output directory
#' @param finaldata preprocessed data matrices in absolute scale (from the chipdata function)
#' @param output.dir output directory
#' @param tax.table tax.table used in summarization
#' @param tax.table.full tax.table.full unfiltered phylogenyinfo
#' @param meta sample metadata samples x features
#' @param verbose verbose
#' @return Preprocessed data in absolute scale, tax.table, and parameters
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
WriteChipData <- function (finaldata, output.dir, tax.table, tax.table.full, meta, verbose = TRUE) {
## Write oligoprofile in original (non-log) domain
fname <- paste(output.dir, "/oligoprofile.tab", sep = "")
mydat <- finaldata[["oligo"]]
WriteMatrix(cbind(rownames(mydat), mydat), fname, verbose)
## Write the other levels in log domain
for (level in setdiff(names(finaldata), "oligo")) {
for (method in names(finaldata[[level]])) {
fname <- paste(output.dir, "/", level, "-", method, ".tab", sep = "")
mydat <- finaldata[[level]][[method]]
WriteMatrix(cbind(rownames(mydat), mydat), fname, verbose)
}
}
# Write metadata template
fname <- paste(output.dir, "/meta.tab", sep = "")
WriteMatrix(meta, fname, verbose)
# Write tax.table that shall be used for probe summarization
fname <- paste(output.dir, "/taxonomy.tab", sep = "")
WriteMatrix(tax.table, fname, verbose)
# Write filtered tax.table
fname <- paste(output.dir, "/taxonomy.filtered.tab", sep = "")
WriteMatrix(tax.table, fname, verbose)
# Write unfiltered tax.table
fname <- paste(output.dir, "/taxonomy.full.tab", sep = "")
WriteMatrix(tax.table.full, fname, verbose)
# Write metadata
fname <- paste(output.dir, "/meta.tab", sep = "")
WriteMatrix(meta, fname, verbose)
# Return path to the output directory
output.dir
}
#' Description: Format string vector to mysql query format
#'
#' Arguments:
#' @param s string vector
#'
#' Returns:
#' @return mysql query version
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
mysql.format <- function (s) {
paste("('",paste(as.character(s),collapse="','",sep="'"),"')",sep="")
}
#' Description: Calculate species summaries and possibly update d.oligo2
#'
#' Arguments:
#' @param d.oligo2 d.oligo2
#' @param bgc.method background correction method
#' Returns:
#' @return Background-corrected data matrix
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
oligo.bg.correction <- function (d.oligo2, bgc.method) {
if ( bgc.method == "6*sd bkg intensity" ){ bgth <- 6 }
d.oligo2 <- threshold.data(d.oligo2, bgth)
d.oligo2 <- apply(d.oligo2, c(1,2), function(x) max(0, x))
d.oligo2
}
#' Description: Between-arrays normalization
#'
#' Arguments:
#' @param dat data matrix in original absolute scale
#' @param method normalization method
#' @param bg.adjust background adjustment
#' @param minmax.quantiles quantiles for minmax
#' @param minmax.points minmax end points
#'
#'
#' Returns:
#' @return Normalized data matrix in absolute scale
#'
#' @export
#' @importFrom preprocessCore normalize.quantiles
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
ScaleProfile <- function (dat, method = 'minmax', bg.adjust = NULL, minmax.quantiles = c(0.005, 0.995), minmax.points = NULL) {
# d.scaled <- ScaleProfile(fdat.orig, params$normalization, bg.adjust = NULL, minmax.points = params$minmax.points)
# dat <- fdat.orig; method = params$normalization; bg.adjust = NULL; minmax.quantiles = c(0.005, 0.995); minmax.points = NULL
message(paste("Normalizing with", method))
## Table dat is a copy of featuretab containing
## logarithms of the values in
## featuretab
if (method=='minmax') {
r <- scaling.minmax(dat, quantile.points = minmax.quantiles, minmax.points = minmax.points, robust = FALSE)
} else if (method=='minmax.robust') {
r <- scaling.minmax(dat, quantile.points = minmax.quantiles, minmax.points = minmax.points, robust = TRUE)
} else if (method=='quantile') {
dn <- dimnames(dat)
r <- normalize.quantiles(dat)
dimnames(r) <- dn
} else if (method=='normExpQuant') {
## Impute NA's with sample medians
na.inds <- which(is.na(dat), arr.ind=T)
r <- apply(dat, 2, function(x){x[is.na(x)] <- median(x, na.rm=T); return(x)})
dn <- dimnames(dat)
rc <- apply(10^(r), 2, bg.adjust)
r <- normalize.quantiles(log10(rc+1))
dimnames(r) <- dn
r[na.inds] <- NA
} else {
stop("No between-array normalization recognized!!")
}
return(r)
}
#' Description: determine detection threshold for the data
#'
#' Arguments:
#' @param dat data
#' @param sd.times standard deviation threshold
#'
#' Returns:
#' @return thresholded data matrix
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
threshold.data <- function(dat, sd.times = 6){
thr <- apply(dat, 2, function(x){
DD <- density(as.numeric(x), adjust = 1.2, na.rm = T);
noise_mode <- DD$x[which(DD$y==max(DD$y))[1]];
noise_sd <- sd(x[x < noise_mode], na.rm = T);
low.thresh <- noise_mode + sd.times*noise_sd;
low.thresh
})
# Subtract background from signal intensities in each sample
data.mat <- t(apply(dat, 1, function(Tr){ Tr-thr }))
return(data.mat)
}
#' Description: List oligos associated with removed phylotypes from different levels
#'
#' Arguments:
#' @param rm.phylotypes rm.phylotypes
#' @param tax.table tax.table
#'
#' Returns:
#' @return probe name vector
#'
#' @export
#' @references See citation("microbiome")
#' @author Contact: Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
sync.rm.phylotypes <- function (rm.phylotypes, tax.table) {
# If remove L0 is not NULL, then
# add L1 groups under this group to removal list
if (!is.null(rm.phylotypes$L0)) {
rm.phylotypes$L1 <- c(rm.phylotypes$L1, tax.table[which(tax.table[["L0"]] == rm.phylotypes$L0), "L1"])
rm.phylotypes$L1 <- unique(rm.phylotypes$L1)
}
# If remove L1 is not NULL, then add L2 groups under this group to removal list
if (!is.null(rm.phylotypes$L1)) {
rm.phylotypes$L2 <- c(rm.phylotypes$L2, tax.table[which(tax.table[["L1"]] == rm.phylotypes$L0), "L2"])
rm.phylotypes$L2 <- unique(rm.phylotypes$L2)
}
# If remove L2 is not NULL, then
# add species groups under this group to removal list
if (!is.null(rm.phylotypes$L2)) {
rm.phylotypes$species <- c(rm.phylotypes$species, tax.table[which(tax.table[["L2"]] == rm.phylotypes$L0), "species"])
rm.phylotypes$species <- unique(rm.phylotypes$species)
}
rm.phylotypes
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.