#' Synaptic Smoothing pipeline
#'
#' A highly specialized pipeline for processing synaptic vesicle release assay results.
#' It takes a directory containing an Excel file (.xlsx) in a very specific TPP format
#' (Tracy's Proprietary Processing) and returns a set of processed .csv files for each
#' assay, along with an Excel file containing a summary of the results and some analytics.
#' Each sheet of the input Excel file must have five header rows, starting at row 1:
#' 1) F0 (average starting fluorescence), 2) Fstim (average stimulated fluorescence),
#' 3) Fun (maximum unquenched fluorescence), 4) FC_Fun-F0 (fold change of the unquenched
#' maximum relative to baseline, F0), and 5) deltaF (increase in fluorescence in the
#' stimulated condition relative to F0). The first column (under the metric labels) must
#' be reserved for time values. The script removes this column, so if it contains data
#' rather than time stamps, that data will be lost. The worksheets used to generate the
#' .csv files often have extra columns at the end of the sheet. These start at row 6
#' (after the header metrics) and report the average and error of fluorescence across
#' all measurements at a given time. There can be two to four of these, or none; any
#' column after the last value in the row 1 metrics is removed regardless. If this
#' function returns a Java out-of-memory (heap space) error, try restarting R and
#' running the following line before loading typGumbo:
#' options(java.parameters = c("-XX:+UseConcMarkSweepGC", "-Xmx8192m"))
#'
#' @param in.dir Directory where the input Excel file is located,
#' e.g. in.dir = "LViN15" or in.dir = "LViN15/results". The path to the Excel file
#' itself can also be given directly, e.g. in.dir = "LViN15/LViN15.xlsx".
#' @param csv Logical. Whether to write .csv files to the output directory.
#' @param xlsx Logical. Whether to write an .xlsx file, with a separate sheet for each
#' output matrix, to the output directory. NOTE: this uses the xlsx::write.xlsx
#' function, which takes a lot of time and memory and should not be used for very large
#' data sets (more than about 1000 observations).
#' @param filter Logical. Whether the data should be filtered to exclude measurements
#' with FC_Fun-F0 of 2 or less. Even with this filter off, the data are still filtered
#' to remove measurements without a positive fluorescence change (deltaF <= 0).
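#'
#' @examples
#' \dontrun{
#' ## A minimal usage sketch; "LViN15" is an illustrative directory name, assumed
#' ## to contain a TPP-format .xlsx workbook.
#' syn.smooth(in.dir = "LViN15", csv = TRUE, xlsx = TRUE, filter = TRUE)
#'
#' ## The workbook can also be passed directly, here with the fold-change filter off.
#' syn.smooth(in.dir = "LViN15/LViN15.xlsx", filter = FALSE)
#' }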
#'
#' @importFrom readxl excel_sheets
#' @importFrom readxl read_xlsx
#' @importFrom xlsx write.xlsx
#' @export
syn.smooth <- function(in.dir, csv = TRUE, xlsx = TRUE, filter = TRUE) {
## locate the input Excel file (either given directly or found inside in.dir)
if (grepl("\\.xlsx$", in.dir) || grepl("\\.xls$", in.dir)) {
excelfile <- in.dir
in.dir <- dirname(in.dir)
} else {
if (grepl("/$", in.dir)) {
in.dir <- gsub("/$", "", in.dir)}
files <- dir(in.dir, full.names = TRUE)
excelfile <- files[grepl(pattern = "\\.xlsx$", files)]
}
if (!(dir.exists(in.dir))) stop(
"\nso ummm. here's the thing...",
"\nthat directory that you specified in in.dir doesn't actually exist on your machine.",
"\nI know, right? I'm as shocked as you are right now.",
"\nmaybe you put a leading slash in front of a relative path?",
"\nhonestly I don't know what you did but I don't think that directory is there.",
"\nTry again?",
"\nWith an actual existing directory path?")
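## split each worksheet of the input workbook into its own .csv inside in.dir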
sheets <- readxl::excel_sheets(excelfile)
trimMatMess <- function(x) {
# trim worksheet mess: keep only rows with a value in column 2 and columns with a
# value in row 1 (x is a tibble, so x[,2] and x[1,] stay two-dimensional)
chopRows <- !is.na(x[,2])
chopCols <- !is.na(x[1,])
return(x[chopRows[,1], chopCols[1,]])
}
for ( i in sheets ) {
temp <- readxl::read_xlsx(path = excelfile, sheet = i, col_names = FALSE)
colnames(temp) <- NULL
temp <- trimMatMess(temp)
utils::write.csv(temp, file = paste0(in.dir, "/", i, ".csv"), row.names = FALSE)
}
# pull filenames for run files (.csv)
runfiles <- dir(in.dir)
runfiles <- runfiles[grepl(pattern = "\\.csv$", runfiles)]
# make directory for output (named by appending to the in.dir path, e.g. "LViN15processed")
if (filter) {
out.dir <- paste0(in.dir, "processed")
} else {
out.dir <- paste0(in.dir, "processedNoFCfilter")
}
dir.create(out.dir)
globalresults <- list() ##added in V0.3
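# Ms collects the number of retained time points per run; used later to size the Prism output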
Ms <- NULL
for (f in runfiles) {
results <- list()
fname <- sub("\\.csv", "", f)
#read and trim data from csv
dir.create(file.path(out.dir, fname)) # create sub-directory for this csv's output
full <- utils::read.csv(file.path(in.dir, f), header = FALSE) # read current csv
#define measurement matrix
meas <- full[c(6:nrow(full)),c(2:ncol(full))] #remove metrics rows (top 5) and time stamps (first column)
colnames(meas) <- meas[1,] # first remaining row holds the measurement names
meas <- meas[-1,]
row.names(meas) <- c(1:nrow(meas))
meas <- apply(meas, 2, as.numeric) #convert from character to numeric
#define metrics matrix
mets <- full[c(1:5),]
mets <- base::t(mets)
colnames(mets) <- mets[1,]
mets <- as.matrix(mets[-1,])
mets <- apply(mets, 2, as.numeric)
row.names(mets) <- colnames(meas)
#all(colnames(meas) == row.names(mets)) #should return true (for testing)
## Fold Change unquenched/Baseline (FC_Fun_F0) filters. Can be toggled using filter = TRUE/FALSE argument
FC_Over2 <- mets[,4] > 2 # column 4 = FC_Fun-F0
FC_Over3 <- mets[,4] > 3 ## currently unused
total <- nrow(mets)
pct.FC_Fun_F0.greater.than.2 <- sum(FC_Over2)/total #output
pct.FC_Fun_F0.greater.than.3 <- sum(FC_Over3)/total #output
# Change in absolute fluorescence stim - Baseline (DeltaF) filters
# this filter is in place even if filter = FALSE is set
DF_Over0 <- mets[,5] > 0 # column 5 = deltaF
pct.DeltaF.greater.than.0 <- sum(DF_Over0)/total #output
info <- rbind(pct.FC_Fun_F0.greater.than.2, pct.FC_Fun_F0.greater.than.3,pct.DeltaF.greater.than.0)
measure <- row.names(info)
info <- cbind.data.frame(measure, info)
row.names(info) <- NULL
if (filter) {
chop <- FC_Over2 & DF_Over0 # keep measurements passing both filters
cleanmeas <- meas[, chop, drop = FALSE] #output
cleanmets <- mets[row.names(mets) %in% colnames(cleanmeas), , drop = FALSE]
} else {
cleanmeas <- meas[, DF_Over0, drop = FALSE] #output
cleanmets <- mets[row.names(mets) %in% colnames(cleanmeas), , drop = FALSE]
}
# calculate measurements - average baseline (F0) for point of interest
DelF <- sweep(cleanmeas, 2, cleanmets[, 1], "-") # subtract each measurement's F0 from its trace
# calculate percentage unquenched increase over F0
pctFun <- sweep(DelF, 2, cleanmets[, 3] - cleanmets[, 1], "/") # divide by Fun - F0
mean <- apply(pctFun, 1, mean)
n <- ncol(pctFun)
sem <- apply(pctFun, 1, function(x) stats::sd(x) / sqrt(n))
pctFun <- cbind(pctFun, mean, sem, n)
Ms <- c(Ms, nrow(pctFun))
results[["data.raw"]] <- meas
results[["metrics.raw"]] <- mets
results[["data.cleaned"]] <- cleanmeas
results[["metrics.cleaned"]] <- cleanmets
results[["DelF"]] <- DelF
results[["pctFun"]] <- pctFun
results[["info"]] <- info
globalresults[[fname]] <- pctFun
if (csv) {
subdir <- file.path(out.dir, fname)
utils::write.csv(meas, file = file.path(subdir, "data.raw.csv"), row.names = FALSE)
utils::write.csv(mets, file = file.path(subdir, "metrics.raw.csv"), row.names = TRUE)
utils::write.csv(cleanmeas, file = file.path(subdir, "data.cleaned.csv"), row.names = FALSE)
utils::write.csv(cleanmets, file = file.path(subdir, "metrics.cleaned.csv"), row.names = TRUE)
utils::write.csv(DelF, file = file.path(subdir, "deltaF.csv"), row.names = FALSE)
utils::write.csv(pctFun, file = file.path(subdir, "pctFun.csv"), row.names = FALSE)
utils::write.csv(info, file = file.path(subdir, "info.csv"), row.names = FALSE)
}
if (xlsx) {
sumfile <- file.path(out.dir, fname, paste0(fname, "summary.xlsx"))
xlsx::write.xlsx(meas, file = sumfile, sheetName = "dataRaw", row.names = FALSE, append = FALSE)
xlsx::write.xlsx(mets, file = sumfile, sheetName = "metricsRaw", row.names = TRUE, append = TRUE)
xlsx::write.xlsx(cleanmeas, file = sumfile, sheetName = "dataClean", row.names = FALSE, append = TRUE)
xlsx::write.xlsx(cleanmets, file = sumfile, sheetName = "metricsClean", row.names = TRUE, append = TRUE)
xlsx::write.xlsx(DelF, file = sumfile, sheetName = "DeltaF", row.names = FALSE, append = TRUE)
xlsx::write.xlsx(pctFun, file = sumfile, sheetName = "pctFun", row.names = FALSE, append = TRUE)
xlsx::write.xlsx(info, file = sumfile, sheetName = "info", row.names = FALSE, append = TRUE)
}
}
# write every pctFun matrix to a single summary workbook, one sheet per run file
for (f in seq_along(runfiles)) {
fname <- sub("\\.csv", "", runfiles[f])
xlsx::write.xlsx(globalresults[[fname]], file = file.path(out.dir, "PCT_FUN_Summary.xlsx"), sheetName = fname, row.names = FALSE, append = (f > 1))
}
## Generate Prism-friendly output: the mean, sem, and n columns from every run, side by side
Ms <- max(Ms) + 2 # longest run, plus the label row and one row of padding
forPrism <- data.frame(matrix(ncol = 3, nrow = Ms))
excelfile2 <- paste0(out.dir, "/PCT_FUN_Summary.xlsx")
sheets2 <- readxl::excel_sheets(path = excelfile2)
for ( i in sheets2 ) {
temp <- readxl::read_xlsx(path = excelfile2, sheet = i, col_names = FALSE)
temp <- temp[, c((ncol(temp) - 2):ncol(temp))] # keep the trailing mean, sem, and n columns
n <- rep(i, 3)
temp <- rbind(n, temp)
# pad with NA rows so every run spans the same number of rows
while (nrow(temp) < Ms) {
temp[nrow(temp) + 1,] <- NA
}
if (i == sheets2[1]) {
forPrism[,c(1:3)] <- temp
} else {
forPrism <- cbind.data.frame(forPrism, temp)
}
}
utils::write.table(forPrism, file = paste0(out.dir, "/forPrism.csv"), sep = ",",row.names = FALSE, col.names = FALSE)
## Convert Prism labels to a single comma-separated string
labelsPzm <- paste(sheets2, collapse = ",")
utils::write.table(labelsPzm, file = paste0(out.dir, "/labelsForPrism.csv"), quote = FALSE, sep = "", row.names = FALSE, col.names = FALSE)
}