# #Parse names for ECM, Ligand and Concentration values
# #
# #\code{parseContents} returns a datatable with new columns for the ECM, ligand
# #and concentration at each spot.
# #
# #@param dt a datatable with a Name column to be parsed
# #@return The input datatable with new columns for the ECM, Ligand and
# # Concentration values.
# #@section Usage: This function assumes a Name column exists and that its
# # format is Concentration_ECM_Ligand or ligand_ECM or Col I_AF 488 or ECM. If Name only
# # contains an ECM, a NULL string is assigned to the Ligand and a
# # concentration value of 0 is assigned to Concentration. If there are 2 words
# # separated by an underscore, the first is the Ligand and the second is the
# # ECM. The concentration is assigned a value of 0. If there are 3 words,
# # the first is assigned to the concentration.
# #
# # In the special case of the AF 488 fiducial, the Ligand is assigned the value
# # AF 488.
# #
# parseContents <- function(dt){
# contentList <- lapply(strsplit(dt$Name,"_"),function(n){
# if(length(n)==1){ECM<-n[[1]]
# Ligand<-""
# Concentration<-0
# return(list(Ligand = Ligand, ECM = ECM,Concentration=Concentration))
#
# } else if(length(n)==2) {
# Ligand<-n[[1]]
# ECM<-n[[2]]
# if(Ligand=="Col I"&ECM=="AF 488") {
# Ligand<-"AF 488"
# ECM<-"Col I"
# }
# Concentration <- 0
# return(list(Ligand = Ligand, ECM = ECM,Concentration = Concentration))
#
# } else{
# Concentration <- as.integer(sub("X","",x = n[[1]]))
# Ligand <- n[[2]]
# ECM <- n[[3]]
# return(list(Ligand = Ligand, ECM = ECM,Concentration = Concentration))
# }
# }
# )
# contents <- data.table::rbindlist(contentList)
# contents$ConcentrationRank <- match(contents$Concentration,sort(unique(contents$Concentration)))
# dtA <- cbind(dt,contents)
# }
#
# #Parse BufferWinners names for ECM, Ligand and Concentration values
# #
# #\code{parseBufferWinnerContents} returns a datatable with new columns based on the Name column.
# #
# #@param dt a datatable with a Name column to be parsed
# #@return The input datatable with new columns of Condition1, Condition2, Condition3 and Condition4.
# #@section Usage: This function assumes a Name column exists and uses an underscore as field separators.
# #
# parseBufferWinnerContents <- function(dt) {
# #parse the content names in the gal file for Ligand, ECM and concentrations
# contentList <- lapply(strsplit(dt$Name,"_"),function(n) {
# return(list(Condition1 = n[[1]], Condition2 = n[[2]],Condition3 = n[[3]],Condition4 = n[[4]]))
# }
# )
# contents <- data.table::rbindlist(contentList)
# dtA <- cbind(dt,contents)
# return(dtA)
# }
#
# #Parse 4wellvalidationContents for ECM, Glycerol, Triton, EDTA and Buffer values
# #
# #\code{parse4wellvalidationContents} returns a datatable with new columns based on the Name column.
# #
# #@param dt a datatable with a Name column to be parsed
# #@return The input datatable with new columns of ECM, Glycerol, Triton, EDTA and Buffer.
# #@section Usage: This functions assumes a Name column exists and uses an underscore as field separators.
# #
# parse4wellvalidationContents <- function(dt){
# #parse the content names in the gal file for Ligand, ECM and concentrations
# contentList <- lapply(strsplit(dt$Name,"_"),function(n) {
# return(list(ECM = n[[1]], Gycerol = n[[2]],Triton = n[[3]],EDTA = n[[4]],Buffer = n[[5]]))
# }
# )
# contents <- data.table::rbindlist(contentList)
# dtA <- cbind(dt,contents)
# return(dtA)
# }
#Parse simulated data for ECM, Ligand and ConcentrationRank values
#
#\code{parseSimulatedContents} returns a datatable with new columns based on
#the Name column.
#
#@param dt a datatable with a Name column to be parsed
#@return The input datatable with new columns of ECM, Ligand and ConcentrationRank.
#@section Usage: This function assumes a Name column exists and uses an
# underscore as the field separators. The format must be either
# ECM or ECM_Ligand_ConcentrationRank. ConcentrationRank will be coereced into
# an integer by removing any non-numeric values.
#
parseSimulatedContents <- function(dt){
#parse the content names in the gal file for Ligand, ECM and concentrations
contentList <- lapply(strsplit(dt$Name,"_"),function(n) {
if (length(n)==2) stop("Invalid format in gal Name field. There should be 1 ECM or 1 ECM with 1 Ligand and a concentration value.")
if(length(n)==1){ECM<-n[[1]]
Ligand<-""
ConcentrationRank<-0
} else {
ECM <- n[[1]]
Ligand <- n[[2]]
ConcentrationRank <- as.integer(gsub("[^[:digit:]]*","",n[[3]]))
}
return(list(Ligand = Ligand, ECM = ECM,ConcentrationRank = ConcentrationRank))
}
)
contents <- data.table::rbindlist(contentList)
dtA <- cbind(dt,contents)
}
# #' Coerce the All ECM contents to standard format
# #'
# #' \code{coerceAllECM}
# #'
# #'@param df A dataframe read from a gal file
# #'@return A dataframe with the Name column reformatted to match the Controlled Vocabulary standard.
# #'
# #'@export
# coerceAllECM <- function(df){
# df$Name <- gsub("__","_",df$Name)
# df$Name <- gsub("PBSCOL I","PBS_none_COL1_UNKNOWN",df$Name)
# df$Name <- gsub("^PBS$","PBS_none",df$Name)
# df$Name <- gsub("COL I$","COL1_UNKNOWN",df$Name)
# df$Name <- gsub("^-$","blank_none",df$Name)
#
# contentMatrix <- limma::strsplit2(x = df$Name,split="_")
#
# df$Name <- apply(contentMatrix,1,function(x){
# pUIDs <- paste0(x[seq(1,length(x),2)],"_",x[seq(2,length(x),2)])
# pUIDs <- gsub("^_$","",pUIDs)
# pUID <- paste0(pUIDs,collapse="-")
# pUID <- gsub("[-]*$","",pUID)
# }
# )
# return(df)
# }
#
# #Parse controlled vocabulary data for ECM, Ligand and ConcentrationRank values
# #
# #\code{parseCVContents} returns a datatable with new columns based on the Name
# #column.
# #
# #@param dt A dataframe or datatable with a Name column to be parsed.
# #@return The input datatable with a new column of the ECM names pasted
# # togethers with an underscore.
# #@section Usage: This functions assumes a Name column exists and uses a dash
# # symbol - as a field separator between each protein and an underscore between
# # a protein's name and its Uniprot ID.
# #
# parseCVContents <- function(dt){
# dt$ECM <- lapply(dt$Name,function(x){
# tmp<-strsplit(unlist(strsplit(x,split = "[-]")),split="_")
# ECMNames<-lapply(tmp, function(x){
# x[1]
# })
# paste(ECMNames,collapse="_")
# })
# }
#
#
# # Extract Intensity Endpoints
# #
# extractIntsEndpts<-function(DT){
# #Use the metadata in the columns Endpoint.xxx to extract the columns of
# #mean intensity data and endpoint name
# epColNames<-grep("(Endpoint)",colnames(DT),value = TRUE)
# intColNames<-paste0("Mean.Intensity.Alexa.",sub("Endpoint.","",epColNames))
# selectColNames<-c(epColNames,intColNames)
# ieDT<-DT[,selectColNames,with=FALSE]
# return(ieDT)
# }
#
# # Assign endpoint names
# #
# assignEndPoints<-function(iedt,dt){
# Endpoints<-unique(unlist(iedt[,grep("(Endpoint)",colnames(dt),value = TRUE),with=FALSE]))
# #TODO Generalize this code to determine how many endpoints are in the staining set
# #This code now assumes there are 3 stained endpoints and ignores DAPI
# ei<-do.call("cbind",lapply(Endpoints,function(ep,ieDT){
# #create a column of intensities for each endpoint based on the Endpoint column
# endpointIntsPtrs<-which(ieDT[,1:3,with=FALSE]==ep,arr.ind = TRUE)
# endpointIntsPtrs<-endpointIntsPtrs[order(endpointIntsPtrs[,1]),]
# iValuesM<-as.matrix(ieDT[,4:6,with=FALSE])
# epValues<-iValuesM[endpointIntsPtrs]
# },ieDT=iedt))
# colnames(ei)<-Endpoints
# DT<-cbind(dt,ei)
# }
# # Summarize the cell level data to the well level
# #
# wellLevelData<-function(cd){
# #browser()
# #Create a datatable of the columns to be summariazed
# intNames<-grep(pattern="(Intensity|Area|Well$|WellCellCount)",x=names(cd),value=TRUE)
# #Get the metadata column names which are unconstrained
# if("Spot" %in% names(cd)){
# mdNames<-grep(pattern="(Intensity|Area|WellCellCount|Object.ID|Parent.Object.ID..MO|Elongation.Factor|Circularity.Factor|Perimeter|Max.Feret.Diameter|Center.X|X|Center.Y|Y|Position|WellIndex|Spot|Grid|Column|Row|ID|Name|ArrayRow|ArrayColumn)",x=names(cd),value=TRUE,invert=TRUE)
# } else { mdNames<-grep(pattern="(Intensity|Area|WellCellCount|Object.ID|Parent.Object.ID..MO|Elongation.Factor|Circularity.Factor|Perimeter|Max.Feret.Diameter|Center.X|X|Center.Y|Y|Position|WellIndex)",x=names(cd),value=TRUE,invert=TRUE) }
# keep<-cd[,intNames,with=FALSE]
# keepMd<-cd[,mdNames,with=FALSE]
# #Take the mean of each column stratified by well
# wd<-keep[,lapply(.SD,mean),by=Well]
# #Get the unique metadata value for each well
# #The metadata of a spotted well is not unique
# md<-keepMd[,lapply(.SD,unique),by=Well]
# data.table::setkey(wd,Well)
# data.table::setkey(md,Well)
# all<-wd[md]
# return(all)
# }
# Add the array row, column and index to a GAL file
#
addArrayPositionNoRotate<-function(df,gridsPerRow=4){
#Handle dataframes that have the name block instead of grid
blockToGrid<-FALSE
if("Block" %in% colnames(df))
{
blockToGrid<-TRUE
data.table::setnames(df,"Block","Grid")
}
#Return the dataframe in the correct space
#These values are in the printer space
rowsPerGrid<-max(df$Row)
colsPerGrid<-max(df$Column)
#Assign the grid row. These will run from 1:12 when using a 4x12 print head
df$arrayGridRow<-ceiling(df$Grid/gridsPerRow)
#Assign the ArrayRow which is in the imaging space
df$ArrayRow<-(df$arrayGridRow-1)*rowsPerGrid+df$Row
#Assign the ArrayColumn in imaging space (No rotation)
df$ArrayColumn<-((df$Grid-1)%%(gridsPerRow))*colsPerGrid+df$Column
#Order the array by ArrayRow then ArrayColumn
df<-df[order(df$ArrayRow,df$ArrayColumn),]
#Remove the arrayGridRow column used for calculations
df<-subset(df,select=-c(arrayGridRow))
#Assign a spot number in sequential order by row then column
df$Spot<-1:(max(df$ArrayColumn)*max(df$ArrayRow))
#Handle dataframes that have the name block instead of grid
if(blockToGrid) data.table::setnames(df,"Grid","Block")
return(df)
}
#' Wrapper function to log intensity values in a data.table
#' @export
logIntensities <- function(dt){
intensityNames <- grep("Intensity",colnames(dt), value=TRUE, ignore.case = TRUE)
dtLog <- dt[,lapply(.SD,log2),.SDcols=intensityNames]
setnames(dtLog,colnames(dtLog),paste0(colnames(dtLog),"Log2"))
return(cbind(dt,dtLog))
}
#' Utility function that returns log2 with lower non-infinite bound
#' @param x Vector to be log2 transformed
#' @return log2 values of x
#' @export
boundedLog2 <- function(x){
#use the smallest non zero value as the minimum
xMin <- min(x, na.rm=TRUE)
if (xMin==0) xMin <- unique(x[order(x)])[2]
x[x==0]<- xMin
xLog2 <- log2(x)
return(xLog2)
}
#' Utility function that returns logit with lower and upper non-infinite bound
#' @param x Vector to be logit transformed
#' @return logit values of x
#' #' @export
boundedLogit <- function(x){
xMin <- min(x, na.rm=TRUE)
if (xMin==0) xMin <- unique(x[order(x)])[2]
if(is.na(xMin)) xMin<-.01
x[x==0]<- xMin/2
xMax <- max(x, na.rm=TRUE)
if (xMax==1) xMax <- unique(x[order(x)])[length(unique(x))-1]
if(length(xMax)==0) xMax<-.99
x[x==1]<- (xMax+1)/2
xLogit <- log2(x/(1-x))
return(xLogit)
}
#' Replace the long names for hyaluronic acid with abbreviations
#'
#' @param x a chanracter vector that may contain names to be abbreviated
#' @return The same character vector with abbrevaitaions as needed. hyaluronic_acid_greater_than_500kDa becomes
#' HA>500kDa and hyaluronic_acid_less_than_500kDa becomes HA<500kDa.
#' @export
compressHA <- function(x){
x <- gsub("(hyaluronic_acid_greater_than_500kDa)","HA>500kDa",x)
x <- gsub("(hyaluronic_acid_less_than_500kDa)","HA<500kDa",x)
x <- gsub("hyaluronicacid","HA",x)
x <- gsub("lessthan","<",x)
x <- gsub("greaterthan",">",x)
return(x)
}
#' Create a median normalized loess model of an array
#'
#'@param data A dataframe with ArrayRow, ArrayColumn and signal intensity columns
#'@param value The column name of the signal intensity column
#'@param span The span value passed to loess. Values between 0 and 1 determine the
#'proportion of the population to be included in the loess neighborhood.
#'@return a vector of median normalized loess values of the signal
#'@export
loessModel <- function(data, value, span){
dataModel <- loess(as.formula(paste0(value," ~ ArrayRow+ArrayColumn")), data,span=span)
dataPredicted <- predict(dataModel)
predictedMedian <- median(dataPredicted, na.rm = TRUE)
dataNormed <- dataPredicted/predictedMedian
}
#' Returns a character vector of alphanumeric well names
#'
#' \code{wellAN} is a convenience function that provides alphanumeric names
#' for well plates or arrays.
#' @param nrRows The number of rows in the plate or array.
#' @param nrCols The number of columns in the plate or array.
#' @return A character vector of the well names
#' @export
wellAN<-function(nrRows,nrCols){
if(nrRows>702)stop("Too many rows to convert. Well alphanumerics are limited to 2 letters")
Well=unlist(c(lapply(1:nrRows, function(x){
paste0(paste0(LETTERS[(x-1)%/%26],LETTERS[(1+(x-1)%%26)]), lapply(1:nrCols,function(x){
if(x<10) x<-paste0("0",x)
return(as.character(x))
}))
})
))
return(Well)
}
#' Change the periods in the column names to spaces
#'
#'\code{cleanColumnNames} substitues a space for any single period in the colmun names of a datatable. This is useful at the end of an anlaysis to have human readable display names.
#' @param dt a datatable
#' @return the same datable with spaces in place of periods.
#' @export
cleanColumnNames<-function(dt){
data.table::setnames(dt,gsub("[.]"," ",make.names(colnames(dt))))
return(dt)
}
#' Get barcodes from the Synapse Plate Tracker files
#' @param studyName Character string of the study name
#' @return Barcodes of the plates in the study
#' @export
getBarcodes <- function(studyName, synId='syn8440875'){
library(synapseClient)
synapseLogin()
barcodes <- synGet(synId) %>%
synapseClient::getFileLocation() %>%
data.table::fread(check.names = TRUE) %>%
dplyr::filter(StudyName==str_to_lower(studyName)) %>%
dplyr::select(Barcode) %>%
stringr::str_split(",") %>%
unlist()
return(barcodes)
}
#' Get the spot level data for a study
#'
#' @param paths The paths to the spot level files
#' @return A datatable with the annotated data for all plates in the study. Any data for fiducials and
#' blank spots is filtered out.
#' @export
getSpotLevelData <- function(paths){
slDT <- lapply(paths, fread) %>% rbindlist()
slDT$BW <- paste(slDT$Barcode,slDT$Well,sep="_")
slDT <- slDT[!grepl("fiducial|Fiducial|gelatin|blank|air|PBS",slDT$ECMp),]
return(slDT)
}
#' Add in the barcodes the MEPs came from
#'
#' Add a barcode column that has one or more barcodes of the data's well plates
#' @param dt3 A spot level datatable with MEP_Drug and Barcode columns
#' @param dt4 A MEP level datatable with a MEP_Drug column
#' @return The input MEP level datatable with a Barcode column added
#' @export
addBarcodes <- function(dt3, dt4){
barcodesList <- lapply(unique(dt4$MEP_Drug), function(m){
Barcodes=paste(unique(dt3$Barcode[dt3$MEP_Drug==m]), collapse = ",")
})
bmdDT <- data.table(Barcode = unlist(barcodesList), MEP_Drug=unique(dt4$MEP_Drug))
dt4 <- merge(bmdDT,dt4,by="MEP_Drug")
return(dt4)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.