# Helper function to create level 3 and 4 data
#` Calculate standard error of the mean
#'
#' Omit na values if present
#' @param x a numeric vector
#' @return the standard error of the mean for x as a numeric value
#'@export
se <- function(x) {
if(sum(!is.na(x))>1){
se <- sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
} else {
se <- 0
}
return(se)
}
#' Summarize cell level data to the spot level
#'
#' Median summarize the cell level normalized values and the most biologically
#' interpretable raw data at each spot, calculate the standard errors and add
#' SE columns for all median summarized data
#' @param dt The datatable of cell level data to be summarized
#' @return A datatable of spot-level, median summarized data with standard error values and
#' metadata
#' @export
summarizeToSpot <- function(dt, seNames=NULL){
#Summarize cell data to medians of the spot parameters
parameterNames<-grep(pattern="(Children|_CP_|_PA_)",x=names(dt),value=TRUE)
#Remove spot-normalized or summarized parameters
parameterNames <- grep("SpotNorm|Wedge|Sparse|OuterCell|Center|^Nuclei_PA_Gated_EduPositive$",parameterNames,value=TRUE,invert=TRUE)
slDT<-dt[,lapply(.SD, numericMedian), by="Barcode,Well,Spot", .SDcols=parameterNames]
#Use seNames to select the parameters that get SE values
if(!is.null(seNames)){
seNamesPattern<-paste(seNames,collapse="|")
seNames <- grep(seNamesPattern,parameterNames,value=TRUE)
slDTse <- dt[,lapply(.SD,se), by="Barcode,Well,Spot", .SDcols=seNames]
} else{
slDTse <- dt[,lapply(.SD,se), by="Barcode,Well,Spot"]
}
#Add _SE to the standard error column names
setnames(slDTse, grep("Barcode|^Well$|^Spot$",colnames(slDTse), value = TRUE, invert = TRUE), paste0(grep("Barcode|^Well$|^Spot$",colnames(slDTse), value = TRUE, invert = TRUE),"_SE"))
#Merge back in the spot and well metadata
metadataNames <- grep("(Row|Column|PrintOrder|Block|^ID$|Array|CellLine|Ligand|Drug|Endpoint|ECMp|MEP|Well_Ligand|ECM|ImageID|Barcode|^Well$|^WellIndex$|^PrintSpot$|^Spot$|Pin|Lx|[[:digit:]]{3}nm$|StainingSet|Density)", x=colnames(dt), value=TRUE)
setkey(dt,Barcode, Well,Spot)
mDT <- dt[,metadataNames,keyby="Barcode,Well,Spot", with=FALSE]
slDT <- mDT[slDT, mult="first"]
#Merge in the standard err values
setkey(slDT, Barcode, Well, Spot)
setkey(slDTse, Barcode, Well, Spot)
slDT <- slDT[slDTse]
return(slDT)
}
#' QA the spot level data
#'
#' Median summarize the cell level normalized values and the most biologically
#' interpretable raw data at each spot, calculate the standard errors and add
#' SE columns for all median summarized data
#' @param dt Datatable of spot level data
#' @param lthresh The threshold used in the loess model to define low cell count
#' regions
#' @return A datatable of spot-level, median summarized data with standard error values and
#' metadata
#' @export
QASpotData <- function(dt, lthresh = lthresh){
#Add a count of replicates
dt <- dt[,Spot_PA_ReplicateCount := .N,by="Ligand,ECMp"]
#Add the loess model of the SpotCellCount on a per well basis
dt <- dt[,Spot_PA_LoessSCC := loessModel(.SD, value="Spot_PA_SpotCellCount", span=.5), by="Barcode,Well"]
#Add well level QA Scores to spot level data
dt <- dt[,QAScore := calcQAScore(.SD, threshold=lthresh, maxNrSpot = max(dt$ArrayRow)*max(dt$ArrayColumn),value="Spot_PA_LoessSCC"),by="Barcode,Well"]
return(dt)
}
#' Add the porportions of gated cells in the spot populations
#' @param dt A datatable
#' @return nothing is returned, the proportions are added to the input datatable
#' @export
addSpotProportions <- function(dt){
#Calculate DNA proportions based on cell cycle state
if("Nuclei_PA_Cycle_State" %in% colnames(dt)){
dt <- dt[,Nuclei_PA_Cycle_DNA2NProportion := calc2NProportion(Nuclei_PA_Cycle_State),by="Barcode,Well,Spot"]
dt <- dt[,Nuclei_PA_Cycle_DNA4NProportion := 1-Nuclei_PA_Cycle_DNA2NProportion]
}
#Create proportions and logits of mutlivariate gates
if ("Cytoplasm_PA_Gated_KRTClass" %in% colnames(dt)){
#Determine the class of each cell based on KRT5 and KRT19 class
#0 double negative
#1 KRT5+, KRT19-
#2 KRT5-, KRT19+
#3 KRT5+, KRT19+
#Calculate gating proportions for EdU and KRT19
dt <- dt[,Cytoplasm_PA_Gated_BasalLuminalNegativeProportion := sum(Cytoplasm_PA_Gated_KRTClass==0)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cytoplasm_PA_Gated_BasalNegativeLuminalPositiveProportion := sum(Cytoplasm_PA_Gated_KRTClass==2)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cytoplasm_PA_Gated_BasalPositiveLuminalNegativeProportion := sum(Cytoplasm_PA_Gated_KRTClass==1)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cytoplasm_PA_Gated_BasalPositivedLuminalPositiveProportion := sum(Cytoplasm_PA_Gated_KRTClass==3)/length(ObjectNumber),by="Barcode,Well,Spot"]
}
if ("Cells_PA_Gated_EdUKRT5Class" %in% colnames(dt)){
#Determine the class of each cell based on KRT5 and EdU class
#0 double negative
#1 KRT5+, EdU-
#2 KRT5-, EdU+
#3 KRT5+, EdU+
#Calculate gating proportions for EdU and KRT5
dt <- dt[,Cells_PA_Gated_EdUKRT5NegativeProportion := sum(Cells_PA_Gated_EdUKRT5Class==0)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUPositiveKRT5NegativeProportion := sum(Cells_PA_Gated_EdUKRT5Class==2)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUNegativeKRT5PositiveProportion := sum(Cells_PA_Gated_EdUKRT5Class==1)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUKRT5PositiveProportion := sum(Cells_PA_Gated_EdUKRT5Class==3)/length(ObjectNumber),by="Barcode,Well,Spot"]
}
if ("Cells_PA_Gated_EdUKRT14Class" %in% colnames(dt)){
#Determine the class of each cell based on KRT14 and EdU class
#0 double negative
#1 KRT14+, EdU-
#2 KRT14-, EdU+
#3 KRT14+, EdU+
#Calculate gating proportions for EdU and KRT14
dt <- dt[,Cells_PA_Gated_EdUKRT14NegativeProportion := sum(Cells_PA_Gated_EdUKRT14Class==0)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUPositiveKRT14NegativeProportion := sum(Cells_PA_Gated_EdUKRT14Class==2)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUNegativeKRT14PositiveProportion := sum(Cells_PA_Gated_EdUKRT14Class==1)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUKRT14PositiveProportion := sum(Cells_PA_Gated_EdUKRT14Class==3)/length(ObjectNumber),by="Barcode,Well,Spot"]
}
if ("Cells_PA_Gated_EdUKRT19Class" %in% colnames(dt)){
#Determine the class of each cell based on KRT19 and EdU class
#0 double negative
#1 KRT19+, EdU-
#2 KRT19-, EdU+
#3 KRT19+, EdU+
#Calculate gating proportions for EdU and KRT19
dt <- dt[,Cells_PA_Gated_EdUKRT19NegativeProportion := sum(Cells_PA_Gated_EdUKRT19Class==0)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUPositiveKRT19NegativeProportion := sum(Cells_PA_Gated_EdUKRT19Class==2)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUNegativeKRT19PositiveProportion := sum(Cells_PA_Gated_EdUKRT19Class==1)/length(ObjectNumber),by="Barcode,Well,Spot"]
dt <- dt[,Cells_PA_Gated_EdUKRT19PositiveProportion := sum(Cells_PA_Gated_EdUKRT19Class==3)/length(ObjectNumber),by="Barcode,Well,Spot"]
}
}
# #' @export
# summarizeFBS <- function(dt){
# #Summarize all by ligand and ECMp
# #This will only change the FBS data
# dtFBS <- dt[grepl("FBS",dt$Ligand),]
# #Remove anything after FBS
# dtFBS$Ligand <- gsub("FBS.*","FBS", dtFBS$Ligand)
# #Create the new MEP name
# dtFBS$MEP <- paste(dtFBS$ECMp,dtFBS$Ligand,sep="_")
# if("StainingSet" %in% colnames(dtFBS)){
# #Find the medians or the unique metadata values
# dtFBSMedians <- dtFBS[, lapply(.SD, numericMedianUniqueMetadata), keyby = "Ligand,ECMp,StainingSet"]
# } else {
# #Find the medians or the unique metadata values
# dtFBSMedians <- dtFBS[, lapply(.SD, numericMedianUniqueMetadata), keyby = "Ligand,ECMp"]
# }
# #If the FBS is from multiple plates its barcode has been reset to NA.
# #replace this with the word Multiple
# dtFBSMedians$Barcode[is.na(dtFBSMedians$Barcode)] <- "Multiple"
# #Delete the FBS wells from the original dt
# dt <- dt[!grepl("FBS",dt$Ligand),]
# #Bind in the summarized FBS values
# dt <- rbind(dt,dtFBSMedians)
# return(dt)
# }
# #' Summarize spot level data to the MEP level
# #'
# #' Median summarize the spot level normalized values the most biologically
# #' interpretable raw data at each spot, calculate the standard errors and add
# #' SE columns for all median summarized data
# #' @param l3 The datatable of spot level data to be summarized
# #' @return A datatable of MEP level, median summarized data with standard error values and
# #' metadata
# #' @export
# createl4 <- function(l3, seNames=NULL){
# #Add a count of replicates
# l3 <- l3[,Spot_PA_ReplicateCount := .N,by="Ligand,ECMp"]
# l4Names<-grep("Loess$|RUV|Norm|^Ligand$|^ECMp|Barcode|Spot_PA_SpotCellCount$|Spot_PA_ReplicateCount$", x=names(l3),value=TRUE)
# #remove the _SE values
# l4Names <- grep("_SE|NormMethod|AnnotID",l4Names, value = TRUE, invert = TRUE)
# l4Keep<-l3[,l4Names,with=FALSE]
# l4DT<-l4Keep[,lapply(.SD,numericMedian),keyby="Ligand,ECMp,Barcode"]
# #Use seNames to select the parameters that get SE values
# if(!is.null(seNames)){
# seNamesPattern<-paste(seNames,collapse="|")
# seNames <- grep(seNamesPattern,l4Names,value=TRUE)
# l4DTse <- l4Keep[,lapply(.SD,se),keyby="Ligand,ECMp,Barcode", .SDcols=seNames]
# } else{
# l4DTse <- l4Keep[,lapply(.SD,se),keyby="Ligand,ECMp,Barcode"]
# }
#
# #Add _SE to the standard error column names
# setnames(l4DTse, grep("Barcode|^Well$|^Spot$|Ligand|ECMp",colnames(l4DTse), value = TRUE, invert = TRUE), paste0(grep("Barcode|^Well$|^Spot$|Ligand|ECMp",colnames(l4DTse), value = TRUE, invert = TRUE),"_SE"))
#
# l3Names <- grep("Barcode|Well|CellLine|Ligand|ECM|Endpoint488|Endpoint555|Endpoint647|EndpointDAPI|ECMp|MEP|Lx|PinDiameter", colnames(l3), value=TRUE)
# #Merge back in the replicate metadata
# mDT <- l3[,l3Names,keyby="Ligand,ECMp,Barcode", with=FALSE]
# setkey(mDT,Ligand,ECMp,Barcode)
# l4DT <- mDT[l4DT, mult="first"]
# l4DT <- l4DTse[l4DT]
# l4DT <- summarizeFBS(l4DT)
# return(l4DT)
# }#End of createl4
#' Summarize spot level data to the MEP level
#'
#' Median summarize the spot level normalized values the most biologically
#' interpretable raw data at each spot, calculate the standard errors and add
#' SE columns for all median summarized data
#' @param dt The datatable of spot level data to be summarized
#' @return A datatable of MEP level, median summarized data with standard error values and
#' metadata
#' @export
preprocessLevel4 <- function(dt, seNames=NULL){
library(magrittr)
#Add a count of replicates
dt <- dt[,Spot_PA_ReplicateCount := .N,by="Ligand,ECMp,Drug,CellLine,Drug1Conc"]
rawSignalNames <- grep("_SE",grep("Log2|Logit|_PA_|Intensity|AreaShape|Texture",colnames(dt), value=TRUE), value=TRUE, invert=TRUE)
l4Signals<- dt[,lapply(.SD, numericMedian), by="Ligand,ECMp,Drug,Drug1Conc,CellLine", .SDcols=rawSignalNames]
#Use seNames to select the parameters that get SE values
if(!is.null(seNames)){
seNamesPattern<-paste(seNames,collapse="|")
seSignalNames <- grep(seNamesPattern,rawSignalNames,value=TRUE)
l4Ses <- dt[,lapply(.SD,se),keyby="Ligand,ECMp,Drug,Drug1Conc,CellLine", .SDcols=seSignalNames]
} else{
l4Ses <- dt[,lapply(.SD,se),keyby="Ligand,ECMp,Drug,Drug1Conc,CellLine"]
}
#Add _SE to the standard error column names
setnames(l4Ses, grep("Ligand|ECMp|Drug|Drug1Conc|CellLine",colnames(l4Ses), value = TRUE, invert = TRUE), paste0(grep("Ligand|ECMp|Drug|Drug1Conc|CellLine",colnames(l4Ses), value = TRUE, invert = TRUE),"_SE"))
#Merge back in the replicate metadata
metadataNames <- grep("_SE|Barcode|^BW$|ArrayRow|ArrayColumn|^Well$|WellIndex|^Spot$|^PrintSpot$|^Well_Ligand$|ImageID|ClarionID|QA_|ECMSet|^Row$|^Column$|^Block$|PlateRow|^QAScore$|^ID$|LigandSet|PrintOrder",colnames(dt), value=TRUE,invert=TRUE) %>%
setdiff(rawSignalNames)
mdDT <- unique(dt[,metadataNames, with=FALSE])
setkey(l4Signals,Ligand,ECMp,Drug,Drug1Conc,CellLine)
setkey(l4Ses,Ligand,ECMp,Drug,Drug1Conc,CellLine)
setkey(mdDT,Ligand,ECMp,Drug,Drug1Conc,CellLine)
l4DT <- merge(mdDT,merge(l4Signals,l4Ses))
return(l4DT)
}#End of preprocesslevel4
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.