R/PreProcessCellFunctions.R

#'Read the well metadata from a multi-sheet Excel file
#'
#' @param fn full path and file name for the well metadata file
#' @return a datatable with the well level metadata 
#' @export 
getWellMetadata <- function(fn){
  if(!length(fn)==1) stop(paste("There must be 1 xlsx metadata file in the",barcode, "Analysis folder"))
  data.table(readMetadata(fn), key="Well")
}

#'Get deposition values from an Aushon log dta files
#'
#' @param fn full path and file name for the log metadata file
#' @return a datatable with an integer Depostion column
#' @export 
getLogMetadata <- function(fn){
  if(!length(fn)==1) stop(paste("There must be 1 xml file in the",barcode, "Analysis folder"))
  readLogData(fn)
}
#'Read the spot metadata from a gal file
#'
#' @param fn full path and file name for the spot metadata file
#' @return a datatable with the spot level metadata 
#' @export 
getSpotMetadata <- function(fn){
  if(!length(fn)==1) stop(paste("There must be 1 gal file in the",barcode, "Analysis folder"))
  smd <- readSpotMetadata(fn)
  setnames(smd, "Name", "ECMp")
  return(smd)
}
#'Create a single datatable for all wells in an 8 well plate using!An! metadata
#'
#'8 well MEMAs are printed with the B row rotated 180 degrees from the A row. This routine rotates
#'the B rows and creates. The Spot values are indices in the physical coordinate system.
#'PrintSpot values are indices in the rotated printing coordinate system.
#'@param spotMetadata A datatable of spot level metadata
#'@param wellMetadata A datatable of well level metadata
#'@return A spot level datatable that has the well level metadata properly merged
#'@export
mergeSpot8WellMetadata <- function(spotMetadata,wellMetadata){
  wmdL <- apply(wellMetadata,1,  function(x){
    if(grepl("A",x[["Well"]])){
      dt <- cbind(spotMetadata,data.frame(t(x), stringsAsFactors = FALSE))
      dt <- dt[,PrintSpot := Spot]
    } else {
      dt <- cbind(rotateMetadata(spotMetadata),data.frame(t(x), stringsAsFactors = FALSE))
      dt <- dt[,PrintSpot := max(Spot)+1-Spot]
    }
  })
  rbindlist(wmdL)
}
#' Merge the spot well level metadata for a 96 well plate
#' 
#' This function assumes a 12 pin printhead in 6 row and 2 column configuration.
#' The printhead is oriented 90 ccw from the well plate. The 12 well pattern is
#' duplicated 8 times in the well plate to print 8 identical sets of 12 arrays
#'@param spotMetadata A datatable of spot level metadata
#'@param wellMetadata A datatable of well level metadata
#'@return A spot level datatable that has the well level metadata properly merged
#'@export
mergeSpot96WellMetadata <- function(spotMetadata,wellMetadata){
  #The ArrayRow and ArrayColumn indices are oriented with A01 well in the upper left
  #These are coordinates within each well
  #The gal file coordinates are rotated 90 degrees ccw from the array coordinates
  nrArrayRow <- max(spotMetadata$Column)
  nrArrayColumn <- max(spotMetadata$Row)
  spotMetadata <- spotMetadata[,ArrayRow := (nrArrayColumn+1-Column)]
  spotMetadata <- spotMetadata[,ArrayColumn := Row]
  #Assign Spots as sequential indices within each array
  spotMetadata <- spotMetadata[,Spot := ArrayColumn+(ArrayRow-1)*nrArrayRow]
  #PrintSpots are the same as spot in these plates
  spotMetadata <- spotMetadata[,PrintSpot := Spot]
  #Copy the 12 pin spot metadata 8 times to fill the 96 well plate
  smdPlate <- rbindlist(lapply(1:(length(unique(wellMetadata$Well))/length(unique(spotMetadata$Block))),function(x){
    cbind(spotMetadata,PlatePrintIndex = x)
  }))
  ###Assign PrintHead rows and columns in order to define wells
  #PlatePrint coordinates are for the print head sectors in the 96 wellplate
  nrPlatePrintCol <- 2
  nrPlatePrintRow <- max(smdPlate$PlatePrintIndex)/nrPlatePrintCol
  smdPlate$PlatePrintRow <- ceiling(smdPlate$PlatePrintIndex/nrPlatePrintCol)
  smdPlate$PlatePrintCol <- ((smdPlate$PlatePrintIndex-1) %% nrPlatePrintCol)+1
  #PrintHead coords are the rows and columns of pins/blocks within the print head
  nrPrintHeadCol <- 2
  nrPrintHeadRow <- max(smdPlate$Block)/nrPrintHeadCol
  smdPlate$PrintHeadRow <- ceiling(smdPlate$Block/nrPrintHeadCol)
  smdPlate$PrintHeadCol <- ((smdPlate$Block-1) %% nrPrintHeadCol)+1
  #Use PlatePrint and PrintHead to determine WellIndex
  smdPlate$PlateRow <- (smdPlate$PlatePrintRow-1)*nrPrintHeadCol+(3-smdPlate$PrintHeadCol)
  smdPlate$PlateCol <- smdPlate$PlateCol <- (smdPlate$PlatePrintCol-1)*nrPrintHeadRow+smdPlate$PrintHeadRow
  nrPlateRow <- max(smdPlate$PlateRow)
  nrPlateCol <- max(smdPlate$PlateCol)
  smdPlate <- smdPlate[,WellIndex := (PlateRow-1)*nrPlateCol+PlateCol]
  smdPlate <-smdPlate[,Well:=wellAN(nrPlateRow, nrPlateCol)[smdPlate$WellIndex]]
  #Cleanup working columns used to create Well values
  smdPlate <- smdPlate[,PlatePrintRow :=NULL]
  smdPlate <- smdPlate[,PlatePrintCol :=NULL]
  smdPlate <- smdPlate[,PrintHeadRow :=NULL]
  smdPlate <- smdPlate[,PrintHeadCol :=NULL]
  smdPlate <- smdPlate[,WellIndex :=NULL]
  #Merge in the well metadata
  metadata <- merge(smdPlate,wellMetadata,by="Well")
}

#' Get metadata from either An! or !An! files
#' @param metadataFiles A list of of full file names for metadata. The items in 
#' the list must have names of annotMetadata, logMetadata, spotMetadata or wellMetadata
#' @param useAnnotMetadata Logical indicating whether to use metadata in Annot files
#' or excel files
#' @return A datatable containing the metadata
#' 
#' @export
getMetadata <- function(metadataFiles, useAnnotMetadata=TRUE){
  #Use metadata from an2omero files
  if(useAnnotMetadata){
    metadata <- processan2omero(metadataFiles[["annotMetadata"]])
    MEMA8Well <- length(unique(metadata$Well))==8
    MEMA96Well <- length(unique(metadata$Well))==96
  } else { #Process xml, gal and excel files to get all metadata
    #Read the log metadata from an Aushon log file
    ldf <- getLogMetadata(metadataFiles[["logMetadata"]])
    #Read in the spot metadata from the gal file
    galmd <- getSpotMetadata(metadataFiles[["spotMetadata"]])
    spotMetadata <- merge(galmd,ldf, by = c("Row","Column"), all=TRUE)
    #Read the well metadata from a multi-sheet Excel file
    wellMetadata <- getWellMetadata(metadataFiles[["wellMetadata"]])
    #Determine if this is metadata for an 8 or 96 well plate 
    MEMA8Well <- setequal(unique(wellMetadata$Well),c("A01","A02","A03","A04","B01","B02","B03","B04"))
    MEMA96Well <- length(unique(wellMetadata$Well))==96
    if(MEMA8Well){
      #Create a single datatable for all wells in an 8 well plate using!An! metadata
      metadata <- mergeSpot8WellMetadata(spotMetadata,wellMetadata)
    } else if (MEMA96Well) {
      # Merge the spot well level metadata for a 96 well plate
      metadata <- mergeSpot96WellMetadata(spotMetadata,wellMetadata)
    } else {
      stop("Only 8 well and 96 well plates are supported")
    }
    #Add MEP and convenience labels for wells and ligands
    metadata <- metadata[,MEP:=paste(ECMp,Ligand,sep = "_")]
    metadata <- metadata[,Well_Ligand:=paste(Well,Ligand,sep = "_")]
    metadata <- metadata[,MEP_Drug:=paste(MEP,Drug,sep = "_")]
    metadata <- metadata[,Barcode := barcode]
    # Eliminate Variations in the Endpoint metadata
    endpointNames <- grep("End",colnames(metadata), value=TRUE)
    endpointWL <- regmatches(endpointNames,regexpr("[[:digit:]]{3}|DAPI",endpointNames))
    setnames(metadata,endpointNames,paste0("Endpoint",endpointWL))
    #Create short display names, then replace where not unique
    #Use entire AnnotID for ligands with same uniprot IDs
    # metadata$Ligand[grepl("NRG1",metadata$Ligand)] <- simplifyLigandAnnotID(ligand = "NRG1",annotIDs = metadata$Ligand[grepl("NRG1",metadata$Ligand)])
    # metadata$Ligand[grepl("TGFB1",metadata$Ligand)] <- simplifyLigandAnnotID(ligand = "TGFB1",annotIDs = metadata$Ligand[grepl("TGFB1",metadata$Ligand)])
    # metadata$Ligand[grepl("CXCL12",metadata$Ligand)] <- simplifyLigandAnnotID(ligand = "CXCL12",annotIDs = metadata$Ligand[grepl("CXCL12",metadata$Ligand)])
    # metadata$Ligand[grepl("IGF1",metadata$Ligand)] <- simplifyLigandAnnotID(ligand = "IGF1",annotIDs = metadata$Ligand[grepl("IGF1",metadata$Ligand)])
  }
  return(metadata)
}

#' Get the data from the Cell Profiler pipeline
#' @export
getCPData <- function(dataBWInfo, curatedOnly=TRUE, curatedCols= "ImageNumber|ObjectNumber|AreaShape|_MedianIntensity_|_IntegratedIntensity_|_Center_|_PA_|Texture", verbose=FALSE){
  dtL <- lapply(unique(dataBWInfo$Well), function(well){
    if(verbose) message(paste("Reading and annotating data for",barcode, well,"\n"))
    nuclei <- convertColumnNames(fread(dataBWInfo$Path[grepl("Nuclei",dataBWInfo$Location)&grepl(well,dataBWInfo$Well)], verbose=FALSE, showProgress=FALSE))
    if (curatedOnly) nuclei <- nuclei[,grep(curatedCols,colnames(nuclei)), with=FALSE]
    setnames(nuclei,paste0("Nuclei_",colnames(nuclei)))
    setnames(nuclei,"Nuclei_CP_ImageNumber","Spot")
    setnames(nuclei,"Nuclei_CP_ObjectNumber","ObjectNumber")
    setkey(nuclei,Spot,ObjectNumber) 
    
    if(any(grepl("Cells",dataBWInfo$Location)&grepl(well,dataBWInfo$Well))){
      cells <- convertColumnNames(fread(dataBWInfo$Path[grepl("Cells",dataBWInfo$Location)&grepl(well,dataBWInfo$Well)], verbose=FALSE, showProgress=FALSE))
      if (curatedOnly) cells <- cells[,grep(curatedCols,colnames(cells)), with=FALSE]
      setnames(cells,paste0("Cells_",colnames(cells)))
      setnames(cells,"Cells_CP_ImageNumber","Spot")
      setnames(cells,"Cells_CP_ObjectNumber","ObjectNumber")
      setkey(cells,Spot,ObjectNumber)
    } 
    
    if(any(grepl("Cytoplasm",dataBWInfo$Location)&grepl(well,dataBWInfo$Well))){
      cytoplasm <- convertColumnNames(fread(dataBWInfo$Path[grepl("Cytoplasm",dataBWInfo$Location)&grepl(well,dataBWInfo$Well)], verbose=FALSE, showProgress=FALSE))
      if (curatedOnly) cytoplasm <- cytoplasm[,grep(curatedCols,colnames(cytoplasm)), with=FALSE]
      setnames(cytoplasm,paste0("Cytoplasm_",colnames(cytoplasm)))
      setnames(cytoplasm,"Cytoplasm_CP_ImageNumber","Spot")
      setnames(cytoplasm,"Cytoplasm_CP_ObjectNumber","ObjectNumber")
      setkey(cytoplasm,Spot,ObjectNumber)
    } 
    
    #Merge the data from the different locations if it exists
    if(exists("cells")&exists("cytoplasm")) {
      dt <- cells[cytoplasm[nuclei]]
    } else {
      dt <- nuclei
    }
    #Add the well name and barcode as parameters
    dt <- dt[,Well := well]
    dt <- dt[,Barcode := barcode]
    #Scale the intensity values
    intensityNames <- grep("Intensity",colnames(dt), value=TRUE)
    scaledInts <- dt[,intensityNames, with=FALSE]*2^16
    dt <- cbind(dt[,!intensityNames, with=FALSE],scaledInts)
    return(dt)
  })
}
#' Read and convert INCell data to CP format
#' @param cellDataFilePaths A character vector of the file paths to the cell level data
#' @export
getICData <- function(cellDataFilePaths, endPoint488, endPoint555, endPoint647, verbose=FALSE){
  if(verbose) message(paste("Reading and annotating INCell data for",barcode,"\n"))
  #Read and combine the 2 header rows after the summary information
  hdrRows <- read.csv(cellDataFilePaths,skip = 18, nrows=2, header=FALSE,stringsAsFactors = FALSE)
  hdr <- sub("^_","",paste(hdrRows[1,],hdrRows[2,],sep="_"))
  #Read the cell level and spot summary data
  df <- read.csv(cellDataFilePaths,skip = 20, header=FALSE, stringsAsFactors = FALSE)
  #remove the spot summary data and convert to a data.table
  if(any(which(df$V1==""))) {
    df <- df[-(min(which(df$V1=="")):nrow(df)),]
  }
  dt <- data.table(df)
  #Name the columns
  setnames(dt,names(dt), hdr)
  if("NA_NA" %in% colnames(dt)) dt <- dt[,NA_NA := NULL]
  #Create a spot column from the field value
  dt$Spot <- gsub(".*fld ","",dt$Well)
  dt$Spot <- as.numeric(gsub(")","",dt$Spot))
  #Convert well names to alphanumeric with 2 digit columns
  wellRow <- str_match(dt$Well,"[:alpha:]")
  wells <- str_match(dt$Well,"[:digit:][:digit:]?") %>%
    as.numeric() %>%
    sprintf("%02d",.) %>%
    paste0(wellRow,.)
  
  #Convert all columns besides the Well to numeric values
  dt <- dt[,lapply(.SD, as.numeric),.SDcols = grep("Well",colnames(dt),value=TRUE,invert=TRUE)]
  dt$Well <- wells
  # dt$PlateRow <- wellRow
  # dt <- dt[,PlateCol := as.numeric(gsub("[[:alpha:]]","",dt$Well))]
  #Convert INCell names to CP versions
  dt <- convertColumnNames(dt)
  #Assume first nuclear channel is DAPI
  #Create a list of lists with IC names and corresponding CP names if available
  ICtoCPNames <- list(
    list("Nuclei_NucIntensity","Nuclei_CP_Intensity_MedianIntensity_Dapi"),
    list("Nuclei_NucArea","Nuclei_CP_AreaShape_Area"),
    list("Nuclei_NuccgX","Nuclei_CP_AreaShape_Center_X"),
    list("Nuclei_NuccgY","Nuclei_CP_AreaShape_Center_Y"),
    list("Nuclei_NucElongation","Nuclei_CP_AreaShape_Eccentricity"),
    list("Nuclei_NucCellIntensity","Cell_CP_Intensity_MedianIntensity_Dapi"),
    list("Nuclei_IxANuc","Nuclei_CP_Intensity_IntegratedIntensity_Dapi"),
    list("Cells_CellIntensity",paste0("Cytoplasm_CP_Intensity_MedianIntensity_",endPoint488)),
    list("Reference1_CellIntensity",paste0("Cytoplasm_CP_Intensity_MedianIntensity_",endPoint555)),
    list("Reference2_NucIntensity",paste0("Nuclei_CP_Intensity_MedianIntensity_",endPoint647)),
    list("Cell","ObjectNumber")
  )
  #Change names within dt to match downstream CP pipeline
  foo <- sapply(ICtoCPNames,function(x) {
    if(x[[1]] %in% colnames(dt)) setnames(dt,x[[1]],x[[2]])
  })
  rm(foo)
  
  dt$Barcode <- barcode
  dtL <-list(dt)
}
#'Clean up issues that have been problems in previous pipelines
#'@param dt A datatable of MEMA data and metadata
#'@return The same datatable with legacy issues corrected
#'@export
cleanLegacyIssues <- function(dt){
  #Remove problematic features
  dt <- dt[,grep("Euler",colnames(dt),invert=TRUE), with=FALSE]
  #Change Edu back to EdU
  if(any(grepl("Edu",colnames(dt)))){
    edUNames <- grep("Edu",colnames(dt),value=TRUE)
    setnames(dt,edUNames,gsub("Edu","EdU",edUNames))
  }
  #Add the pin diameter metadata in microns
  if(any(grepl("MCF7|PC3|YAPC",unique(dt$CellLine)))){
    dt$PinDiameter <- 180
  } else {
    dt$PinDiameter <- 350
  }
  return(dt)
}

#` Filter our debris and cell clusters
#'@export
filterObjects <- function(dt,nuclearAreaThresh=50,nuclearAreaHiThresh=4000){
  if(any(grepl("Nuclei_CP_AreaShape_Area",colnames(dt)))){
    dt <- dt[dt$Nuclei_CP_AreaShape_Area > nuclearAreaThresh,]
    dt <- dt[dt$Nuclei_CP_AreaShape_Area < nuclearAreaHiThresh,]
  }
  return(dt)
}
#' Add local XY and polar coordinates
#' @export
addPolarCoords <- function(dt){
  if(any(grepl("Nuclei_CP_AreaShape_Center",colnames(dt)))){
    #Add the local polar coordinates
    dt <- dt[,Nuclei_PA_Centered_X :=  Nuclei_CP_AreaShape_Center_X-median(Nuclei_CP_AreaShape_Center_X), by=c("Well","Spot")]
    dt <- dt[,Nuclei_PA_Centered_Y :=  Nuclei_CP_AreaShape_Center_Y-median(Nuclei_CP_AreaShape_Center_Y), by=c("Well","Spot")]
    dt <- dt[, Nuclei_PA_AreaShape_Center_R := sqrt(Nuclei_PA_Centered_X^2 + Nuclei_PA_Centered_Y^2), by=c("Well","Spot")]
    dt <- dt[, Nuclei_PA_AreaShape_Center_Theta := calcTheta(Nuclei_PA_Centered_X, Nuclei_PA_Centered_Y), by=c("Well","Spot")]
  }
  return(dt)
}

#'Add spot level normalizations for median intensities
#'@export
spotNormIntensities <- function(dt){
  intensityNamesAll <- grep("_CP_Intensity_Median",colnames(dt), value=TRUE)
  intensityNames <- grep("Norm",intensityNamesAll,invert=TRUE,value=TRUE)
  for(intensityName in intensityNames){
    #Median normalize the median intensity at each spot
    setnames(dt,intensityName,"value")
    dt <- dt[,paste0(intensityName,"_SpotNorm") := medianNorm(value),by="Barcode,Well,Spot"]
    setnames(dt,"value",intensityName)
  }
  return(dt)
}
#' Add measures of adjacency based on cell positions in the population
#' 
#' @param neighborhoodNucleiRadii Defines the neighborhood annulus
#' @param neighborsThresh Gates sparse cells on a spot
#' @param wedgeAngs Size in degrees of spot wedges used in perimeter gating
#' @param outerThresh Defines outer cells used in perimeter gating
#'@export
calcAdjacency <- function(dt, neighborhoodNucleiRadii=7,neighborsThresh=0.4,wedgeAngs=20, outerThresh=0.5){
  densityRadius <- sqrt(median(dt$Nuclei_CP_AreaShape_Area, na.rm = TRUE)/pi)
  #Count the number of neighboring cells
  dt <- dt[,Nuclei_PA_AreaShape_Neighbors := cellNeighbors(.SD, radius = densityRadius*neighborhoodNucleiRadii), by = "Barcode,Well,Spot"]
  #Rules for classifying perimeter cells
  dt <- dt[,Spot_PA_Sparse := Nuclei_PA_AreaShape_Neighbors < neighborsThresh]
  #Add a local wedge ID to each cell based on conversations with Michel Nederlof
  dt <- dt[,Spot_PA_Wedge:=ceiling(Nuclei_PA_AreaShape_Center_Theta/wedgeAngs)]
  #Define the perimeter cell if it exists in each wedge
  #Classify cells as outer if they have a radial position greater than a thresh
  dt <- dt[,Spot_PA_OuterCell := labelOuterCells(Nuclei_PA_AreaShape_Center_R, thresh=outerThresh),by="Barcode,Well,Spot"]
  #Require a perimeter cell not be in a sparse region
  denseOuterDT <- dt[!dt$Spot_PA_Sparse  & dt$Spot_PA_OuterCell]
  denseOuterDT <- denseOuterDT[,Spot_PA_Perimeter := findPerimeterCell(.SD) ,by="Barcode,Well,Spot,Spot_PA_Wedge"]
  setkey(dt,Barcode,Well,Spot,ObjectNumber)
  setkey(denseOuterDT,Barcode,Well,Spot,ObjectNumber)
  dt <- denseOuterDT[,list(Barcode,Well,Spot,ObjectNumber,Spot_PA_Perimeter)][dt]
  dt$Spot_PA_Perimeter[is.na(dt$Spot_PA_Perimeter)] <- FALSE
  return(dt)
}

#' Gate cells based on specific stains if they are in the dataset
#' 
#' @param dt A datatable that contains cell level data and metadata
#' @export
gateCells <- function(dt, synId="syn10138929"){
  #Read parameter file from Synapse
  library(synapseClient)
  synapseLogin()
  parms <- synGet(synId) %>%
    synapseClient::getFileLocation() %>%
    data.table::fread(check.names = TRUE) %>%
    dplyr::filter(Barcode==unique(dt$Barcode))
  overrideParms <- nrow(parms)==1
  #Set 2N and 4N DNA status
  #Use override parameter if it exists
  if(overrideParms) {
    if(!parms$CycleStateThresh==0){
      #if CycleStateThresh == 0 then do not generate a Cycle State signal
      dt$Nuclei_PA_Cycle_State <- 1
      dt$Nuclei_PA_Cycle_State[dt$Nuclei_CP_Intensity_IntegratedIntensity_Dapi > parms$CycleStateThresh] <- 2
    }
  } else {
    dt <- dt[,Nuclei_PA_Cycle_State := gateOnlocalMinima(Nuclei_CP_Intensity_IntegratedIntensity_Dapi)]
  }
  
  #Create staining set specific derived parameters
  if("Nuclei_CP_Intensity_MedianIntensity_EdU" %in% colnames(dt)){
    #Use override parameters if they exist
    if(overrideParms) {
      if(!parms$EdUPositiveThresh==0){
        #if EdUPositiveThresh == 0 then do not generate a Gated EdU Positive signal
        dt$Nuclei_PA_Gated_EdUPositive <- 0
        dt$Nuclei_PA_Gated_EdUPositive[dt$Nuclei_CP_Intensity_MedianIntensity_EdU > parms$EdUPositiveThresh] <- 1
      }
    } else {
      #Use the entire plate to set the autogate threshold if there's no control well
      if(any(grepl("FBS",unique(dt$Ligand)))){
        dt <- dt[,Nuclei_PA_Gated_EdUPositive := kmeansCluster(.SD,value =  "Nuclei_CP_Intensity_MedianIntensity_EdU",ctrlLigand = "FBS"), by="Barcode"]
      } else {
        dt <- dt[,Nuclei_PA_Gated_EdUPositive := kmeansCluster(.SD,value =  "Nuclei_CP_Intensity_MedianIntensity_EdU",ctrlLigand = "."), by="Barcode"]
      }
      #Modify the auto gate threshold to be 3 sigma from the EdU- median
      EdUDT <- dt[Nuclei_PA_Gated_EdUPositive==0,.(EdUMedian=median(Nuclei_CP_Intensity_MedianIntensity_EdU, na.rm=TRUE),EdUSD=sd(Nuclei_CP_Intensity_MedianIntensity_EdU, na.rm=TRUE)),by="Barcode,Well"]
      EdUDT <- EdUDT[,Median3SD := EdUMedian+3*EdUSD]
      dt <- merge(dt,EdUDT,by=c("Barcode","Well"))
      dt$Nuclei_PA_Gated_EdUPositive[dt$Nuclei_CP_Intensity_MedianIntensity_EdU>dt$Median3SD]<-1
    }
  }
  
  #Gate KRT5 using override parameters if they exist
  if("Cytoplasm_CP_Intensity_MedianIntensity_KRT5" %in% colnames(dt)){  
    if(overrideParms) {
      if(!parms$KRT5PositiveThresh==0){
        #if KRT5PositiveThresh == 0 do not generate a KRT5 gated Positive signal
        dt$Cytoplasm_PA_Gated_KRT5Positive <- 0
        dt$Cytoplasm_PA_Gated_KRT5Positive[dt$Cytoplasm_CP_Intensity_MedianIntensity_KRT5 > parms$KRT5PositiveThresh] <- 1
      }
    } else if(grepl("HMEC",unique(dt$CellLine))) {
      dt <- dt[,Cytoplasm_PA_Gated_KRT5Positive := gateOnQuantile(Cytoplasm_CP_Intensity_MedianIntensity_KRT5,probs=.02),by="Barcode"]
    } else {
      dt <- dt[,Cytoplasm_PA_Gated_KRT5Positive := kmeansCluster(.SD,value =  "Cytoplasm_CP_Intensity_MedianIntensity_KRT5",ctrlLigand = "."), by="Barcode"]
    }
  }
  
  #Gate KRT14 using override parameters if they exist
  if("Cytoplasm_CP_Intensity_MedianIntensity_KRT14" %in% colnames(dt)){  
    if(overrideParms) {
      if(!parms$KRT14PositiveThresh==0){
        #if KRT14PositiveThresh == 0 do not generate a KRT14 gated Positive signal
        dt$Cytoplasm_PA_Gated_KRT14Positive <- 0
        dt$Cytoplasm_PA_Gated_KRT14Positive[dt$Cytoplasm_CP_Intensity_MedianIntensity_KRT14 > parms$KRT14PositiveThresh] <- 1
      }
    } else {
      dt <- dt[,Cytoplasm_PA_Gated_KRT14Positive := gateOnQuantile(Cytoplasm_CP_Intensity_MedianIntensity_KRT14,probs=.02),by="Barcode"]
    }
  }
  
  #Gate KRT19 using override parameters if they exist
  if("Cytoplasm_CP_Intensity_MedianIntensity_KRT19" %in% colnames(dt)){  
    if(overrideParms) {
      if(!parms$KRT19PositiveThresh==0){
        #if KRT19PositiveThresh == 0 do not generate a KRT19 gated Positive signal
        dt$Cytoplasm_PA_Gated_KRT19Positive <- 0
        dt$Cytoplasm_PA_Gated_KRT19Positive[dt$Cytoplasm_CP_Intensity_MedianIntensity_KRT19 > parms$KRT19PositiveThresh] <- 1
      }
    } else {
      dt <- dt[,Cytoplasm_PA_Gated_KRT19Positive := kmeansCluster(.SD, "Cytoplasm_CP_Intensity_MedianIntensity_KRT19",ctrlLigand = "."), by="Barcode"]
    }
  }
  
  #Calculate a lineage ratio of luminal/basal or KRT19/KRT5
  if ("Cytoplasm_CP_Intensity_MedianIntensity_KRT19" %in% colnames(dt)&
      "Cytoplasm_CP_Intensity_MedianIntensity_KRT5" %in% colnames(dt)){
    dt <- dt[,Cytoplasm_PA_Intensity_LineageRatio := Cytoplasm_CP_Intensity_MedianIntensity_KRT19/Cytoplasm_CP_Intensity_MedianIntensity_KRT5]
    
    #Determine the class of each cell based on KRT5 and KRT19 class
    #0 double negative
    #1 KRT5+, KRT19-
    #2 KRT5-, KRT19+
    #3 KRT5+, KRT19+
    dt <- dt[,Cytoplasm_PA_Gated_KRTClass := Cytoplasm_PA_Gated_KRT5Positive+2*Cytoplasm_PA_Gated_KRT19Positive]
  }
  
  if ("Cytoplasm_CP_Intensity_MedianIntensity_KRT14" %in% colnames(dt)&
      "Cytoplasm_CP_Intensity_MedianIntensity_KRT19" %in% colnames(dt)){
    #Calculate a lineage ratio of luminal/basal or KRT19/KRT14
    dt <- dt[,Cytoplasm_PA_Intensity_LineageRatio := Cytoplasm_CP_Intensity_MedianIntensity_KRT19/Cytoplasm_CP_Intensity_MedianIntensity_KRT14]
    #Determine the class of each cell based on KRT5 and KRT19 class
    #0 double negative
    #1 KRT14+, KRT19-
    #2 KRT14-, KRT19+
    #3 KRT14+, KRT19+
    dt <- dt[,Cytoplasm_PA_Gated_KRTClass := Cytoplasm_PA_Gated_KRT14Positive+2*Cytoplasm_PA_Gated_KRT19Positive]
  }
  
  if ("Cytoplasm_PA_Gated_KRT5Positive" %in% colnames(dt)&
      "Nuclei_PA_Gated_EdUPositive" %in% colnames(dt)){
    #Determine the class of each cell based on KRT5 and EdU class
    #0 double negative
    #1 KRT5+, EdU-
    #2 KRT5-, EdU+
    #3 KRT5+, EdU+
    #Use override parameters if they exist
    dt <- dt[,Cells_PA_Gated_EdUKRT5Class := Cytoplasm_PA_Gated_KRT5Positive+2*Nuclei_PA_Gated_EdUPositive]
  }
  
  if ("Cytoplasm_PA_Gated_KRT14Positive" %in% colnames(dt)&
      "Nuclei_PA_Gated_EdUPositive" %in% colnames(dt)){
    #Determine the class of each cell based on KRT5 and EdU class
    #0 double negative
    #1 KRT14+, EdU-
    #2 KRT14-, EdU+
    #3 KRT14+, EdU+
    #Use override parameters if they exist
    dt <- dt[,Cells_PA_Gated_EdUKRT14Class := Cytoplasm_PA_Gated_KRT14Positive+2*Nuclei_PA_Gated_EdUPositive]
  }
  
  if("Nuclei_PA_Gated_EdUPositive" %in% colnames(dt)&
     "Cytoplasm_PA_Gated_KRT19Positive" %in% colnames(dt)){
    #Determine the class of each cell based on KRT19 and EdU class
    #0 double negative
    #1 KRT19+, EdU-
    #2 KRT19-, EdU+
    #3 KRT19+, EdU+
    dt <- dt[,Cells_PA_Gated_EdUKRT19Class := Cytoplasm_PA_Gated_KRT19Positive+2*Nuclei_PA_Gated_EdUPositive]
  }
  
  return(dt)
}

#' Write the cell level raw data and metadata Level 1 data. 
#' @param dt A datatable of cell level data to be written to disk
#' @param path The path to write the file
#' @param barcode The barcode that will be encoded into the file name
#'@return  None called for the side effect of writing to disk
#'@export
writeCellLevel <- function(dt,path,barcode, verbose=FALSE){
  if(verbose) message(paste("Writing",barcode,"level 1 full file to disk\n"))
  writeTime<-Sys.time()
  fwrite(dt, paste0(path,barcode, "/Analysis/", barcode,"_","Level1.tsv"), sep = "\t", quote=FALSE)
  if(verbose) message(paste("Write time:", Sys.time()-writeTime,"\n"))
}  

#' Format 96 well plate raw data from the Cell Profiler Pipeline
#' 
#' Reformats the metadata for one row of a 96 well plate to match the pipeline. 
#' The Spot column is converted to an index within each well and a label for the Well is added
#' to the output data.table
#' 
#' @param dt data.table with a Spot column
#' @param nrArrayRows The number of rows in the printed array
#' @param nrArrayColumns The number of columns in the printed array
#' @return A data.table with an alphanumeric Well column and reindexed Spot column
#' @export
formatCP96Well <- function(dt, nrArrayRows, nrArrayColumns){
  nrArraySpots <-nrArrayRows*nrArrayColumns
  dt$WellIndex <- dt$Spot %/% nrArraySpots +1
  dt$Spot <- ((dt$Spot-1) %% nrArraySpots)+1
  dt$Well <-  dt$WellIndex %>%
    sprintf("%02d",.) %>%
    paste0(gsub("Row","",dt$Well),.)
  dt[,WellIndex := NULL]
  return(dt)
}
MEP-LINCS/MEMA documentation built on May 20, 2019, 2:47 p.m.