
# Functions to support MEMAs printed in 8-well plates

#' Rotate the metadata 180 degrees in array space
#'
#' Mirrors the ArrayRow, ArrayColumn and Spot values about their respective
#' maxima, so the first position on each axis becomes the last and vice versa.
#'
#' @param DT A data.table of metadata with Spot, ArrayRow and ArrayColumn
#'   columns.
#' @return The metadata table rotated 180 degrees in array space. The
#'   ArrayRow, ArrayColumn and Spot values are updated; Spot is returned as an
#'   integer.
#' @export
rotateMetadata <- function(DT){
  # A 180 degree rotation maps index i to (max + 1 - i) on each axis
  DT$ArrayRow <- max(DT$ArrayRow)+1-DT$ArrayRow
  DT$ArrayColumn <- max(DT$ArrayColumn)+1-DT$ArrayColumn
  DT$Spot <- as.integer(max(DT$Spot)+1-DT$Spot)
  # Hand the rotated table back to the caller
  DT
}

#' Read in and parse an Aushon XML log file
#' @param logFile An Aushon logfile
#' @return A data.table keyed by Row and Column with Depositions and
#'  PrintOrder columns.
#' @export
  # NOTE(review): the function header and the statements that belong to the
  # step comments below are not present in this copy of the file; only the
  # final column selection, rotation and keying code is visible.
  #Only keep the sample attributes
  #Bind the XML data into a data table
  #Create Row and Column data by shifting the values by 1
  #Convert deposition to an integer
  #Remove the 0 deposition entries
  #Create a print order column
  #Remove unneeded columns
  data <- data[,c("Row","Column","PrintOrder","Depositions"), with=FALSE]
  #Rotate by 90 degrees CW to match gal file orientation
  # The old Column becomes the new Row, and the old Row mirrored about its
  # maximum becomes the new Column
  tmp <- data$Row
  data$Row <- data$Column
  data$Column <- 1+max(tmp)-tmp
  # Build a data.table keyed on Row and Column
  DT <- data.table::data.table(data,key="Row,Column")

#' Convert column names in a data.table
#'
#' Drops duplicated columns (keeping the first instance), passes the names
#' through \code{make.names} and then strips every '.' from them.
#'
#' @param DT A data.table
#' @return DT The same data.table with duplicated columns, invalid column name characters and trailing spaces removed.
#' @export
convertColumnNames <- function (DT) {
  #Delete any duplicate names keeping the first instance
  DT <- DT[, unique(colnames(DT)), with = FALSE]
  #Replace invalid characters with a '.'
  data.table::setnames(DT, colnames(DT), make.names(colnames(DT)))
  #Remove all '.'s
  data.table::setnames(DT, colnames(DT), gsub("[.]", "", colnames(DT)))
  # Return the cleaned table explicitly, as documented
  DT
}

#' Compute a median that is always of type numeric
#'
#' \code{numericMedian} is a helper function for use within data.table. It
#' ensures that every median is returned as a numeric value rather than a mix
#' of integer and numeric values. Missing values are ignored.
#' @param x integer or double vector
#' @return The median of x as a numeric value
numericMedian <- function(x) {
  centre <- median(x, na.rm = TRUE)
  as.numeric(centre)
}

#' Process the metadata in an an2omero file
#' @param fileName The full name with path for the an2omero file
#' @return A data.table where each row is a unique spot in one well plate
processan2omero <- function (fileName) {
  # NOTE(review): this copy of the function is truncated in several places
  # (dangling pipes, a missing switch() call and no closing braces); the
  # comments below describe only the statements that are visible.
  #Process the file
  dt <- fread(fileName,header = TRUE)
  #Assign WellIndex values
  # Each distinct Well value gets a consecutive integer index, taken in the
  # order the wells appear after keying on Well
  setkey(dt, Well)
  wi <- data.table(Well = unique(dt$Well), WellIndex = 1:length(unique(dt$Well)))
  dt <- merge(dt,wi)
  #Rename to preprocessing pipeline variable names
  # The wavelength-named columns are copied into Endpoint* columns
  dt$EndpointDAPI <- dt[["395nm"]]
  dt$Endpoint488 <- dt[["488nm"]]
  dt$Endpoint555 <- dt[["555nm"]]
  dt$Endpoint647 <- dt[["640nm"]]
  dt$Endpoint750 <- dt[["750nm"]]
  #Shorten and combine Annot names
  # gsub("_.*","",x) drops everything from the first underscore onwards
  dt$CellLine <- gsub("_.*","",dt$CellLine)
  # compressHA is defined outside this view - TODO confirm what it does
  dt$ECM1 <- compressHA(dt$ECM1)
  dt$ECM2 <- compressHA(dt$ECM2)
  dt$ECM3 <- compressHA(dt$ECM3)
  #Chain ECM proteins if the second one is not COL1
  # NOTE(review): no COL1 check is visible here, and the pipe on the gsub
  # line below has no right-hand side in this copy - a step appears to be
  # missing.
  dt$ECMp <-paste0(gsub("_.*","",dt$ECM1),"_",gsub("_.*","",dt$ECM2),"_",gsub("_.*","",dt$ECM3)) %>%
    gsub("_NA","",.) %>%
  #Chain ligands
  # NOTE(review): the pipe ending the next line is also left dangling.
  dt$Ligand <-paste0(gsub("_.*","",dt$Ligand1),"_",gsub("_.*","",dt$Ligand2)) %>%
  dt$MEP <- paste0(dt$ECMp,"_",dt$Ligand)
  dt$Drug <- gsub("_.*","",dt$Drug1)
  dt$MEP_Drug <-paste0(dt$MEP,"_",dt$Drug)
  # Strip the suffix after the first underscore from the endpoint names
  # NOTE(review): Endpoint750 is not shortened like the others - confirm
  # whether that is intentional.
  dt$EndpointDAPI <-gsub("_.*","",dt$EndpointDAPI)
  dt$Endpoint488 <-gsub("_.*","",dt$Endpoint488)
  dt$Endpoint555 <-gsub("_.*","",dt$Endpoint555)
  dt$Endpoint647 <-gsub("_.*","",dt$Endpoint647)
  #Add a WellSpace spot index that recognizes the arrays are rotated 180 degrees
  # For wells whose name contains "B", PrintSpot is mirrored to
  # (rows * columns + 1) - Spot; all other wells keep Spot unchanged
  dt$PrintSpot <- dt$Spot
  nrArrayRows <- max(dt$ArrayRow)
  nrArrayColumns <- max(dt$ArrayColumn)
  dt$PrintSpot[grepl("B", dt$Well)] <- (nrArrayRows*nrArrayColumns+1)-dt$PrintSpot[grepl("B", dt$Well)]
  #Add a drug concentration in uM/L
  # The unit name is extracted from Drug1ConcUnit and mapped to a
  # multiplicative factor.
  # NOTE(review): the switch() call that should wrap the name = factor pairs,
  # and the closing brackets of the sapply call, are missing in this copy.
  # NOTE(review): mmol_per_L and mmolar name the same concentration but are
  # given factors of 1e3 and 1e6 (likewise for the u/n/p prefixes) - confirm
  # which set of factors is intended.
  ConcFactor <- str_extract(dt$Drug1ConcUnit,".*_per_.|.molar|volume_percent") %>%
    sapply(., function(x){
             mol_per_L = 1e6,
             mmol_per_L = 1e3,
             umol_per_L = 1,
             nmol_per_L = 1e-3,
             pmol_per_L = 1e-6,
             mmolar = 1e6,
             umolar = 1e3,
             nmolar = 1,
             pmolar = 1e-3,
  #Error handling for when there is no drug metadata
  # Scale the concentration, then set any missing result to 0
  dt$Drug1Conc <- dt$Drug1Conc*ConcFactor
  dt$Drug1Conc[is.na(dt$Drug1Conc)] <- 0

#' Read in and merge the Omero URLs
#' Adds Omero image IDs based on the WellName values
#' @param path The path to the file named barcode_imageIDs.tsv
#' @return a data.table with WellIndex, ArrayRow, ArrayColumn and ImageID columns
#' @export
getOmeroIDs <- function(path){
  # Read the file and keep only the WellName, Row, Column and ImageID columns
  dt <- fread(path)[,list(WellName,Row,Column,ImageID)]
  #Extract well index and convert to alphanumeric label
  # WellIndex is the integer left after deleting everything up to and
  # including "_Well" from WellName
  dt[,WellIndex := as.integer(gsub(".*_Well","",WellName))]
  # WellName is dropped once WellIndex has been derived from it
  dt[,WellName := NULL]
  # NOTE(review): the function stops here in this copy of the file, with no
  # closing brace; the step that would produce the documented ArrayRow and
  # ArrayColumn columns from Row and Column is not visible.
# markdane/MEMA documentation built on May 21, 2019, 11:48 a.m.