#' @title Read data
#'
#' @description Reads experiment associated data. Variables get suffix
#' .cell, .nucl, .cyto
#'
#' @param data list contianing a lists of filesnames, all corresponding
#' a separate experiment
#' @param vars Variables to read, defaults to NULL (all variables)
#' @param treatVar Treatment / Grouping variables as given in the
#' treatment file
#' @param well Well Variable within the treatment file
#' @param files CellProfiler output files and treatment file
#' @param normalizeWells convert A01->A1
#' @param force force continuation if dimensions don't match
#'
#' @import data.table
#'
#' @export
readData <- function(data, vars=NULL, treatVar=c("Treatment", "Konzentration"), wellVar="well",
files=c("Treatment.csv", "Cells.txt", "Primarieswithoutborder.txt", "Cytoplasm.txt"),
normalizeWells=T, sepTreat="_", adaptVars=F, force=F,
filterVars=c("Intensity_IntegratedIntensity_DNA", "Intensity_MedianIntensity_DNA", "AreaShape_Area")) {
## All needed files
if (length(files) < 3) {
warning("Please provide three filenames as file parameter: Treatment, Cell (Cells.txt), Nucl (Primariesiwthoutborder.txt), Cytoplasm (Sytoplasm.txt) measurements")
}
## test if all necessary files/variables exist
### FIXME
cytoF <- T
nuclF <- T
if (length(files) == 3) {
cytoF <- F
}
if (length(files) == 2) {
cytoF <- F
nuclF <- F
}
### FIXME normalizeWells!!!!!!
varsTmp <- NULL
for (i in 1:length(data)) {
for (j in 1:length(data[[i]])) {
##read treatment
treat <- read.csv(paste(data[[i]][[j]], files[1], sep=""), dec=",")
if (!wellVar %in% colnames(treat)) {
stop(paste("Please provide a valid Well variable in the treatment file!\n",
"Identified vars: ", paste(colnames(treat), collapse="|"), "\n",
"FILE: ", paste(data[[i]][[j]], files[1], sep="")))
}
if (length(which(treatVar %in% colnames(treat))) != length(treatVar)) {
stop(paste("Could not identify treatment / conc vars!\n",
"Please provide valid colnames as treatVar parameter!\n",
"treatVar=c('Treatment','Konzentration')",
"Identified vars: ", paste(colnames(treat), collapse="|"), "\n",
"FILE: ", paste(data[[i]][[j]], files[1], sep="")))
}
cell <- data.frame(fread(paste(data[[i]][[j]], files[2], sep=""), nrows=10))
if (nuclF) {
nucl <- data.frame(fread(paste(data[[i]][[j]], files[3], sep=""), nrows=10))
}
if (cytoF) {
cyto <- data.frame(fread(paste(data[[i]][[j]], files[4], sep=""), nrows=10))
}
if (!all(c("Metadata_Well","ImageNumber","ObjectNumber") %in% colnames(cell))) {
stop(paste("Metdata_Well, ImageNumber or ObjectNumber columns could not be identified in Cells.txt\n",
"FILE: ", paste(data[[i]][[j]], files[1], sep="")))
}
### check if any of first 9 rows has NA for metadata_info
if (cytoF && nuclF) {
if (any(is.na(cell$Metadata_Well)) || any(is.na(nucl$Metadata_Well)) || any(is.na(cyto$Metadata_Well))) {
stop(paste("Metadata_Well corrupt! (NA) \n",
"FILE: ", paste(data[[i]][[j]], files[2], " or ", files[3], " or ", files[4], sep="")))
}
}
if (nuclF) {
if (any(is.na(cell$Metadata_Well)) || any(is.na(nucl$Metadata_Well))) {
stop(paste("Metadata_Well corrupt! (NA) \n",
"FILE: ", paste(data[[i]][[j]], files[2], " or ", files[3], " or ", files[4], sep="")))
}
} else {
if (any(is.na(cell$Metadata_Well))) {
stop(paste("Metadata_Well corrupt! (NA) \n",
"FILE: ", paste(data[[i]][[j]], files[2], " or ", files[3], " or ", files[4], sep="")))
}
}
### add suffix to cell, nucl, cyto
sC <- which(colnames(cell) %in% c('Metadata_Well','ImageNumber', 'ObjectNumber'))
colnames(cell)[-sC] <- paste(colnames(cell)[-sC], ".cell", sep="")
if (cytoF) {
sC <- which(colnames(cyto) %in% c('Metadata_Well','ImageNumber', 'ObjectNumber'))
colnames(cyto)[-sC] <- paste(colnames(cyto)[-sC], ".nucl", sep="")
}
if (nuclF) {
sC <- which(colnames(nucl) %in% c('Metadata_Well','ImageNumber', 'ObjectNumber'))
colnames(nucl)[-sC] <- paste(colnames(nucl)[-sC], ".cyto", sep="")
}
if (nuclF) {
dat_merged <- merge(cell, nucl, by=c('Metadata_Well','ImageNumber', 'ObjectNumber'))
} else {
dat_merged <- cell
}
if (cytoF) {
dat_merged <- merge(dat_merged, cyto, by=c('Metadata_Well','ImageNumber', 'ObjectNumber'))
}
if (is.null(varsTmp)) {
varsTmp <- colnames(dat_merged)
}
if (!all(varsTmp == colnames(dat_merged)) && !adaptVars) {
stop(paste("Different numbers of vars: ", length(varsTmp), " vs ",
length(dat_merged[1,]),"\nSet adaptVars to T for an automatic intersect"))
}
varsTmp <- intersect(varsTmp, colnames(dat_merged))
}
}
if (is.null(vars) && adaptVars) {
vars <- unlist(do.call(rbind, strsplit(varsTmp, "\\."))[,1])
}
## read
dtAll <- list()
for (i in 1:length(data)) {
for (j in 1:length(data[[i]])) {
##read treatment
treat <- read.csv(paste(data[[i]][[j]], files[1], sep=""), dec=",")
treat$TREAT_CMPL <- apply(treat[,which(colnames(treat) %in% treatVar),drop=F], 1, function(x) paste(trimws(x), collapse=sepTreat, sep=""))
if (is.null(vars)) {
cell <- data.frame(fread(paste(data[[i]][[j]], files[2], sep="")))
if (nuclF) {
nucl <- data.frame(fread(paste(data[[i]][[j]], files[3], sep="")))
}
if (cytoF) {
cyto <- data.frame(fread(paste(data[[i]][[j]], files[4], sep="")))
}
} else {
vars <- c(vars, "Metadata_Well", "ImageNumber", "ObjectNumber", filterVars) ##TODO: check if fitler Vars works
vars <- unique(as.character(vars))
cell <- data.frame(fread(paste(data[[i]][[j]], files[2], sep=""), select=vars))
if (nuclF) {
nucl <- data.frame(fread(paste(data[[i]][[j]], files[3], sep=""), select=vars))
}
if (cytoF) {
cyto <- data.frame(fread(paste(data[[i]][[j]], files[4], sep=""), select=vars))
}
}
### Check again for all measurements!
if (F) {
if (any(is.na(cell$Metadata_Well)) || any(is.na(nucl$Metadata_Well))) {
stop(paste("Metadata_Well corrupt! (NA) \n",
"FILE: ", paste(data[[i]][[j]], files[2], " or ", files[3], sep="")))
}
}
if (nuclF) {
##merge cell/nucl
if (length(cell[,1]) != length(nucl[,1])) {
### FIXME!
warning("Unterschiedliche Dimensionen!") ## TODO: oben testen
}
}
### add suffix to cell, nucl, cyto
sC <- which(colnames(cell) %in% c('Metadata_Well','ImageNumber', 'ObjectNumber'))
colnames(cell)[-sC] <- paste(colnames(cell)[-sC], ".cell", sep="")
if (cytoF) {
sC <- which(colnames(cyto) %in% c('Metadata_Well','ImageNumber', 'ObjectNumber'))
colnames(cyto)[-sC] <- paste(colnames(cyto)[-sC], ".cyto", sep="")
}
if (nuclF) {
sC <- which(colnames(nucl) %in% c('Metadata_Well','ImageNumber', 'ObjectNumber'))
colnames(nucl)[-sC] <- paste(colnames(nucl)[-sC], ".nucl", sep="")
}
if (nuclF) {
dat_merged <- merge(cell, nucl, by=c('Metadata_Well','ImageNumber', 'ObjectNumber'))
} else {
dat_merged <- cell
}
if (cytoF) {
dat_merged <- merge(dat_merged, cyto, by=c('Metadata_Well','ImageNumber', 'ObjectNumber'))
}
## Change A01 -> A1
if (normalizeWells) {
charWell <- substr(dat_merged$Metadata_Well, 1, 1)
numWell <- as.numeric(substr(dat_merged$Metadata_Well, 2, nchar(as.character(dat_merged$Metadata_Well))))
total <- paste(charWell, numWell, sep="")
dat_merged$Metadata_Well <- as.character(total)
}
dat_merged$TREATMENT <- treat$TREAT_CMPL[match(dat_merged$Metadata_Well, treat[,wellVar])]
dat_merged$PLATTE <- j
dat_merged$VERSUCH <- i
dat_merged$WELL <- dat_merged$Metadata_Well
dtAll[[length(dtAll)+1]] <- dat_merged
}
}
return(do.call(rbind, dtAll))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.