Nothing
#### Read in Colonyzer output and return a rod.read-like object ####
# This function essentially replaces the database functionalities of ROD and is therefore quite slow.
# I recommend that you run it once, and write the returned table to file for future use.
# It is *extremely* fast and flexible compared to using ROD itself however...
####
colonyzer.read<-function(path=".",files=c(),experiment="ExptDescription.txt",ORF2gene="",libraries="LibraryDescriptions.csv",screenID=""){
# Are we reading all files in a folder?
if (length(strsplit(path,".")[[1]])>1){pathT=TRUE}else{pathT=FALSE}
# If no Colonyzer files specified, use all .dat in working directory
if (length(files)==0){
fs<-list.files(path=path,pattern="\\.out$")
# For historical datasets, there are no .out files, only headerless .dat files
if(length(fs)==0) fs<-list.files(path=path,pattern="\\.dat$")
} else {fs<-files}
# If we're using a path, then add path to filenames
if (pathT==TRUE) fs<-paste(path,fs,sep="/")
print("List of data files to read:")
print(fs)
# Open the library descriptions
libs=read.delim(libraries,sep="\t",header=TRUE,stringsAsFactors=FALSE)
libs$ORF=toupper(libs$ORF)
# What library are we using? e.g. library="AndrewsOE_v2" library="SDL_v2"
liblist=unique(libs$Library)
NLIB=length(liblist)
libdict=1:NLIB
names(libdict)=liblist
#libs=libs[libs$Library==library,]
# Make an array for storing info about spots
NROW=max(libs$Row,na.rm=TRUE)
NCOL=max(libs$Column,na.rm=TRUE)
NPLATE=max(libs$Plate,na.rm=TRUE)
SPOTARRAY=array("missing",dim=c(NLIB,NPLATE,NROW,NCOL))
# Fill the spot array object
for (x in 1:length(libs$ORF)) SPOTARRAY[libdict[[libs[x,"Library"]]],libs[x,"Plate"],libs[x,"Row"],libs[x,"Column"]]=libs[x,"ORF"]
getORF<-function(lib,plate,row,col) SPOTARRAY[libdict[[lib]],plate,row,col]
# Open the experimental description
expt=read.delim(experiment,sep="\t",header=TRUE,stringsAsFactors=FALSE)
# Watch the mismatch between library names (e.g. BooneSDLV2 and SDL_v2)
# barcode-> plate dictionaries for all treatments and repeats
barcStart=expt$Start.Time
names(barcStart)=expt$Barcode
barcTreat=expt$Treatment
names(barcTreat)=expt$Barcode
barcMed=expt$Medium
names(barcMed)=expt$Barcode
barcScreen=expt$Screen
names(barcScreen)=expt$Barcode
if(!"RepQuad"%in%colnames(expt)) stop("Woah there! We need a RepQuad column in expt. description file")
barcQuad=expt$RepQuad
names(barcQuad)=expt$Barcode
barcPlate=expt$Plate
names(barcPlate)=expt$Barcode
barcLib=expt$Library
names(barcLib)=expt$Barcode
if("Client"%in%colnames(expt)){
barcClient=expt$Client
names(barcClient)=expt$Barcode
}
if("ExptDate"%in%colnames(expt)){
barcExptDate=expt$ExptDate
names(barcExptDate)=expt$Barcode
}
if("User"%in%colnames(expt)){
barcUser=expt$User
names(barcUser)=expt$Barcode
}
if("PI"%in%colnames(expt)){
barcPI=expt$PI
names(barcPI)=expt$Barcode
}
if("Condition"%in%colnames(expt)){
barcCond=expt$Condition
names(barcCond)=expt$Barcode
}
if("Inoc"%in%colnames(expt)){
barcInoc=expt$Inoc
names(barcInoc)=expt$Barcode
}
# Open the ORF2GENE file
orf2gene=read.delim(ORF2gene,sep="\t",header=FALSE,stringsAsFactors=FALSE)
colnames(orf2gene)=c("orf","gene")
orf2gene$orf=toupper(orf2gene$orf)
# Add a "missing" row
orf2gene=rbind(orf2gene,c("missing","missing"))
orf2gene=rbind(orf2gene,c("MISSING","MISSING"))
# Create an ORF2Gene dictionary
getGene=orf2gene$gene
names(getGene)=orf2gene$orf
#colnames(iman)=c("FILENAME","ROW","COLUMN","TOPLEFTX","TOPLEFTY","WHITEAREA","TRIMMED","THRESHOLD","INTENSITY","EDGEPIXELS","COLR","COLG","COLB","BKR","BKG","BKB","EDGELEN","XDIM","YDIM")
# ROD-like columns
#colNames=c("Image.Name","Row","Col","X.Offset","Y.Offset","Area","Trimmed","Threshold","Intensity","Edge.Pixels","Colony.Color.R","Colony.Color.G","Colony.Color.B","Background.Color.R","Background.Color.G","Background.Color.B","Edge.length","Tile.Dimensions.X","Tile.Dimensions.Y")
#colClasses=c("character",rep("numeric",length(colNames)-1))
# Read in the image analysis output
# Have to define colClasses here since from R3.1-10, automatic conversion for string representation of numbers
if(grepl("\\.out",fs[1])){hasHeader=TRUE}else{hasHeader=FALSE}
if("data.table" %in% rownames(installed.packages())){
library(data.table) # Faster version of read.delim...
iman=do.call(rbind, lapply(fs, data.table::fread,header=hasHeader,sep="\t",stringsAsFactors=FALSE))
iman=data.frame(iman)
}else{
iman=do.call(rbind, lapply(fs, read.delim,header=hasHeader,sep="\t",stringsAsFactors=FALSE))
}
if(!hasHeader){
colnames(iman)=c("Filename","Row","Column","X.Offset","Y.Offset","Area","Trimmed","Threshold","Intensity","Edge.Pixels","redMean","greenMean","blueMean","redMeanBack","greenMeanBack","blueMeanBack","Edge.Length","Tile.Dimensions.X","Tile.Dimensions.Y")
iman$x=iman$X.Offset+iman$Tile.Dimensions.X/2.0
iman$y=iman$Y.Offset+iman$Tile.Dimensions.Y/2.0
iman$Diameter=(iman$Tile.Dimensions.X+iman$Tile.Dimensions.Y)/2.0
if(max(iman$Intensity)>1.0) iman$Intensity=iman$Intensity/(iman$Tile.Dimensions.X*iman$Tile.Dimensions.Y*255)
if(max(iman$Trimmed)>1.0) iman$Trimmed=iman$Trimmed/(iman$Tile.Dimensions.X*iman$Tile.Dimensions.Y*255)
}
# Sometimes users include multiple copies of the same image analysis files (e.g. in concatenated collection & separately)
#iman=unique(iman) # This operation is quite time-consuming for large data.frames...
# Create extra columns
if(nchar(iman$Filename[1])==31){
if(!"Barcode"%in%colnames(iman)) iman$Barcode=substr(iman$Filename,1,11)
iman$Date.Time=substr(iman$Filename,13,31)
}else{
if(!"Barcode"%in%colnames(iman)) iman$Barcode=substr(iman$Filename,1,15)
iman$Date.Time=substr(iman$Filename,17,35)
}
# Dump any images which are not in the experimental description file
iman=iman[iman$Barcode%in%expt$Barcode,]
smalliman=iman[(iman$Row==1)&(iman$Column==1),]
# Make sure that there are not multiple copies of files present
counts=c(table(smalliman$Filename))
probs=counts[counts>1]
if(length(probs)>0){
print("ERROR: The following images were found in multiple locations")
print(names(probs))
print("You should probably look up their locations in the relevant .json file, decide which location is correct and delete the files at other locations before reading data in again")
return(NULL)
}
fmt="%Y-%m-%d_%H-%M-%S"
# Create a dictionary for filename->photo number
getPhotoNum<-function(filename,fmt="%Y-%m-%d_%H-%M-%S"){
# Get plate name from filename
#nlst=strsplit(filename,"_")[[1]]
#lenlst=length(nlst)
#platename=paste(nlst[1:(lenlst-2)],collapse="_")
lenFname=nchar(filename)
lenDate=nchar(format(Sys.time(), fmt))
platename=substring(filename,1,lenFname-lenDate-1)
# Filter iman data frame by filename
#tmp=na.omit(smalliman[(smalliman$Barcode==platename),])
tmp=smalliman[(smalliman$Barcode==platename),]
tmp=tmp[order(tmp$Filename),]
tmp$PhotoNum=1:length(tmp$Filename)
return(as.numeric(tmp$PhotoNum[tmp$Filename==filename]))
}
fnames=unique(iman$Filename)
photoNum=sapply(fnames,getPhotoNum,fmt)
names(photoNum)=fnames
iman$Inoc.Time=barcStart[iman$Barcode]
iman$Treatments=barcTreat[iman$Barcode]
iman$Medium=barcMed[iman$Barcode]
iman$Screen.Name=barcScreen[iman$Barcode]
iman$RepQuad=barcQuad[iman$Barcode]
iman$MasterPlate.Number=barcPlate[iman$Barcode]
iman$Timeseries.order=as.numeric(photoNum[iman$Filename])
iman$Library.Name=barcLib[iman$Barcode]
iman$ORF=mapply(getORF, iman$Library.Name, iman$MasterPlate.Number, iman$Row, iman$Column)
iman$Gene=getGene[toupper(iman$ORF)]
iman$ScreenID=rep(screenID,length(iman$Filename))
if("Client"%in%colnames(expt)){iman$Client=barcClient[iman$Barcode]}
if("ExptDate"%in%colnames(expt)){iman$ExptDate=barcExptDate[iman$Barcode]}
if("User"%in%colnames(expt)){iman$User=barcUser[iman$Barcode]}
if("PI"%in%colnames(expt)){iman$PI=barcPI[iman$Barcode]}
if("Condition"%in%colnames(expt)){iman$Condition=barcCond[iman$Barcode]}
if("Inoc"%in%colnames(expt)){iman$Inoc=barcInoc[iman$Barcode]}
t0<-as.POSIXlt(as.character(iman$Inoc.Time),format=fmt)
t1<-as.POSIXlt(as.character(iman$Date.Time),format=fmt)
iman$Expt.Time=as.numeric(difftime(t1,t0,units="days"))
# Print checks so people are sure they're using the correct data #
print(paste("Number of Barcodes :",length(unique(iman$Barcode))))
print(paste("Number of Plate Photos :",length(unique(iman$Filename))))
print(paste("Number of ORFs :",length(unique(iman$ORF))))
print(paste("Number of Culture Images :",length(iman$Filename)))
print("Treatments :")
print(unique(iman$Treatments))
print("Media:")
print(unique(iman$Medium))
print("Screens:")
print(unique(iman$Screen.Name))
if("Client"%in%colnames(iman)){print("Client :"); print(unique(iman$Client))}
if("ExptDate"%in%colnames(iman)){print("Experiment date :"); print(unique(iman$ExptDate))}
if("User"%in%colnames(iman)){print("User :"); print(unique(iman$User))}
if("PI"%in%colnames(iman)){print("PI :"); print(unique(iman$PI))}
if("Condition"%in%colnames(iman)){print("Condition :"); print(unique(iman$Condition))}
if("Inoc"%in%colnames(iman)){print("Inoculation type :"); print(unique(iman$Inoc))}
platesize<-max(as.numeric(iman$Row))*max(as.numeric(iman$Column))
if (length(iman$Date.Time)%%platesize!=0){
warning("Number of cultures not multiple of plate size")}
print("Inoculation DateTimes:")
print(unique(iman$Inoc.Time))
return(iman)
}
# For testing older code, we can write a synthetic ROD object to file using this function.
# Hopefully this will not be required.
rod.write<-function(iman,outf){
# This is how we can construct a ROD file from the returned object...
ROD=data.frame(
Barcode=iman$Barcode,
Area=iman$Area,
SpotRow=iman$Row,
TrimmedArea=iman$Trimmed,
SpotColumn=iman$Column,
Intensity=iman$Intensity,
EdgePixels=iman$Edge.Pixels,
Threshold=iman$Threshold,
XOffset=iman$X.Offset,
YOffset=iman$Y.Offset,
Treatments=iman$Treatments,
Medium=iman$Medium,
ImageName=iman$Image.Name,
ORFName=iman$ORF,
DateOfImage=iman$Date.Time,
ScreenName=iman$Screen,
LibraryName=iman$Library,
MasterPlateNumber=iman$MasterPlate.Number,
TimeseriesOrder=iman$Timeseries.order,
ColonyColorR=iman$Colony.Color.R,
ColonyColorG=iman$Colony.Color.G,
ColonyColorB=iman$Colony.Color.B,
BackgroundColorR=iman$Background.Color.R,
BackgroundColorG=iman$Background.Color.G,
BackgroundColorB=iman$Background.Color.B,
Edgelength=iman$Edge.length,
TileDimensionsX=iman$Tile.Dimensions.X,
TileDimensionsY=iman$Tile.Dimensions.Y
)
# Write table as rod output
write(file=outf,"ROD Export Version 5")
write(file=outf,append=TRUE,"Barcode\tArea\tSpot Row\tTrimmed Area\tSpot Column\tIntensity\tEdge Pixels\tThreshold\tX Offset\tY Offset\tTreatments\tMedium\tImage Name\tORF Name\tDate of Image\tScreen Name\tLibrary Name\tMasterPlate Number\tTimeseries order\tColony Color R\tColony Color G\tColony Color B\tBackground Color R\tBackground Color G\tBackground Color B\tEdge length\tTile Dimensions X\tTile Dimensions Y")
write.table(ROD,file=outf,append=TRUE,quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.