R/ELISAtools_IO.R
In ELISAtools: ELISA Data Analysis with Batch Correction

Documented in loadData loadDB read.annotation read.annotations read.plate read.plates saveDataText saveDB

###this is module to take care of input/output for the ELISAtools project
#---- by Feng 07/2018
###############
####import the stringi to take care of locale, mainly in mac 
#'@import stringi
#'@import utils

#'@include ELISAplate.R

#S3 method to quickly annotate the plate with sample ids
#return a 96 well plate with ids. user need to input the sample Id
#but std always be std1 std2 std3
#'@title S3 method to annotate ELISA plate
#'@description to write annotations for an ELISA plate as
#'    an input to guide the functions to read OD values
#' @details Based on the input to quickly write the annotations
#'  for ELISA plate. The output is in a 96-well format and 
#'	will be used to guide the reading of OD plates. This way
#'	only a nxm dataframe can be used. To write non-regular
#'	annotation, you have to do it manually. 
#'
#'	@param sample.id character vector to specify the names/ids of
#'		the samples on the plate. Note, standard/calibration sample
#'		ids/names are fixed to be "s1","s2", etc., which are specified
#'		by the software, so users don't need to provide them.
#'	@param sample.prefix characters will be added to the beginning of sample names 
#'	@param sample.suffix characters will be added to the end of sample names
#'	@param num.sample numeric number of samples to write
#'	@param num.std numeric number of standards
#'	@param byRow.sample boolean indicate whether to write sample names 
#'		horizontally by row (TRUE) or vertically by column (FALSE)
#'	@param byRow.replicates boolean indicate whether to write sample replicates 
#'		horizontally by row (TRUE) or vertically by column (FALSE)
#'	@param replicates.sample numeric number of replicates for each sample
#'	@param replicates.std numeric number of replicates for each standards
#'	@param rows numeric vector to specify which rows to be included in the annotation
#'	@param columns numeric vector to specify which columns 
#'		to be included in the annotation
#'	@param std.first boolean to indicate whether to write standards first or
#'		the samples first.
#' @return a character matrix holding the annotations for the plate, with the
#'		plate row letters as row names and the plate column numbers as column names.
#'
# #'@examples
# #'
# #'sample.id<-c(1:24)
# #'sample.prefix<-"sam"
# #'sample.suffix<-"_d"
# #'num.sample<-length(sample.id)
# # 'replicates.sample<-2
# # 'replicates.std<-2
# #'num.std<-6
# #'byRow.sample=FALSE;
# #'rows<-c(3:8)
# #'cols<-c(3:12)
# #'ann<-annotate.plate(sample.id=sample.id, sample.prefix=sample.prefix, sample.suffix=sample.suffix,
# #'		num.sample=num.sample, byRow.sample=F, byRow.replicates=T, num.std=num.std,
# #'		rows<-rows, cols<-cols,
# #'		replicates.sample=replicates.sample, replicates.std=replicates.std
# #'		)
# #'
# #'write.table(ann, file=file.path(tempdir(),"annote.txt"), sep="\t", row.names=T,
# #'		col.names=TRUE)
# #' @seealso  \code{\link{SensorgramData-class}} \code{\link{plot}} \code{\link{SaveSPRData}}
# # @export
annotate.plate<-function (sample.id, sample.prefix, sample.suffix,
		num.sample,num.std=8,
		byRow.sample=TRUE, 
		byRow.replicates=TRUE,
		replicates.sample=3, replicates.std=3, 
		rows, columns,std.first=TRUE
		)
{	
	#Build the annotation layout of (a rectangular part of) a 96-well plate.
	#  sample.id          ids of the unknown samples (optionally decorated with
	#                     sample.prefix / sample.suffix)
	#  num.sample         number of samples; defaults to length(sample.id)
	#  num.std            number of standards, named "s1", "s2", ...
	#  byRow.sample       TRUE: consecutive ids run along a row; FALSE: down a column
	#  byRow.replicates   TRUE: replicates sit side by side in a row; FALSE: stacked in a column
	#  replicates.*       number of replicates of each sample / standard
	#  rows, columns      plate row / column indices covered by the annotation
	#  std.first          TRUE: standards are written before the samples
	#Returns a character matrix with the row letters and column numbers as dimnames.
	#Stops when the ids cannot fill the requested wells exactly.
	
	#check the data integrity
	if(missing(sample.id))
	{
		stop("***Error: missing sample id!!");
	}
	if(missing(num.sample))
	{
		num.sample<-length(sample.id)
	}
	#a full plate has 8 rows (A-H) and 12 columns (1-12)
	all.row.names<-c("A","B","C","D","E","F","G","H")
	all.col.names<-as.character(1:12)
	if(missing(rows))
	{
		rows<-seq_along(all.row.names)
	}
	if(missing(columns))
	{
		columns<-seq_along(all.col.names)
	}
	n.rows<-length(rows)
	n.cols<-length(columns)
	num.well<-n.rows*n.cols
	if(num.well!=(num.sample*replicates.sample+num.std*replicates.std))
	{
		stop("Error:sample number and standard number do not equal to \n\tnumber of wells on the plate, please check!!")
	}
	row.name<-all.row.names[rows]
	col.name<-all.col.names[columns]
	
	#prepare the sample and standard ids
	if(!missing(sample.prefix))
	{
		sample.id<-paste0(sample.prefix, sample.id)
	}
	if(!missing(sample.suffix))
	{
		sample.id<-paste0(sample.id, sample.suffix)
	}
	std.id<-if(num.std>0) paste0("s", seq_len(num.std)) else character(0)
	
	#concatenate standards and samples in the requested order
	in.order<-function(std, smp)
	{
		if(std.first) c(std, smp) else c(smp, std)
	}
	
	sample.by.row<-if(byRow.sample) TRUE else FALSE
	replicate.by.row<-if(byRow.replicates) TRUE else FALSE
	
	if(sample.by.row==replicate.by.row)
	{
		#ids and their replicates run in the same direction: repeat every id
		#in place and pour the sequence into the plate row-wise or column-wise
		ids<-in.order(rep(std.id, each=replicates.std), 
				rep(sample.id, each=replicates.sample))
		plate<-matrix(ids[seq_len(num.well)], nrow=n.rows, ncol=n.cols, 
				byrow=sample.by.row)
	} else {
		#ids run in one direction and replicates in the other: a whole "line"
		#of ids (a plate row when samples run by row, a plate column otherwise)
		#is repeated as a unit, so the ids must fill complete lines
		line.len<-if(sample.by.row) n.cols else n.rows
		line.label<-if(sample.by.row) "row" else "column"
		ids<-in.order(std.id, sample.id)
		if(replicates.std==replicates.sample)
		{
			if(length(ids)%%line.len!=0)
			{
				stop("ERROR: incompatible format: standards and samples together do not cover whole ", 
					line.label, "s.")
			}
			rep.num<-rep(replicates.sample, length(ids)/line.len)
		} else {
			#with different replicate numbers a line cannot mix standards and
			#samples, so BOTH groups have to fill complete lines on their own
			if((num.std%%line.len!=0)||(num.sample%%line.len!=0))
			{
				stop("ERROR: incompatible format: 1)non-equal replicate number between standard and sample;2)std not cover the whole ", 
					line.label, ".");
			}
			std.rep<-rep(replicates.std, num.std/line.len)
			smp.rep<-rep(replicates.sample, num.sample/line.len)
			rep.num<-if(std.first) c(std.rep, smp.rep) else c(smp.rep, std.rep)
		}
		#one matrix row per line of ids, then repeat each line rep.num times
		lines.once<-matrix(ids[seq_len(length(rep.num)*line.len)], 
				ncol=line.len, byrow=TRUE)
		lines.all<-lines.once[rep(seq_along(rep.num), times=rep.num), , drop=FALSE]
		#lines are plate rows when samples run by row, plate columns otherwise
		plate<-if(sample.by.row) lines.all else t(lines.all)
	}
	
	rownames(plate)<-row.name;
	colnames(plate)<-col.name;
	
	return(plate);
}

#reading the annotation for the plate
#this will give us the information about sample identities
#'@title Read the annotation of single ELISA plate
#'@description Parse the annotations for one single ELISA plate from
#'    a section of a file and output the annotations for standard and unknown
#'		separately.
#' @details The annotation file may contain annotations for more than
#'		one plate. Each plate is marked by "Plate: plate 1..." and 
#'		"~End". This function is fed in with the content for each section
#'		and we do actually parsing in here. Store the annotations into 
#'		data frame. It also parse the standard concentration and 
#'		include this information in the data frame.
#'		For each section,we expect
#'		the following format\cr
#'	\tabular{llllllll}{
#'    \tab	1\tab	2\tab		3\tab		4\tab 			...\cr	
#'   C\tab	s1\tab	s1\tab		sample1\tab		sample1\tab	...\cr
#'   D\tab	s2\tab	s2\tab		sample2\tab		sample\tab  ...\cr	
#'	...\tab	...\tab	...\tab		...\tab		...\tab		...\cr
#'	}
#'		In addition, the row name and column names indicate the
#'		the plate row and column indices.
#'		As output, the standards and unknowns are returned separately in two 
#'		tables.
#'	@param annotation characters to specify the path and name of the annotation file
#'	@param std.conc data.frame containing standard concentration data. 
#'		Only first two columns are used with first one to be the standard IDs
#'		and second the concentrations.
# #'	@param file.dir file path to the annotatoin file.
#' @return a list of data.frames holding the annotations for the plate.
#'
#'@examples
#'#get example annotation file path from the system folder
#' fileName<-system.file("extdata", "annote_single.txt", package="ELISAtools")
#'#prepare the standard concentration file.
#' std.conc<-data.frame(id=c("s1","s2","s3","s4","s5","s6"), conc=c(1:6))
#'
#'#read the data as a data frame.
#' ann<-read.table(fileName, header=TRUE,  sep="\t", stringsAsFactors=FALSE)
#'
#'#call to do the reading.
# #' annotation<-read.annotation(ann,  std.conc)
# #' @seealso  \code{\link{elisa_batch-class}} \code{\link{loadData}} 
# #' @export
read.annotation<-function(annotation,  std.conc)
{
	#Parse the annotation table of one plate.
	#  annotation  data frame of well ids; row names are plate row letters (A-H),
	#              column names are plate column numbers (a letter prefix such as
	#              the "X" added by read.table() is ignored)
	#  std.conc    data frame; column 1 holds standard ids, column 2 concentrations
	#Returns list(standards=, unknowns=). standards has columns ID,row,col,conc
	#(NULL when the plate holds no standards); unknowns has columns ID,row,col
	#(an empty data frame when the plate holds no unknowns).
	#Wells that are NA, blank, or marked as masked are skipped.
	
	#check the data integrity 
	if(missing(annotation))
	{
		stop("ERROR: no annotation input specified");
	}
	if(!is.data.frame(annotation))
	{
		stop("ERROR:  annotation input is not in a correct format");
	}
	ann<-annotation
	
	#translate the row letters into row indices 1-8
	row.lookup<-c(1:8)
	names(row.lookup)<-c("A","B","C","D","E","F","G","H")
	row.labels<-rownames(ann)
	if(!all(is.element(row.labels, names(row.lookup))))
	{
		stop("ERROR:the row index is not correct set, please check!")
	}
	rows<-unname(row.lookup[row.labels])
	
	#translate the column names into column indices 1-12; names that are not
	#numeric become NA and are rejected here instead of breaking the if() below
	cols<-sub("^[A-Z]+", "", colnames(ann), ignore.case=TRUE)
	cols<-suppressWarnings(as.numeric(cols))
	if(anyNA(cols)||!all(cols<=12 & cols>=1))
	{
		stop("ERROR:the col index is not correct set, please check!")
	}
	
	if(nrow(ann)>8||ncol(ann)>12)
	{
		stop("ERROR: annotation data frame is not in a correct format.");
	}
	
	if(missing(std.conc))
	{
		stop("ERROR:missing standard concentration data, please specify")
	}
	if(!is.data.frame(std.conc))
	{
		stop("ERROR:std.conc should be a data frame");
	}
	if(ncol(std.conc)<2)
	{
		stop("ERROR:std.conc should have at least two columns (standard IDs and concentrations)");
	}
	if(ncol(std.conc)>2)
	{
		warning("There are more columns in standard concentration data table, only first two are used",
			immediate.=TRUE);
	}
	
	#scan the wells row by row and record what each one holds; the records are
	#preallocated and turned into data frames once at the end
	n.cell<-nrow(ann)*ncol(ann)
	cell.id<-vector("list", n.cell)
	cell.conc<-vector("list", n.cell)
	cell.row<-integer(n.cell)
	cell.col<-integer(n.cell)
	is.std<-logical(n.cell)
	is.unknown<-logical(n.cell)
	k<-0
	for(i in seq_len(nrow(ann))) #row
	{
		for(j in seq_len(ncol(ann))) #col
		{
			k<-k+1
			id<-ann[i,j]
			if(is.factor(id))
			{
				id<-as.character(id)
			}
			#skip empty wells: NA or white space only
			if(is.na(id))
			{
				next;
			}
			if(grepl("^\\s*$", id))
			{
				next;
			}
			#skip masked wells (ids starting with "masked" or ending with "mask")
			if(grepl("^masked|mask$", id, ignore.case=TRUE))
			{
				next;
			}
			cell.id[[k]]<-id
			cell.row[k]<-rows[i]
			cell.col[k]<-as.integer(cols[j])
			#standards are named "s" + number; the number may contain zeros (s10, s20, ...)
			if(grepl("^s[1-9][0-9]*$", id))
			{
				#look up the concentration
				hit<-which(std.conc[,1]==id)
				if(length(hit)==0)
				{
					stop("ERROR:can not find the concentration for the std, please check!!")
				}
				if(length(hit)>1)
				{
					warning("more than one concentration specified for std sample, only the first one is used!!") 
				}
				cell.conc[[k]]<-std.conc[hit[1],2]
				is.std[k]<-TRUE
			} else { #sample
				is.unknown[k]<-TRUE
			}
		} 
	}
	
	annotations.std<-NULL
	if(any(is.std))
	{
		conc<-unlist(cell.conc[is.std])
		#keep concentrations as doubles even when the input column is integer/logical
		if(is.integer(conc)||is.logical(conc))
		{
			conc<-as.numeric(conc)
		}
		annotations.std<-data.frame(ID=unlist(cell.id[is.std]), row=cell.row[is.std],
			col=cell.col[is.std], conc=conc, stringsAsFactors=FALSE)
	}
	annotations.unknown<-data.frame()
	if(any(is.unknown))
	{
		annotations.unknown<-data.frame(ID=unlist(cell.id[is.unknown]), 
			row=cell.row[is.unknown], col=cell.col[is.unknown], 
			stringsAsFactors=FALSE)
	}
	list(standards=annotations.std, unknowns=annotations.unknown)
}
#
#reading the annotations for plates
#this will give us the information about sample identities
#'@title Read the annotations of plates 
#'@description Parse annotations for multiple ELISA plates from
#'   	files, one annotation file and one standard concentration file,
#'		and output the annotations for standard and unknown
#'		separately.
#' @details The annotation file may contain annotations for more than
#'		one plate. Each plate is marked by "Plate: plate 1..." and 
#'		"~End". This function parses each section in both annotation
#'		file and standard concentration file. Then passes the section
#'		on to do the parsing.
#'		For each section,we expect
#'		the following format\cr
#'	\tabular{llllllll}{
#'    \tab	1\tab	2\tab		3\tab		4\tab 			...\cr	
#'   C\tab	s1\tab	s1\tab		sample1\tab		sample1\tab	...\cr
#'   D\tab	s2\tab	s2\tab		sample2\tab		sample\tab  ...\cr	
#'	...\tab	...\tab	...\tab		...\tab		...\tab		...\cr
#'	}
#'	@param annotation characters to specify the path and name of the annotation file
#'	@param std.conc characters to specify the standard concentration file. 
#'		
#'	@param dir.annotation characters specifying the path (directory) to the annotation file.
#'	@param dir.stdConc characters specifying the path (directory) to the standard concentration file.
#'	@param num.plate numeric indicating the number of plates in the annotation
#'			files.
#' @return a list of annotations for elisa plates.
#'
#'@examples
#'#get example annotation file path from the system folder
#' ann<-system.file("extdata", "annote.txt", package="ELISAtools")
#' std.conc<-system.file("extdata", "stdConc.txt", package="ELISAtools")
#'
#'#read them in and there are 2 plates.
#'	read.annotations(annotation=ann,  std.conc=std.conc, num.plate=2)
# #' @seealso  \code{\link{elisa_run-class}} \code{\link{loadData}} 
#' @export
read.annotations<-function(annotation,  std.conc, dir.annotation, dir.stdConc,num.plate=1)
{
	#Read the annotation file and the standard concentration file, both made
	#of sections delimited by a "Plate:" line and a "~End" line, and parse up
	#to num.plate sections of each with read.annotation().
	#  annotation, std.conc          file names
	#  dir.annotation, dir.stdConc   optional directories prepended to the names
	#  num.plate                     number of plate sections to read
	#Returns a list with one read.annotation() result per plate; the list is
	#shorter than num.plate (with a warning) when a file runs out of sections.
	
	#check the data integrity 
	if(missing(annotation))
	{
		stop("ERROR: no annotation input specified");
	}
	if(!missing(dir.annotation))
	{
		annotation<-file.path(dir.annotation, annotation)
	}
	if(!file.exists(annotation))
	{
		stop("ERROR:  annotation input does not exists");
	}
	if(missing(std.conc))
	{
		stop("ERROR: no std conc input specified");
	}
	if(!missing(dir.stdConc))
	{
		std.conc<-file.path(dir.stdConc, std.conc);
	}
	if(!file.exists(std.conc))
	{
		stop("ERROR: std conc input does not exist, please check: ", std.conc);
	}
	
	#Cut the first "Plate:" ... "~End" section out of `lines`.
	#Returns NULL when no "Plate:" marker is left, otherwise a list with
	#  body   - the lines strictly between the two markers
	#  rest   - the lines following the section
	#  closed - FALSE when no "~End" follows the "Plate:" marker; the body
	#           then extends to the last line
	take.section<-function(lines)
	{
		start<-grep("^Plate:", lines)
		if(length(start)<1)
		{
			return(NULL)
		}
		first<-start[1]+1
		#only an end marker located after the start marker can close the section
		end<-grep("^~End", lines)
		end<-end[end>=first]
		closed<-length(end)>0
		last<-if(closed) end[1]-1 else length(lines)
		body<-if(last>=first) lines[first:last] else character(0)
		#the remainder starts after the "~End" line, i.e. two lines past the body
		rest<-if(last+2<=length(lines)) lines[(last+2):length(lines)] else character(0)
		list(body=body, rest=rest, closed=closed)
	}
	
	#read the files first; readLines() opens and closes the connections itself
	ann.raw<-readLines(annotation, skipNul=TRUE)
	stdConc.raw<-readLines(std.conc, skipNul=TRUE)
	
	anns<-list();
	for(i in seq_len(num.plate))
	{
		#annotation section of this plate
		ann.sec<-take.section(ann.raw)
		if(is.null(ann.sec))
		{
			warning("not enough plates found in annotation file, please check");
			break;
		}
		if(!ann.sec$closed)
		{
			warning("no \"~End\" marker found for the last plate in annotation file, reading to the end of the file");
		}
		ann.raw<-ann.sec$rest
		
		#standard concentration section of this plate
		std.sec<-take.section(stdConc.raw)
		if(is.null(std.sec))
		{
			warning("not enough plates found in standard conc file, please check");
			break;
		}
		if(!std.sec$closed)
		{
			warning("no \"~End\" marker found for the last plate in standard conc file, reading to the end of the file");
		}
		stdConc.raw<-std.sec$rest
		
		#turn both sections into data frames; the first annotation column holds the row letters
		ann.df<-read.table(text=ann.sec$body, sep="\t", stringsAsFactors=FALSE, 
				header=TRUE, row.names=1);
		sConc.df<-read.table(text=std.sec$body, sep="\t", stringsAsFactors=FALSE, 
				header=TRUE);
		
		#feed to do parsing
		anns[[i]]<-read.annotation(annotation=ann.df, std.conc=sConc.df);
	}
	return(anns)
}

#read each individual file of ODs 
#reading the annotation for the plate
#this will give us the information about sample identities
#'@title Read the single ELISA OD plate
#'@description Read the individual ELISA plate to parse the ODs.
#'     
#'@details The input is a text file imported from the sdf file.  
#'	We only read the first section with both the OD and blank file.
#'	The OD data are read in according to the annotation file.
#'	
#'	@param ODs characters containing data of ODs for one plate
#'	@param annotation list of data containing annotations of the plate
#'	@param batchID characters specifying the batchID read from the design file
#'	@param expID characters specifying the expID or plateID read from the design file 
#' @return an object of elisa_plate holding data and annotations for 
#'		a single plate.
#'
# #'@examples
# #' #setwd( "E:\\feng\\LAB\\hg\\ELISA\\ELISAtools\\dev")
# #' std.conc<-data.frame(id=c("s1","s2","s3","s4","s5","s6"), conc=c(1:6))
# #' annotation<-"annote.txt"
##'
# #'	read.annotation(annotation,  std.conc)
# #' @seealso  \code{\link{elisa_batch-class}} \code{\link{loadData}} \code{\link{read.plates}}
# #' @export
read.plate<-function(ODs, annotation, batchID, expID)
{
	#Parse the text lines of one OD plate section and load them into an
	#elisa_plate object.
	#  ODs         character vector, the lines of one plate section
	#  annotation  list with data frames `standards` (required) and `unknowns`
	#              (may be NULL)
	#  batchID, expID  ids stored in the returned object
	#Line roles: a line starting with "Plate" is the description, a line
	#containing "Temperature" is the header, the first other non-blank line
	#holds the OD readings and a later non-blank line (the last one wins) the
	#blank readings. Scanning stops at the "~End" line. When no blank line
	#exists, the blanks are set to 0.
	if(missing(ODs))
	{
		stop("Error: please specify the files of OD data")
	}
	if(!is.character(ODs))
	{
		stop("ERROR: the input OD is not in correct format!!")
	}
	if(missing(annotation))
	{
		stop("ERROR: please specify the annoation data for the plate");
	}
	if(!is.list(annotation))
	{
		stop("ERROR: the input annotation is not in correct format")
	}
	if(is.null(annotation$standards)||!is.data.frame(annotation$standards))
	{
		stop("ERROR: the input annotation is not in correct format")
	}
	if(!is.null(annotation$unknowns)&&!is.data.frame(annotation$unknowns))
	{#could be null, meaning no unknowns
		stop("ERROR: the input annotation is not in correct format")
	}
	if(missing(batchID))
	{
		stop("ERROR: please specify the batch ID");
	}
	if(missing(expID))
	{
		stop("ERROR: please specify the exp ID");
	}
	
	#classify the lines of the section
	OD.desc<-NULL
	OD.header<-NULL
	OD.plate<-NULL
	OD.blank<-NULL
	for(tempStr in ODs)
	{
		if(is.na(tempStr))
		{
			next;
		}
		if(grepl("^[ \t\r\n]*~End", tempStr))
		{#we are done.
			break;
		}
		if(grepl("^Plate", tempStr))
		{
			OD.desc<-tempStr;
			next;
		}
		if(grepl("Temperature", tempStr))
		{
			OD.header<-tempStr;
			next;
		}
		if(nchar(trimws(tempStr,which="both"))>0)
		{
			#the first data line is the OD readings, any later one the blanks
			if(is.null(OD.plate))
			{
				OD.plate<-tempStr;
			} else {
				OD.blank<-tempStr;
			}
		}
	}
	#fail with a clear message instead of a cryptic strsplit() error
	if(is.null(OD.header))
	{
		stop("ERROR: can not find the header line (containing \"Temperature\") in the OD data, please check")
	}
	if(is.null(OD.plate))
	{
		stop("ERROR: can not find the line of OD readings in the OD data, please check")
	}
	
	#split a tab-delimited line and drop its first two fields, which do not
	#belong to the wells (the second one of a data line is the temperature)
	well.fields<-function(line)
	{
		strsplit(line, "\t")[[1]][-c(1:2)]
	}
	OD.header<-well.fields(OD.header)
	OD.plate<-as.numeric(well.fields(OD.plate))
	if(is.null(OD.blank))
	{
		OD.blank<-numeric(length(OD.plate))
	} else {
		OD.blank<-as.numeric(well.fields(OD.blank))
	}
	
	#now we are done parsing, put into the plate object and get ready to do output
	eplate<-elisa_plate();
	eplate<-load.ODs(eplate,plate.header= OD.header, plate.data=OD.plate, 
		plate.blank=OD.blank, annotation=annotation);
	eplate@batchID<-batchID;
	eplate@expID<-expID;
	eplate@desc<-OD.desc;
	eplate@range.ODs<-c(min(eplate@data.std$OD),max(eplate@data.std$OD));
	return(eplate)
}

#'@title Read the ELISA OD files
#'@description Read the ELISA OD file to parse the ODs.
#'     
#'@details The input is a text file imported from the sdf file.  
#'	The file may contain multiple plates of OD. We will parse 
#'	each file section and then read them according to the 
#'	annotation to load the data. We assume for each file the data
#'	are for the same batch and experiment. If otherwise, please 
#'	split the file into different ones.
#'	
#'	@param fileName characters containing file name of OD data
#'	@param annotations list of data containing annotations of the plates
#'	@param batchID characters specify the batchID read from the design file
#'	@param expID characters specify the expID or plateID read from the design file
#'	@param num.plate numeric number of OD plates in the OD file. 
#'	@param date characters the date running the ELISA exps.
#' @return an object of elisa_run holding data and annotations for 
#'		one or multiple plates.
#'
#'@examples
#'#get example annotation file path from the system folder
#' ann<-system.file("extdata", "annote.txt", package="ELISAtools")
#' std.conc<-system.file("extdata", "stdConc.txt", package="ELISAtools")
#'
#'#read them in and there are 2 plates.
#'	annotations<-read.annotations(annotation=ann,  std.conc=std.conc, num.plate=2)
#'
#'#now start reading the OD plate file
#' fileName <-system.file("extdata", "Assay_3_and_4.txt", package="ELISAtools")
#'	plates<-read.plates(fileName, annotations=annotations, num.plate=2, batchID="b1", expID="e1")
# #' @seealso  \code{\link{elisa_batch-class}} \code{\link{loadData}} \code{\link{read.plates}}
#'
#'@export
read.plates<-function(fileName, annotations, num.plate=1, batchID, expID ,date=NA_character_)
{
	#Read an OD file holding one or more plate sections ("Plate:" ... "~End")
	#and parse up to num.plate of them with read.plate().
	#  fileName     path of the OD text file
	#  annotations  list of plate annotations, one per plate, used in order
	#  num.plate    number of plate sections to read
	#  batchID, expID, date  stored in the returned elisa_run object
	#Returns an elisa_run object holding the plates and the overall OD range.
	#Stops early, with a warning, when the file has fewer sections than num.plate.
	if(missing(fileName))
	{
		stop("Error: please specify the files of OD data")
	}
	if(!file.exists(fileName))
	{
		stop("ERROR: can not find the specified file!!")
	}
	if(missing(annotations))
	{
		stop("ERROR: please specify the annoation data for the plate");
	}
	if(!is.list(annotations))
	{
		stop("ERROR: the input annotation is not in correct format")
	}
	if(missing(batchID))
	{
		stop("ERROR: please specify the batch ID");
	}
	if(missing(expID))
	{
		stop("ERROR: please specify the exp ID");
	}
	#every plate needs its own annotation; check before touching the file
	if(length(annotations)<num.plate)
	{
		stop("ERROR: the number of plates specified is larger than the number of annotations, please check");
	}
	
	###now read in the whole file; readLines() opens and closes the connection itself
	OD.raw<-readLines(fileName, skipNul=TRUE);
	OD.raw<-stri_conv(OD.raw, to="UTF-8");

	eplates<-elisa_run();
	eplates@num.plates<-num.plate
	eplates@batchID<-batchID
	eplates@desc<-expID;
	eplates@date<-date;
	#running OD range over all plates, starting from sentinel values
	range.ODs.min<- 100;
	range.ODs.max<- 0
	for(i in seq_len(num.plate))
	{
		#find the anchors or markers for each section/plate
		ind.start<-grep("^Plate:",OD.raw)
		if(length(ind.start)<1)
		{
			warning("not enough plates found in OD data file (no starting point), please check");
			break;	
		}
		ind.start<-ind.start[1];
		#only an end marker located after the start marker can close the section
		ind.end<-grep("^[ \t]*~End[ \t\r\n]*$",OD.raw, ignore.case=TRUE);
		ind.end<-ind.end[ind.end>ind.start]
		if(length(ind.end)<1)
		{
			warning("not enough plates found in OD data file (no ending point), please check");
			ind.end<-length(OD.raw);
		}
		ind.end<-ind.end[1]
		OD<-OD.raw[ind.start:ind.end];
		#keep what follows the section; nothing is left when it ends the file
		OD.raw<-if(ind.end<length(OD.raw)) OD.raw[(ind.end+1):length(OD.raw)] else character(0)
		
		#now feed to read plate
		eplates@plates[[i]]<-read.plate(OD, annotations[[i]], batchID,expID);
		plate.range<-eplates@plates[[i]]@range.ODs
		if(plate.range[1]<range.ODs.min)
		{
			range.ODs.min<-plate.range[1];
		}
		if(plate.range[2]>range.ODs.max)
		{
			range.ODs.max<-plate.range[2]
		}
	}
	
	eplates@range.ODs<-c(range.ODs.min, range.ODs.max)
	return(eplates)
}


#####read in 
##for annotation, we assume there is header indicating the row index,and rownames for row index correctly!!.
#'@title Read data according to the design file
#'@description Read the design file and then load the 
#'	data according to the information in the design file.
#'     
#'@details The design file contains all the information necessary
#'	to read data. It has the following format\cr
#'	\tabular{lllllllll}{
#'	ExpID\tab	FileName\tab	Batch\tab	Num_Plate\tab	Date\tab	AnnotationFile\tab	Std_Conc\tab	Dir_Annotation\tab	Dir_StdConc\cr
#'	Exp1\tab	file1.txt\tab	Batch1\tab	1\tab	9/18/2009\tab	annote.txt\tab	stdConc.txt\tab \tab \cr		
#'	Exp2\tab	file2.txt\tab	Batch2\tab	2\tab	9/18/2009\tab	annote.txt\tab	stdConc.txt\tab \tab \cr		
#'	}
#'	The return data is a list of batches (\code{\link{elisa_batch-class}}), 
#'	which are made of one or many elisa runs(\code{\link{elisa_run-class}}) 
#'	. The
#'		run could contain one or many elisa plates (\code{\link{elisa_plate-class}})
#'		with data or annotation of each plate.\cr
#'  \tabular{lllllll}{
#'	list\tab	|\tab	 \tab	 \tab	 \tab	 \tab	 \cr
#'	 \tab	|\tab	--batch1\tab	|\tab	\tab	\tab	\cr		
#'	 \tab	|\tab	 \tab	|\tab	--run1\tab	|\tab	\cr		
#'	 \tab	|\tab	 \tab	|\tab	 \tab	|\tab	--plate1\cr
#'	 \tab	|\tab	 \tab	|\tab	 \tab	|\tab	--plate2\cr
#'	 \tab	|\tab	--batch2\tab	|\tab	\tab	\tab	\cr
#'	 \tab	|\tab	 \tab	|\tab	\tab	\tab	\cr				
#'}
#'			
#'	@param  design.file characters to specify the path and the file name of the design file.
#' @return a list of batches holding different runs of elisa, which could contain
#'		one or many elisa_plates with data and annotations for 
#'		each plate.
#'
#'@examples
#' file.dir<-system.file("extdata", package="ELISAtools")
#' loadData(file.path(file.dir,"design.txt"));
#' @seealso  \code{\link{elisa_batch-class}} \code{\link{elisa_plate-class}} \code{\link{elisa_run-class}}
#'
#'@export
loadData<-function(design.file)
{
	#Read the tab-delimited design file and load every experiment it lists.
	#Columns used: ExpID, FileName, Batch, Num_Plate, Date, AnnotationFile,
	#Std_Conc, Dir_Annotation, Dir_StdConc.
	#Returns a list of elisa_batch objects indexed by batch id; each batch
	#holds one elisa_run per design row of that batch.
	#OD files are looked up in the folder of the design file. Annotation and
	#standard concentration files are looked up in that folder too, unless a
	#directory is given by the Dir_* column or by the file entry itself (the
	#latter wins).
	
	#first check the data integrity
	if(missing(design.file))
	{
		stop("ERROR: design file is missing, please specify");
	}
	
	dfile<-read.table(design.file, header=TRUE, sep="\t", stringsAsFactors=FALSE);
	
	#TRUE when a design-file cell holds something; empty cells are read as NA
	#or as "" depending on the other rows of the column
	has.value<-function(x)
	{
		length(x)==1 && !is.na(x) && nzchar(trimws(as.character(x)))
	}
	#pick the folder of an input file: default folder < Dir_* entry < folder
	#written in the file entry itself
	resolve.dir<-function(dir.entry, file.entry, default.dir)
	{
		resolved<-default.dir
		if(has.value(dir.entry))
		{
			resolved<-normalizePath(dir.entry)
		}
		if(dirname(file.entry)!=".")
		{
			resolved<-normalizePath(dirname(file.entry))
		}
		resolved
	}
	
	#start reading each individual plate
	#into different batch
	batch.IDs<-unique(dfile$Batch)
	batches<-list();
	dir_design<-normalizePath(dirname(design.file));
	for(i in seq_along(batch.IDs))
	{
		cat("Reading Data for Batch:",i,"--",batch.IDs[i],"\n")
		ind<-which(dfile$Batch==batch.IDs[i])
		ebatch<-elisa_batch();
		ebatch@batchID<-batch.IDs[i];
		ebatch@num.runs<-length(ind);
		#running OD range over all runs of the batch, starting from sentinel values
		range.ODs.min<-1000;
		range.ODs.max<-0;
		for(j in seq_along(ind))
		{
			entry<-dfile[ind[j],]
			cat("\tExperiment:",j,"--", entry$ExpID,"\n")
			flush.console();
			
			dir_ann<-resolve.dir(entry$Dir_Annotation, entry$AnnotationFile, dir_design)
			dir_sc<-resolve.dir(entry$Dir_StdConc, entry$Std_Conc, dir_design)
			
			#need to read annotation; the folder is passed separately, so only
			#the bare file name goes along (otherwise a folder written in the
			#file entry would be prepended twice)
			annotations<-read.annotations(annotation=basename(entry$AnnotationFile), 
					std.conc=basename(entry$Std_Conc), num.plate=entry$Num_Plate,
					dir.annotation=dir_ann, 
					dir.stdConc=dir_sc);
			    
			ebatch@runs[[j]]<-read.plates(fileName=file.path(dir_design,entry$FileName), 
					annotations=annotations, 
					batchID=entry$Batch, expID=entry$ExpID,
					num.plate=entry$Num_Plate,date=entry$Date
					)
			run.range<-ebatch@runs[[j]]@range.ODs
			if(run.range[1]<range.ODs.min)
			{
				range.ODs.min<-run.range[1];
			}
			if(run.range[2]>range.ODs.max)
			{
				range.ODs.max<-run.range[2]
			}
		}
		ebatch@range.ODs<-c(range.ODs.min, range.ODs.max);
		batches[[ batch.IDs[i] ]]<-ebatch;
	}
	cat("Done!!!\n")
	return(batches)
}

#functions to 
#'@title Read the saved elisa_batch data
#'@description Load the serialized elisa_batch data from disk.
#'     
#'@details Here we deserialize elisa_batch data by wrapping the readRDS()
#'		function call.
#'		The serialized elisa_batch data are assumed to have been correctly 
#'		analyzed. We will print a summary for what has been read. 
#'@param  db character string specifying the path and file name of the elisa data file.
#' @return a list of batches holding different runs of elisa, which could contain
#'		one or many elisa_plates with data and annotations for 
#'		each plate.
#'
#'@examples
#'#R code to run 5-parameter logistic regression on ELISA data
#'#load the library
#'library(ELISAtools)
#'
#'#get file folder
#'dir_file<-system.file("extdata", package="ELISAtools")
#'
#'batches<-loadData(file.path(dir_file,"design.txt"))
#'
#'#make a guess for the parameters, the other two parameters a and d 
#'#will be estimated based on data.
#'model<-"5pl"
#'pars<-c(7.2,0.5, 0.015) #5pl inits
#'names(pars)<-c("xmid", "scal", "g")
#'
#'
#'#do fitting. model will be written into data set.
#'batches<-runFit(pars=pars,  batches=batches, refBatch.ID=1, model=model  )
#'
#'#now call to do predictions based on the model.
#'batches<-predictAll(batches);
#'
#'#now saving the data.
#'saveDB(batches, file.path(tempdir(),"elisa_tool1.rds"));
#'
#' loadDB(file.path(tempdir(),"elisa_tool1.rds"));
#' @seealso  \code{\link{elisa_batch-class}} \code{\link{loadData}} \code{\link{saveDB}}
#'
#'@export
loadDB<-function(db)
{
	#the path of the serialized data set is mandatory
	if(missing(db))
	{
		stop("ERROR:please specify the input");
	}
	#when the path does not exist as given, retry once with the ".rds"
	#suffix appended (unless the name already ends in ".rds")
	if(!file.exists(db))
	{
		len<-nchar(db);
		tail4<-substr(db, len-3, len);
		candidate<-if(tail4!=".rds") paste0(db,".rds") else db;
		if(!file.exists(candidate))
		{
			stop("ERROR:Can not find the specified ELISA database, Please specify!!")
		}
		db<-candidate;
	}
	#report progress on the console around the actual deserialization
	cat("  ***loading ELISA data set: ",db,"\n")
	elisa.data<-readRDS(db);
	cat("  ***Success!!\n");
	return(elisa.data)
}

#function to serialize the elisa_batch data 

#functions to 
#'@title Save the elisa_batch data
#'@description Serialize elisa_batch data to disk.
#'     
#'@details We serialize elisa_batch data by wrapping the saveRDS()
#'		function call.
#'		The serialized elisa_batch data are assumed to have been correctly 
#'		analyzed. We will print a summary for what has been saved. 
#'@param  db character the file name specifying name of the db.
#'@param  batches list of elisa batch data to be serialized.
#'
#'
#'@examples
#'#R code to run 5-parameter logistic regression on ELISA data
#'#load the library
#'library(ELISAtools)
#'
#'#get file folder
#'dir_file<-system.file("extdata", package="ELISAtools")
#'
#'batches<-loadData(file.path(dir_file,"design.txt"))
#'
#'#make a guess for the parameters, the other two parameters a and d 
#'#will be estimated based on data.
#'model<-"5pl"
#'pars<-c(7.2,0.5, 0.015) #5pl inits
#'names(pars)<-c("xmid", "scal", "g")
#'
#'
#'#do fitting. model will be written into data set.
#'batches<-runFit(pars=pars,  batches=batches, refBatch.ID=1, model=model  )
#'
#'#now call to do predictions based on the model.
#'batches<-predictAll(batches);
#'
#'#now saving the data.
#'saveDB(batches, file.path(tempdir(),"elisa_tool1.rds"));
#'
#' @seealso  \code{\link{elisa_batch-class}} \code{\link{loadData}} \code{\link{loadDB}}
#'
#'@export
saveDB<-function(batches, db)
{
	#both the data and the target file name are mandatory
	if(missing(batches)||missing(db))
	{
		stop("one or both of the input missing, please check!!") 
	}
	#the data set is always stored under a name ending in ".rds"
	if(substr(db,nchar(db)-3, nchar(db))!=".rds")
	{
		db<-paste0(db,".rds")
	}
	if(file.exists(db))
	{
		#terminate the line so this notice is not glued to the next message
		cat("the specified database file exists, and will be overwritten\n");
	}
	cat("  ***saving ELISA data set: ",db,"\n")
	ret<-saveRDS(batches, db);
	cat("  ***success!!\n");
	#saveRDS() yields NULL; hand it back invisibly so that a top-level call
	#does not print a stray "NULL"
	return(invisible(ret));
}	

#functions to 
#'@title Save elisa_batch analysis results
#'@description Save the data analysis results to disk in text format.
#'     
#'@details The results are written to disk in a tab-delimited text format that is 
#'	easy to use in other analyses. 
#'@param  file.name character specifying name of the output file.
#'@param  batches list of elisa batch data to be serialized.
#'
#'
#'@examples
#'#R code to run 5-parameter logistic regression on ELISA data
#'#load the library
#'library(ELISAtools)
#'
#'#get file folder
#'dir_file<-system.file("extdata", package="ELISAtools")
#'
#'batches<-loadData(file.path(dir_file,"design.txt"))
#'
#'#make a guess for the parameters, the other two parameters a and d 
#'#will be estimated based on data.
#'model<-"5pl"
#'pars<-c(7.2,0.5, 0.015) #5pl inits
#'names(pars)<-c("xmid", "scal", "g")
#'
#'
#'#do fitting. model will be written into data set.
#'batches<-runFit(pars=pars,  batches=batches, refBatch.ID=1, model=model  )
#'
#'#now call to do predictions based on the model.
#'batches<-predictAll(batches);
#'
#'#now saving the data in text.
#'saveDataText(batches, file.path(tempdir(),"elisa_data.txt"));
#'
# #' @seealso  \code{\link{elisa_batch-class}} \code{\link{loadData}} \code{\link{saveDB}}
#'
#'@export
saveDataText<-function(batches, file.name)
{
	if(missing(batches))
	{
		stop("please specify the input batch data");
	}
	if(missing(file.name))
	{
		stop("please specify the file name for the analysis results");
	}
	if(file.exists(file.name))
	{
		cat("the specified file for saving analysis results exists. It will be overwritten\n");
	}
	file.conn<-file(file.name);
	#make sure the connection is released even when one of the writes below fails
	on.exit(close(file.conn), add=TRUE);
	open(file.conn,open="w");
	#file header: title line followed by the current date
	write(c("ELISA tool data analysis results"),file.conn, append=FALSE);
	write(c(paste0("Date:\t",date(),"\r\n")),file.conn, append=TRUE);
	
	#seq_along()/seq_len() are used for all loops so that an empty batch list,
	#a batch without runs or a run without plates simply writes nothing
	#(1:0 would iterate over c(1,0) and fail on the first subscript)
	for(i in seq_along(batches))
	{
		write(c("==============="),file.conn, append=TRUE)
		
		batch<-batches[[i]];
		#for each batch write the batch ID and its S factor
		write(c(paste0("batch:\t",batch@batchID,"\tS Factor:\t",batch@normFactor)),file.conn, append=TRUE)
		write(c("==============="),file.conn, append=TRUE)
		#now we first rearrange the data into data.frame and then write it out.
		for(j in seq_len(batch@num.runs))
		{
			run<-batch@runs[[j]];
			for(k in seq_len(run@num.plates))
			{
				plate<-run@plates[[k]];
				write(paste0("RUN_#",j,"\t",run@date, "\tplate_#",k,"\tS Factor:",plate@normFactor),file.conn, append=TRUE);
				
				#write.table() warns when column names are appended to an open file;
				#that is intended here, hence the warning is silenced
				suppressWarnings(write.table(plate@mdata.std,file=file.conn,append=TRUE,sep="\t", row.names = FALSE))
				#start making the data frame for output data
				unknown<-plate@data.unknown;
				if(is.null(unknown)||dim(unknown)[1]==0){
					#no unknown samples on this plate, only the standards are written
					next;
				}
				ids<-unique(unknown$ID);
				#determine the number of repeats: largest number of rows sharing one ID
				nRep<-max(aggregate(unknown, FUN=length,by=list(unknown$ID))$ID)
				
				#use the per-sample summary table when the plate has one,
				#otherwise average the raw ODs per sample ID here
				dfm<-data.frame();
				if(!is.null(plate@mdata.unknown)&&dim(plate@mdata.unknown)[1]!=0)
				{
					dfm<-plate@mdata.unknown;
				}else {
					dfm<-aggregate(unknown[,"OD"], FUN=mean,by=list(unknown$ID))
					colnames(dfm)<-c("ID","OD");
				}
				colnames(dfm)[colnames(dfm)=="OD"]<-"OD_avg";
				#append one NaN-filled "raw" column per replicate at the right end
				for(q in seq_len(nRep))
				{
					dfm<-cbind(dfm, raw=NaN)
				}
				#fill the raw columns row by row; samples with fewer than nRep
				#replicates keep NaN in the remaining columns
				for(p in seq_along(ids))
				{
						smp<-unknown[unknown$ID==ids[p],]$OD
						dfm[dfm$ID==ids[p],c(dim(dfm)[2]-nRep+seq_along(smp))]<-smp;
				}
				#reorder columns: first column, then the nRep raw columns, then the rest
				dfm<-dfm[,c(1,dim(dfm)[2]-c(nRep:1)+1,c(2:(dim(dfm)[2]-nRep))) ]
			
				#now save the data
				suppressWarnings(write.table(dfm,file=file.conn,append=TRUE,sep="\t", row.names = FALSE))
				write("\n",file.conn, append=TRUE);
			}
		}
	}
	#the connection is closed by the on.exit() handler registered above
	invisible(NULL)
}
Any scripts or data that you put into this service are public.
ELISAtools documentation built on April 12, 2025, 1:31 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ELISAtools
ELISA Data Analysis with Batch Correction

R/ELISAtools_IO.R
In ELISAtools: ELISA Data Analysis with Batch Correction

Defines functions saveDataText saveDB loadDB loadData read.plates read.plate read.annotations read.annotation

Documented in loadData loadDB read.annotation read.annotations read.plate read.plates saveDataText saveDB

Try the ELISAtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ELISAtools ELISA Data Analysis with Batch Correction

R/ELISAtools_IO.R In ELISAtools: ELISA Data Analysis with Batch Correction

Defines functions saveDataText saveDB loadDB loadData read.plates read.plate read.annotations read.annotation

Documented in loadData loadDB read.annotation read.annotations read.plate read.plates saveDataText saveDB

Try the ELISAtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ELISAtools
ELISA Data Analysis with Batch Correction

R/ELISAtools_IO.R
In ELISAtools: ELISA Data Analysis with Batch Correction