###########################################################################
# Statistical Microarray Analysis for R
# Exploratory analysis (i)
# Initialization functions
#
# Date : August 9, 2000
# Last update : November 12, 2000
#
# History:
# March, 19: Insert comments from the help files
# Nov, 10: Change data structure from matrix to list of matrix.
# Feb 12, 2003: Fix a bug in init.name.exp init.readexp -> init.read.exp
# Aug 15, 2003: Allow the column names to be specified in read.genepix
# because some newer versions of genepix have more
# columns. We will allow the user to decide how they want to
# name the columns.
#
# Authors: Sandrine Dudoit, Yee Hwa (Jean) Yang and Natalie Roberts
# with occasional maintenance from B. M. Bolstad
##########################################################################
##########################################################################
# Read in data from Spot output file
##########################################################################
########################################################################/**
# \name{write.spot}
#
# \alias{write.spot}
#
# \title{Writing in Data Generated by the Image Analysis Package Spot}
# \description{
# Function writes in a data file in a tab delimited table format.
# }
#
# \usage{
# write.spot(x, imageid, batch="output")
# }
#
# \arguments{
# \item{x}{the object to be written, typically a data frame. If not, it
# is attempted to create one from it.}
# \item{imageid}{integer value; the index of the slide which is
# considered}
# \item{batch}{character string, this refers to the name of a collection
# of experiments. The default batch name is "output".}
# }
# \details{
# This function writes the data in for each imageid, assigning each file
# the filename which takes the default form of "output".imageid.spot. The
# column names of x are written along with x in the table format
# }
#
# \references{ Spot manual \url{
# http://www.cmis.csiro.au/iap/Spot/spotmanual.htm}}
# }
#
# \author{ Jessica Mar }
#
# \seealso{ \code{\link{write.table}}, \code{\link{init.read.spot}} }
#
# \examples{## Setting up the data
# ## library(Spot)
# ## SetParameters("mouse")
# ## Here is what you should see:
# ## Enter number of rows of grids per image (ngrid.r): 4
# ## Enter number of columns of grids per image (ngrid.c): 4
# ## Enter number of rows of spots per grid (nspot.r): 19
# ## Enter number of columns of spots per grid (nspot.c): 21
# ## Enter top/bottom translation tolerance, default is 50 (tolerance.r): 20
# ## Enter left/right translation, default is 50 (tolerance.c): 30
# ## Initialization complete
#
# ##Inputting Image Data
# ## SetImages("mouse")
# ## Combining the red and green channels for the first slide
# ## mouse.array <- Spots("mouse", 1)
#
# ## Calling the function to write the data in
# ## write.spot(mouse.array, 1, "mouse")
# }
#
# \keyword{microarray, Spot, Genepix.}
#
#*/#####################################################################
write.spot <- function(x, imageid, batch="output")
{
  ## Write `x` as a tab-delimited table with a header row, no row names
  ## and no quoting.
  ##
  ## x       : object handed to write.table().
  ## imageid : a character value is used as the output file name as is;
  ##           a numeric value produces the name "<batch>.<imageid>.spot".
  ##           Anything else is an error.
  ## batch   : prefix used only when `imageid` is numeric.
  if (is.character(imageid)) {
    outfile <- imageid
  } else if (is.numeric(imageid)) {
    outfile <- paste(batch, imageid, "spot", sep=".")
  } else {
    stop("Warning: imageid must be a number or a character")
  }
  write.table(x, file=outfile, quote=FALSE, sep="\t",
              row.names=FALSE, col.names=TRUE)
}
########################################################################/**
# \name{read.spot}
#
# \alias{read.spot}
#
# \title{Reading in Data Generated by the Image Analysis Package Spot}
#
# \description{
# Reads in a data file in table format and creates a data frame with the
# same number of rows as there are lines in the file, and the same number
# of columns as there are fields in the file.\cr
# `read.spot': reads in the data file generated by the microarray image
# extraction library Spot.
# }
#
# \usage{
# read.spot(name, dir=".", sep="\t", header=TRUE, ...)
# }
#
# \arguments{
# \item{name}{character string naming the data file from which to read the
# data. }
#
# \item{dir}{character string naming the directory that contains the data
# file. The default setting is the current directory.}
#
# \item{sep}{the field separator (single character), often "\t" for
# tab delimited fields. If omitted, any amount of white space
# (blanks or tabs) can separate the fields. To read fixed format
# files, make sep a numeric vector giving the initial columns of
# the fields. }
#
# \item{header}{logical flag: if TRUE, then the first line of the
# file is used as the variable names of the resulting data frame. }
#
# \item{\dots}{parameters for read.table may also be supplied as arguments to
# the function (see \code{\link{read.table}}). }
# }
#
# \value{as in \code{\link{read.table}}, a data frame
# (\code{\link{data.frame}}) containing a representation of the data
# in the file.
# }
#
# \seealso{\code{\link{read.table}}, \code{\link{data.frame}},
# \code{\link{write.spot}}, \code{\link{read.genepix}}.}
#
# \examples{
# ## write.spot(mouse.array, 1, "mouse")
# ## mouse1 <- read.spot("mouse.1.spot")
# }
#
# \keyword{microarray, Spot, GenePix.}
#
#*/#####################################################################
read.spot <- function(name, dir=".", sep="\t", header=TRUE, ...)
{
  ## Read the file "<dir>/<name>" with read.table() and return the
  ## resulting data frame. `sep`, `header` and any extra arguments in
  ## `...` are forwarded to read.table() unchanged.
  read.table(file = paste(dir, name, sep="/"), sep = sep, header = header, ...)
}
########################################################################
## Read in data from GenePix output
## Assuming you include all columns of the output.
########################################################################
########################################################################/**
# \name{read.genepix}
#
# \alias{read.genepix}
#
# \title{Reading in Data Generated by the Image Analysis Package GenePix.}
#
# \description{
# Reads in a data file in table format and creates a data frame
# with the same number of rows as there are lines in the
# file, and the same number of columns as there are fields
# in the file.\cr
# `read.genepix' reads in the data file generated by the software
# "GenePix".
# }
#
# \usage{
# read.genepix(name, dir = ".", sep = "\t", header = T, skip = 26, ...)
# }
#
# \arguments{
# \item{name}{character string naming the data file from which to read the
# data. }
# \item{dir}{character string naming the directory that contains the
# data file.}
#
# \item{sep}{the field separator (single character), often "\t" for
# tab delimited fields. If omitted, any amount of white space
# (blanks or tabs) can separate the fields. To read fixed format
# files, make sep a numeric vector giving the initial columns of
# the fields. }
#
# \item{header}{logical flag: if TRUE, then the first line of the
# file is used as the variable names of the resulting data frame. }
#
# \item{skip}{the number of lines of the data file to skip before beginning
# to read data.}
#
# \item{\dots}{parameters for read.table may also be supplied as arguments to
# the function (see \code{\link{read.table}}). }
# }
#
# \value{as in \code{\link{read.table}}, a data frame
# (\code{\link{data.frame}}) containing a representation of the data
# in the file.
# }
#
# \seealso{\code{\link{read.table}}, \code{\link{data.frame}},
# \code{\link{read.spot}}.}
#
# \keyword{microarray, GenePix.}
#
#*/#####################################################################
read.genepix <- function (name, dir = ".", sep = "\t", header = TRUE, skip=26,
                          gpname = c("Block", "Col", "Row", "Name", "ID",
                                     "X", "Y", "Dia",
                                     "Rmed", "Rmean", "RSD",
                                     "Rbmed", "Rbmean", "RbSD",
                                     "Rb1SD", "Rb2SD", "Rbsat",
                                     "Gmed", "Gmean", "GSD",
                                     "Gbmed", "Gbmean", "GbSD",
                                     "Gb1SD", "Gb2SD", "Gbsat",
                                     "ratiomed", "ratiomean", "medratio",
                                     "meanratio", "ratiosd", "Rratio",
                                     "RegR2", "FPixels", "BPixels",
                                     "summed", "summean", "logratio",
                                     "Rmedc", "Gmedc", "Rmeanc", "Gmeanc",
                                     "flags"), ...)
{
  ## Read the file "<dir>/<name>" with read.table() (quoting disabled,
  ## `skip` leading lines ignored) and rename its columns to `gpname`.
  ##
  ## name, dir : file name and the directory that contains it.
  ## sep, header, skip, ... : forwarded to read.table().
  ## gpname : one column name per column of the file. The default holds
  ##          43 names; the 42nd is "Gmeanc" (it used to repeat "Gmedc",
  ##          which produced a duplicated column name).
  ##
  ## Returns the data frame with its columns renamed.
  newname <- paste(dir, name, sep = "/")
  x <- read.table(newname, sep = sep, header = header, skip = skip,
                  quote = "", ...)
  ## A shorter `gpname` would silently leave NA column names behind, so
  ## insist on an exact match and tell the user how to recover.
  if (length(gpname) != ncol(x)) {
    stop("'gpname' has ", length(gpname), " names but \"", newname,
         "\" has ", ncol(x), " columns; ",
         "supply a 'gpname' with one name per column", call. = FALSE)
  }
  colnames(x) <- gpname
  x
}
##########################################################################
# Initialization: slide layout and data matrix of fluorescence intensities
##########################################################################
########################################################################/**
# \name{init.grid}
#
# \alias{init.grid}
#
# \title{
# Initialization of Grid Parameters}
#
# \description{
# Interactive function for specifying the dimensions of the spot
# matrix and the grid matrix. These parameters depend on the printing
# layout of the array, and are used for the within print-tip group
# normalization implemented in \code{\link{stat.ma}} and the spatial
# representation of spot statistics in \code{\link{plot.spatial}}.
# }
#
# \usage{
# init.grid()
# }
#
# \arguments{
# None.
# }
#
# \value{list containing the following components
# \item{nspot.r}{ the number of rows of spots per grid;}
# \item{nspot.c}{ the number of columns of spots per grid;}
# \item{ngrid.r}{ the number of rows of grids per image;}
# \item{ngrid.c}{ the number of columns of grids per image.}
# }
#
# \references{
# Spot manual.
# }
#
# \seealso{
# \code{\link{plot.mva}}, \code{\link{plot.spatial}},
# \code{\link{stat.ma}}, \code{\link{list}}.
# }
#
#
# \author{
# Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu} \cr
# Sandrine Dudoit, \email{sandrine@stat.berkeley.edu}
# }
#
# \examples{
# data(MouseArray)
# # mouse.setup <- init.grid()
#
# ## Here is what you should see:
# # Enter number of rows of grids per image (ngrid.r): 4
# # Enter number of columns of grids per image (ngrid.c): 4
# # Enter number of rows of spots per grid (nspot.r): 19
# # Enter number of columns of spots per grid (nspot.c): 21
# # Initialization complete
# }
#
# \keyword{microarray, grid.}
#*/#########################################################################
init.grid <- function(){
  ## Ask for the four layout dimensions on the console and return them
  ## as a list of integers named nspot.r, nspot.c, ngrid.r, ngrid.c.

  ## Print a question without a trailing newline, then read one line.
  ask <- function(question) {
    cat (question)
    readline()
  }

  ## Questions are asked grid-first, exactly in this order.
  grid.rows <- ask("Enter number of rows of grids per image (ngrid.r): ")
  grid.cols <- ask("Enter number of columns of grids per image (ngrid.c): ")
  spot.rows <- ask("Enter number of rows of spots per grid (nspot.r): ")
  spot.cols <- ask("Enter number of columns of spots per grid (nspot.c): ")
  cat ("Initialization complete\n")

  ## The answers are converted only now, spot dimensions first.
  list(nspot.r = as.integer(spot.rows),
       nspot.c = as.integer(spot.cols),
       ngrid.r = as.integer(grid.rows),
       ngrid.c = as.integer(grid.cols))
}
########################################################################/**
#
# \name{init.data}
#
# \alias{init.data}
#
# \title{Creating a Data Structure for Multi-slide Microarray Experiments}
#
# \description{
# Interactive function which creates a data structure for multi-slide
# microarray experiments. The data structure is a list of
# matrices. For each spotted DNA sequence, the list stores raw red and
# green signal intensities as well as red and green background
# intensities. The function also allows the user to add data to an
# existing structure.
# }
#
# \usage{
# init.data()
# }
#
# \arguments{
# None.
# }
#
# \value{
# List containing the following components:
# \item{R}{contains the raw red intensities, R.}
# \item{G}{contains the raw green intensities, G.}
# \item{Rb}{contains the background red intensities, Rb.}
# \item{Gb}{contains the background green intensities, Gb.}
# }
#
# \author{
# Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu} \cr
# Sandrine Dudoit, \email{sandrine@stat.berkeley.edu}
# }
#
# \examples{
# ## mouse.data <- init.data()
#
# ## Here is what you should see:
# ## Are you creating a new data matrix or adding new array data
# ## to a prexisting data matrix?
# ## Enter "n" for creating and "a" for adding new array data: n
# ## Do the names of all your datasets have the following format:
# ## prefix1, prefix2, prefix3?, ... Here prefix can be any name,
# ## but the suffixes must be integers 1,2, ..., # of arrays.
# ## Enter "y" for yes, "n" for no: y
# ## Enter the prefix:mouse
# ## Enter the number of arrays to be processed:6
# ## Enter the name of Cy3 raw data: Gmean
# ## Enter the name of Cy3 background: morphG
# ## Enter the name of Cy5 raw data: Rmean
# ## Enter the name of Cy5 background: morphR
# ## Finished creating new dataset.
# }
#
# \keyword{microarray.}
#
#*/#########################################################################
init.data<-function()
{
  ## Interactively build (or extend) a list with components R, G, Rb and
  ## Gb. Each component is a matrix with one column per array, filled
  ## from already-loaded datasets whose names the user types in.
  ##
  ## Takes no arguments. Returns the list; when the user answers "a" the
  ## new columns are appended to the existing object they name.
  ##
  ## Errors if the naming question is not answered "y" or "n", or if the
  ## number of arrays is not a positive integer.

  ## Datasets are looked up starting from the caller's environment, so a
  ## dataset that happens to share a name with one of this function's
  ## local variables (tmp, res, n, ...) is still found correctly.
  caller <- parent.frame()

  ## Print a question without a trailing newline, then read one line.
  ask <- function(question) {
    cat(question)
    readline()
  }

  ## This function assumes that you have already read in the data.
  cat("Are you creating a new data matrix or adding new array data\n")
  cat("to a prexisting data matrix? \n")
  new.n <- ask("Enter \"n\" for creating and \"a\" for adding new array data: ")
  ## Any answer other than "a" is treated as "create a new structure".
  adding <- new.n == "a"
  if (adding) {
    oname <- ask("Enter the name of the existing data matrix: ")
  }

  cat("Do the names of all your datasets have the following format: \n")
  cat("prefix1, prefix2, prefix3?, ... Here prefix can be any name, \n")
  cat("but the suffixes must be integers 1,2, ..., # of arrays. \n")
  b.n <- ask("Enter \"y\" for yes, \"n\" for no: ")
  ## Fail immediately on an unusable answer instead of carrying on with
  ## undefined dataset names.
  if (!(b.n %in% c("y", "n"))) {
    stop("Answer must be \"y\" or \"n\", not \"", b.n, "\"", call. = FALSE)
  }

  if (b.n == "y") {
    prefixname <- ask("Enter the prefix:")
  }
  n <- suppressWarnings(
    as.integer(ask("Enter the number of arrays to be processed:")))
  if (is.na(n) || n < 1L) {
    stop("The number of arrays must be a positive integer", call. = FALSE)
  }

  if (b.n == "y") {
    dname <- paste(prefixname, seq_len(n), sep="")
  } else {
    dname <- character(n)
    for (i in seq_len(n)) {
      dname[i] <- ask(paste("Enter the name of your ", i,"th dataset:"))
    }
  }

  name.G <- ask("Enter the name of Cy3 raw data: ")
  name.Gb <- ask("Enter the name of Cy3 background: ")
  name.R <- ask("Enter the name of Cy5 raw data: ")
  name.Rb <- ask("Enter the name of Cy5 background: ")

  if (adding) {
    res <- get(oname, envir = caller)
    action <- "updating"
  } else {
    res <- list(R = NULL, G = NULL, Rb = NULL, Gb = NULL)
    action <- "creating"
  }

  wanted <- c(name.R, name.G, name.Rb, name.Gb)
  for (i in seq_len(n)) {
    ## drop = FALSE keeps a one-row matrix dataset two-dimensional.
    tmp <- get(dname[i], envir = caller)[, wanted, drop = FALSE]
    ## as.vector() first, so factor columns are converted through their
    ## labels rather than their integer codes.
    res$R <- cbind(res$R, as.numeric(as.vector(tmp[, 1])))
    res$G <- cbind(res$G, as.numeric(as.vector(tmp[, 2])))
    res$Rb <- cbind(res$Rb, as.numeric(as.vector(tmp[, 3])))
    res$Gb <- cbind(res$Gb, as.numeric(as.vector(tmp[, 4])))
  }
  cat(paste("Finished", action, "the dataset.\n", sep=" "))
  res
}
########################################################################/**
#
# \name{init.addinfo}
#
# \alias{init.addinfo}
#
# \title{Adding Information to a Data Structure for Multi-slide
# Microarray Experiments}
#
# \description{
# Interactive function which adds other information generated from the
# output of image analysis software for microarrays to the existing
# data structure created using \code{\link{init.data}}.
# }
#
# \usage{
# init.addinfo(batch, attri, dataname=NULL, ...)
# }
#
# \arguments{
# \item{batch}{Character string, this refers to the name of a
# collection of experiments.}
# \item{attri}{Character string, the name of the information to be
# included in the data structure. For example, from the output of
# \tt{Spot}, this argument can be "area", "signal to noise" etc.
# In other words, these are the column headings from the raw data set.}
# \item{dataname}{Character string, the name of the R object holding
# your experimental data. By default it is "batch.data", where batch is
# the name of the collection of experiments you are interested in.}
# }
#
# \value{
# List containing the following component:
#
# \item{R}{contains the raw red intensities, R.}
# \item{G}{contains the raw green intensities, G.}
# \item{Rb}{contains the background red intensities, Rb.}
# \item{Gb}{contains the background green intensities, Gb.}
# as well as information from other users selected columns information.
# }
#
# \author{
# Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu} \cr
# Natalie Roberts \email{nroberts@wehi.edu.au}
# }
#
# \examples{
# ## mouse.data <- init.addinfo("mouse", "area")
# }
#
# \section{Warning}{The code in the example is not directly executable as
# it draws upon a particular set of data. This data may be downloaded from
# \url{http://www.stat.berkeley.edu/users/terry/zarray/Software/smacode.html}
# and when loaded appropriately into the user's directory, this example
# should be executable in its current form. }
#
# \keyword{microarray, quality information.}
#
#*/########################################################################
init.addinfo <- function(batch, attri, dataname=NULL, ...)
{
  ## Append one extra measurement to an existing data structure.
  ##
  ## batch    : batch name; passed to init.show.exp() and init.read.exp().
  ## attri    : column selected from each matrix that init.read.exp()
  ##            returns; also used as the name of the added component.
  ## dataname : name of the existing list object; defaults to
  ##            "<batch>.data".
  ## ...      : forwarded to init.read.exp().
  ##
  ## Returns the existing list with one more component, named `attri`,
  ## holding the selected column of every slide bound side by side.
  if (is.null(dataname)) dataname <- paste(batch, "data", sep=".")

  ## Fetch the existing structure once, starting from the caller's
  ## environment, so a `dataname` such as "res" or "measure" cannot be
  ## shadowed by this function's own local variables.
  caller <- parent.frame()
  olddata <- get(dataname, envir = caller)

  nd <- nrow(init.show.exp(batch))
  if (nd < 1) {
    stop("No experiments are listed for batch \"", batch, "\"",
         call. = FALSE)
  }

  measure <- NULL
  for (i in seq_len(nd)) {
    tmp <- init.read.exp(batch, i, ...)
    measure <- cbind(measure, tmp[, attri])
  }

  res <- c(olddata, list(measure))
  names(res) <- c(names(olddata), attri)
  res
}
#########################################################################/**
#
# \name{init.read.exp}
# \alias{init.read.exp}
#
# \title{Reads the Output of the Computed Statistics}
#
# \description{
# Function displays the 30 measurements computed by the program Spot for
# each gene in the slide being considered.}
#
# \usage{
# init.read.exp(batch, imageid, sep="\t", header=T, ...)}
#
# \arguments{
# \item{batch}{batch name of the experiment}
# \item{imageid}{integer value; the index of the slide which is considered}
# \item{sep}{the field separator character; the columns of the file
# will be separated by this character.}
# \item{header}{a logical value indicating whether the file contains the
# names of the variables as its first line.}
# \item{\dots}{further arguments passed on to \code{\link{read.table}}
# when the data file is read.}
# }
#
# \value{
# A matrix containing the 30 columns of computed measurements,
# corresponding to the rows of different genes in the specified
# slide. \cr Details regarding these measurements can be found at
# \url{http://www.cmis.csiro.au/iap/Spot/spotoutput.htm}.}
#
# \references{Spot manual
# \url{http://www.cmis.csiro.au/iap/Spot/spotmanual.htm}}.
# }
#
# \author{Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu}}
#
# \examples{
# ## apoa1.info <- init.read.exp("apoa1", 1) ## obtains the matrix
# ## of 30 measurements for all the genes spotted on slide 1 of the MouseArray
# ## experiment.}
#
# \section{Warning}{The code in the example is not directly executable as
# it draws upon a particular set of data. This data may be downloaded from
# \url{http://www.stat.berkeley.edu/users/terry/zarray/Software/smacode.html}
# and when loaded appropriately into the user's directory, this
# example should be executable in its current form.}
#
# \keyword{measurements, statistics}
#*/#########################################################################
init.read.exp <- function(batch, imageid, sep="\t", header=TRUE, ...)
{
  ## Read the data file of one slide and return it as a numeric matrix.
  ##
  ## batch   : batch name handed to init.show.exp() to get the look-up
  ##           table.
  ## imageid : row of that table; the file named in its second column is
  ##           the one that is read.
  ## sep, header, ... : forwarded to read.table().
  ##
  ## Returns a numeric matrix with one column per column of the file,
  ## column names kept and row names dropped. Entries that cannot be
  ## converted to numbers become NA (with the usual coercion warning).
  tmp <- init.show.exp(batch)
  res <- as.matrix(read.table(tmp[imageid, 2], sep = sep, header = header, ...))
  ## Rebuild the matrix explicitly instead of using apply(): apply()
  ## collapses a one-row file to a plain vector, which breaks callers
  ## that index the result by column.
  matrix(as.numeric(res), nrow = nrow(res), ncol = ncol(res),
         dimnames = list(NULL, colnames(res)))
}
########################################################################/**
#
# \name{init.names}
#
# \alias{init.name.exp}
# \alias{init.show.exp}
#
# \title{Set and Read the Names of Experimental Data.}
# \description{
# `init.name.exp' creates a look-up table which contains the names of the
# experimental data files and the corresponding object names in R. \cr
# `init.show.exp' displays the look-up table created by
# \code{init.name.exp}.
# }
# \usage{
# init.name.exp(Robject=F)
# init.show.exp("batch")
# }
#
# \arguments{
# \item{Robject}{if TRUE, the function generates a matrix of characters.
# Otherwise, this matrix is written to a file.}
# \item{batch}{Character string, this refers to the name of a
# collection of experiments.}
# }
#
# \value{
# \code{init.show.exp} returns a list containing the following components:
# \item{Name in R}{the object names in R;}
# \item{Filename}{the experimental data filenames, including the full
# path name for each file.}
# }
# \references{Spot manual
# \url{http://www.cmis.csiro.au/iap/Spot/spotmanual.htm}
#
# \author{Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu}}
#
#
# \examples{
# ## init.name.exp() ## To create the look-up table.
#
# ## This is what you should see:
# ## Are you creating a new batch.exp file or adding new data names
# ## to a prexisting batch.exp file?
# ## Enter "n" for creating and "a" for adding new data names: n
# ## Enter the batch name for the new .exp file: mouse1
# ## Enter the number of names of files to be entered: 2
# ## Enter the R name of your 1 th dataset: m1
# ## Enter the actual file name including the full path name for m1 ?
# ## ~/path/image1.data
# ## Enter the R name of your 2 th dataset: m2
# ## Enter the actual file name including the full path name for m2 ?
# ## ~/path/image2.data
# ## Finished adding names to .exp file.
# ## NULL
#
# ## View the look-up table.
# ## init.show.exp("mouse1")
# ##
# ## Name in R Filename
# ## 1 m1 ~/path/image1.data
# ## 2 m2 ~/path/image2.data
# }
#
# \keyword{filename}
#
#*/#########################################################################
init.show.exp <- function(batch)
{
  ## Read the look-up table stored in the file "<batch>.exp".
  ##
  ## The file is read with a header line and must contain exactly two
  ## columns. Returns a data frame whose columns are renamed to
  ## "Name in R" and "Filename". Stops if the file is missing or does
  ## not have two columns.
  expfile <- paste(batch, "exp", sep = ".")
  if (!file.exists(expfile))
    stop(paste0("File \"", expfile, "\" does not exist. \n"))

  lookup <- read.table(expfile, header = TRUE, as.is = TRUE)
  if (ncol(lookup) != 2)
    stop(paste0("Should be two columns in experiment name file \"",
                expfile, "\". \n"))

  names(lookup) <- c("Name in R", "Filename")
  lookup
}
init.name.exp <-function(Robject=FALSE)
{
  ## Interactively create, or append to, the file "<batch>.exp": a
  ## tab-separated look-up table with the header "dname", "pname" and one
  ## row per dataset (its R name and its data file name).
  ##
  ## Robject : if TRUE the character matrix that was written is returned;
  ##           otherwise NULL is returned.
  ##
  ## Errors if the number of names is not a positive integer, or if no
  ## valid "n"/"a" answer is given in a non-interactive session.

  ## Print a question without a trailing newline, then read one line.
  ask <- function(question) {
    cat(question)
    readline()
  }

  cat("\nAre you creating a new batch.exp file or adding new data names\n")
  cat("to a prexisting batch.exp file? \n")
  new.n <- ask("Enter \"n\" for creating and \"a\" for adding new data names: ")
  while (!(new.n %in% c("n", "a"))) {
    ## Outside an interactive session readline() returns "" at once, so
    ## re-prompting would never end; give up with an error instead.
    if (!interactive()) {
      stop("init.name.exp needs the answer \"n\" or \"a\"", call. = FALSE)
    }
    new.n <- ask("Please enter \"n\" for creating, \"a\" for adding new data names or Ctl-C to quit")
  }

  if (new.n == "a") {
    oname <- ask("Enter the batch name of the existing .exp file: ")
  } else {
    oname <- ask("Enter the batch name for the new .exp file: ")
  }

  n <- suppressWarnings(
    as.integer(ask("Enter the number of names of files to be entered: ")))
  if (is.na(n) || n < 1L) {
    stop("The number of names must be a positive integer", call. = FALSE)
  }

  dname <- character(n)
  pname <- character(n)
  for (i in seq_len(n)) {
    dname[i] <- ask(paste("\n Enter the R name of your ", i,"th dataset:"))
    pname[i] <- ask(paste("\n Enter the actual file name including the full path name for", dname[i],"?"))
  }

  res <- cbind(dname, pname)
  if (new.n == "a") {
    ## The existing table comes from init.show.exp(). Its display column
    ## names are dropped so the rewritten file keeps the two-field
    ## "dname"/"pname" header of a freshly created file.
    existing <- unname(as.matrix(init.show.exp(oname)))
    res <- rbind(existing, res)
  }
  write.table(res, paste(oname, "exp", sep="."), sep="\t",
              row.names = FALSE, col.names = TRUE, quote = FALSE)

  cat("Finished adding names to .exp file.\n")
  if(!Robject) res <- NULL
  res
}
########################################################################/**
# \name{init.ctl.index}
# \alias{init.ctl.index}
# \title{Generates co-ordinates of spots.}
# \description{
# Generates the 4 co-ordinates of any spots.
# }
# \usage{
# init.ctl.index(grows, gcols, srows, scols)
# }
# \arguments{
# \item{grows}{The row index of the grid.}
# \item{gcols}{The column index of the grid.}
# \item{srows}{The row index of the spot within its grid.}
# \item{scols}{The column index of the spot within its grid.}
# }
#
# \value{
# a matrix in which each row contains a vector of 4 integer elements
# which make up the image coordinates of a gene.
# }
#
# \examples{
# x <- init.ctl.index(1:4, 1:4, 1:2, 15:20)
# ## Generates the 4 co-ordinates index to spots in the first 2 rows,
# ## columns 15 to 20 of every print-tips groups.
# }
# \keywords{microarray}
#
# \author{Yee Hwa Yang, \email{yeehwa@stat.berkeley.edu}}
#
# \note{Sorry: No help files yet}
#*/#######################################################################
init.ctl.index <-
function(grows, gcols, srows, scols)
{
  ## Build every combination of the supplied grid rows, grid columns,
  ## spot rows and spot columns.
  ##
  ## Returns a matrix with columns t1 (grid row), t2 (grid column),
  ## t3 (spot row) and t4 (spot column), one row per combination. The
  ## spot column varies fastest, then spot row, then grid column; the
  ## grid row varies slowest.
  n.grid.rows <- length(grows)
  n.grid.cols <- length(gcols)
  n.spot.rows <- length(srows)
  n.spot.cols <- length(scols)
  spots.per.grid <- n.spot.rows * n.spot.cols
  n.grids <- n.grid.cols * n.grid.rows

  cbind(
    t1 = rep(grows, each = spots.per.grid * n.grid.cols),
    t2 = rep(rep(gcols, each = spots.per.grid), times = n.grid.rows),
    t3 = rep(rep(srows, each = n.spot.cols), times = n.grids),
    t4 = rep(scols, times = n.spot.rows * n.grids)
  )
}
##########################################################################
# End of file
##########################################################################
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.