#' One-way ANOVA
#'
#' Performs one-way ANOVA. Creates a file with results:
#' either \code{"anova_nonparametric.csv"} or \code{"anova_posthoc.csv"}
#' depending on \code{nonpar} argument.
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list).
#' @param nonpar If \code{FALSE} - use classical ANOVA; if \code{TRUE} - Kruskal Wallis Test
#' @param thresh Threshold of significance.
#' @param post.hoc Post-hoc statistics: \code{"tukey"} or \code{"fisher"}
#' @return Native \code{analSet} with one added \code{$aov} element consisting of:
#' \itemize{
#' \item\code{$aov.nm} - method of analysis
#' \item\code{$raw.thresh} - value of \code{thresh} argument
#' \item\code{$thresh} - \code{-log10(thresh)}
#' \item\code{$p.value} - p-values
#' \item\code{$p.log} - \code{-log10(p.value)}
#' \item\code{$inx.imp} - logical vector of features with significant difference
#' \item\code{$post.hoc} - post-hoc statistics, value of \code{post.hoc} argument
#' \item\code{$sig.mat} - data frame of significant features with corresponding statistics
#' }
#' @seealso \code{\link{PlotANOVA}} for plotting functions
#' \code{\link{ANOVA2.Anal}} for two-way analytical function
#' @export
ANOVA.Anal<-function(dataSet, analSet, nonpar= FALSE, thresh=0.05, post.hoc="fisher"){
# perform anova and only return p values and MSres (for Fisher's LSD)
match.arg(post.hoc, c("fisher", "tukey"))
aof <- function(x, cls = dataSet$cls) {
aov(x ~ cls);
}
# perform Kruskal Wallis Test
kwtest <- function(x, cls = dataSet$cls) {
kruskal.test(x ~ cls);
}
FisherLSD<-function(aov.obj, thresh){
LSD.test(aov.obj,"cls", alpha=thresh)
}
# return only the signicant comparison names
parseTukey <- function(tukey, cut.off){
inx <- tukey$cls[,"p adj"] <= cut.off;
paste(rownames(tukey$cls)[inx], collapse="; ");
}
# return only the signicant comparison names
parseFisher <- function(fisher, cut.off){
inx <- fisher[,"pvalue"] <= cut.off;
paste(rownames(fisher)[inx], collapse="; ");
}
if(nonpar){
aov.nm <- "Kruskal Wallis Test";
anova.res<-apply(as.matrix(dataSet$norm), 2, kwtest);
#extract all p values
p.value<-unlist(lapply(anova.res, function(x) {x$p.value}));
names(p.value)<-colnames(dataSet$norm);
fdr.p <- p.adjust(p.value, "fdr");
inx.imp <- p.value <= thresh;
if(sum(inx.imp) == 0){ # no sig features!
cutpt <- round(0.2*length(p.value));
cutpt <- ifelse(cutpt>50, 50, cutpt);
inx <- which(rank(p.value) == cutpt);
thresh <- p.value[inx];
inx.imp <- p.value <= thresh;
}
sig.p <- p.value[inx.imp];
fdr.p <- fdr.p[inx.imp];
sig.mat <- data.frame(signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), 'NA');
rownames(sig.mat) <- names(sig.p);
colnames(sig.mat) <- c("p.value", "-log10(p)", "FDR", "Post-Hoc");
# order the result simultaneously
ord.inx <- order(sig.p, decreasing = FALSE);
sig.mat <- sig.mat[ord.inx,];
fileName <- "anova_nonparametric.csv";
write.csv(sig.mat,file=fileName);
}else{
aov.nm <- "One-way ANOVA";
aov.res<-apply(as.matrix(dataSet$norm), 2, aof);
anova.res<-lapply(aov.res, anova);
#extract all p values
p.value<-unlist(lapply(anova.res, function(x) { x["Pr(>F)"][1,]}));
names(p.value)<-colnames(dataSet$norm);
fdr.p <- p.adjust(p.value, "fdr");
# do post-hoc only for signficant entries
inx.imp <- p.value <= thresh;
if(sum(inx.imp) == 0){ # no sig features with default thresh
# readjust threshold to top 20% or top 50
cutpt <- round(0.2*length(p.value));
cutpt <- ifelse(cutpt>50, 50, cutpt);
inx <- which(rank(p.value) == cutpt);
thresh <- p.value[inx];
inx.imp <- p.value <= thresh;
}
aov.imp <- aov.res[inx.imp];
sig.p <- p.value[inx.imp];
fdr.p <- fdr.p[inx.imp];
cmp.res <- NULL;
post.nm <- NULL;
if(post.hoc=="tukey"){
tukey.res<-lapply(aov.imp, TukeyHSD, conf.level=1-thresh);
cmp.res <- unlist(lapply(tukey.res, parseTukey, cut.off=thresh));
post.nm = "Tukey's HSD";
}else{
fisher.res<-lapply(aov.imp, FisherLSD, thresh);
cmp.res <- unlist(lapply(fisher.res, parseFisher, cut.off=thresh));
post.nm = "Fisher's LSD";
}
# create the result dataframe,
# note, the last column is string, not double
sig.mat <- data.frame(signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), cmp.res);
rownames(sig.mat) <- names(sig.p);
colnames(sig.mat) <- c("p.value", "-log10(p)", "FDR", post.nm);
# order the result simultaneously
ord.inx <- order(sig.p, decreasing = FALSE);
sig.mat <- sig.mat[ord.inx,];
fileName <- "anova_posthoc.csv";
write.csv(sig.mat,file=fileName);
}
aov<-list (
aov.nm = aov.nm,
raw.thresh = thresh,
thresh = -log10(thresh), # only used for plot threshold line
p.value = p.value,
p.log = -log10(p.value),
inx.imp = inx.imp,
post.hoc = post.hoc,
sig.mat = sig.mat
);
analSet$aov <- aov;
return(analSet);
}
#' Plot one-way ANOVA results
#'
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list).
#' @param imgName Image file name prefix.
#' @param format Image format, one of: "png", "tiff", "pdf", "ps", "svg"
#' @param dpi Image resolution.
#' @param width Image width.
#' @seealso \code{\link{ANOVA.Anal}} for analytical function
#' @export
PlotANOVA<-function(dataSet, analSet, imgName="aov_", format="png", dpi=72, width=NA){
if (is.null(analSet$aov)) stop("Please, conduct ANOVA.Anal first.")
lod <- analSet$aov$p.log;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7;
analSet$imgSet$anova<-imgName;
}else{
w <- width;
}
h <- w*6/9;
Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
plot(lod, ylab="-log10(p)", xlab = GetVariableLabel(dataSet), main=analSet$aov$aov.nm, type="n");
red.inx<- which(analSet$aov$inx.imp);
blue.inx <- which(!analSet$aov$inx.imp);
points(red.inx, lod[red.inx], bg="red", cex=1.2, pch=21);
points(blue.inx, lod[blue.inx], bg="green", pch=21);
abline (h=analSet$aov$thresh, lty=3);
dev.off();
frame()
grid::grid.raster(png::readPNG(imgName));
}
GetAovSigMat<-function(analSet){
return(CleanNumber(as.matrix(analSet$aov$sig.mat[, 1:3])));
}
GetAovSigRowNames<-function(analSet){
rownames(analSet$aov$sig.mat);
}
GetAovSigColNames<-function(analSet){
colnames(analSet$aov$sig.mat[, 1:3]);
}
GetAovPostHocSig<-function(analSet){
analSet$aov$sig.mat[,4];
}
GetSigTable.Anova<-function(dataSet, analSet){
GetSigTable(dataSet, analSet$aov$sig.mat, "One-way ANOVA and post-hoc analysis");
}
GetAnovaUpMat<-function(analSet){
lod <- analSet$aov$p.log;
red.inx<- which(analSet$aov$inx.imp);
as.matrix(cbind(red.inx, lod[red.inx]));
}
GetAnovaDnMat<-function(analSet){
lod <- analSet$aov$p.log;
blue.inx <- which(!analSet$aov$inx.imp);
as.matrix(cbind(blue.inx, lod[blue.inx]));
}
GetAnovaLnMat<-function(analSet){
lod <- analSet$aov$p.log;
as.matrix(rbind(c(0, analSet$aov$thresh), c(length(lod)+1,analSet$aov$thresh)));
}
GetAnovaCmpds<-function(analSet){
names(analSet$aov$p.log);
}
GetMaxAnovaInx <- function(analSet){
which.max(analSet$aov$p.log);
}
#######################################################
## calculate Fisher's Least Significant Difference (LSD)
## adapted from the 'agricolae' package
##############################################
LSD.test <- function (y, trt, alpha = 0.05){
clase<-c("aov","lm")
name.y <- paste(deparse(substitute(y)))
name.t <- paste(deparse(substitute(trt)))
if("aov"%in%class(y) | "lm"%in%class(y)){
A<-y$model
DFerror<-df.residual(y)
MSerror<-deviance(y)/DFerror
y<-A[,1]
ipch<-pmatch(trt,names(A))
name.t <-names(A)[ipch]
trt<-A[,ipch]
name.y <- names(A)[1]
}
junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
means <- tapply.stat(junto[, 1], junto[, 2], stat="mean") #change
sds <- tapply.stat(junto[, 1], junto[, 2], stat="sd") #change
nn <- tapply.stat(junto[, 1], junto[, 2], stat="length") #change
std.err <- sds[, 2]/sqrt(nn[, 2])
Tprob <- qt(1 - alpha/2, DFerror)
LCL <- means[,2]-Tprob*std.err
UCL <- means[,2]+Tprob*std.err
means <- data.frame(means, std.err, replication = nn[, 2], LCL, UCL)
names(means)[1:2] <- c(name.t, name.y)
#row.names(means) <- means[, 1]
ntr <- nrow(means)
nk <- choose(ntr, 2)
nr <- unique(nn[, 2])
comb <- combn(ntr, 2)
nn <- ncol(comb)
dif <- rep(0, nn)
LCL1<-dif
UCL1<-dif
sig<-NULL
pvalue <- rep(0, nn)
for (k in 1:nn) {
i <- comb[1, k]
j <- comb[2, k]
if (means[i, 2] < means[j, 2]){
comb[1, k]<-j
comb[2, k]<-i
}
dif[k] <- abs(means[i, 2] - means[j, 2])
sdtdif <- sqrt(MSerror * (1/means[i, 4] + 1/means[j,4]))
pvalue[k] <- 2 * (1 - pt(dif[k]/sdtdif, DFerror));
pvalue[k] <- round(pvalue[k],6);
LCL1[k] <- dif[k] - Tprob*sdtdif
UCL1[k] <- dif[k] + Tprob*sdtdif
sig[k]<-" "
if (pvalue[k] <= 0.001) sig[k]<-"***"
else if (pvalue[k] <= 0.01) sig[k]<-"**"
else if (pvalue[k] <= 0.05) sig[k]<-"*"
else if (pvalue[k] <= 0.1) sig[k]<-"."
}
tr.i <- means[comb[1, ],1]
tr.j <- means[comb[2, ],1]
output<-data.frame("Difference" = dif, pvalue = pvalue,sig,LCL=LCL1,UCL=UCL1)
rownames(output)<-paste(tr.i,tr.j,sep=" - ");
output;
}
tapply.stat <-function (y, x, stat = "mean"){
cx<-deparse(substitute(x))
cy<-deparse(substitute(y))
x<-data.frame(c1=1,x)
y<-data.frame(v1=1,y)
nx<-ncol(x)
ny<-ncol(y)
namex <- names(x)
namey <- names(y)
if (nx==2) namex <- c("c1",cx)
if (ny==2) namey <- c("v1",cy)
namexy <- c(namex,namey)
for(i in 1:nx) {
x[,i]<-as.character(x[,i])
}
z<-NULL
for(i in 1:nx) {
z<-paste(z,x[,i],sep="&")
}
w<-NULL
for(i in 1:ny) {
m <-tapply(y[,i],z,stat)
m<-as.matrix(m)
w<-cbind(w,m)
}
nw<-nrow(w)
c<-rownames(w)
v<-rep("",nw*nx)
dim(v)<-c(nw,nx)
for(i in 1:nw) {
for(j in 1:nx) {
v[i,j]<-strsplit(c[i],"&")[[1]][j+1]
}
}
rownames(w)<-NULL
junto<-data.frame(v[,-1],w)
junto<-junto[,-nx]
names(junto)<-namexy[c(-1,-(nx+1))]
return(junto)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.