R/FC.R

#' Fold change
#'
#' Fold change (FC) analysis is to compare the absolute value change between two group means. 
#' Since column-wise normalization (i.e. log transformation, mean-centering) will significantly 
#' change the absolute values, FC is calculated as the ratio between two group means using data 
#' before column-wise normalization was applied.
#' 
#' For paired analysis, the program first counts the number of pairs with consistent change above the given FC threshold. 
#' If this number exceeds a given count threshold, the variable will be reported as significant.
#' Writes an output file \code{"fold_change.csv"}.
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list).
#' @param fcthresh Fold-change threshold.
#' @param cmpType Comparison type. If equal \code{0} then group 1 is compared against group 2. Otherwise, vice versa. 
#' @param percent.thresh Sig. count threshold (for paired data only).
#' @return Native \code{analSet} with one added \code{$fc} element consisting of:
#' \itemize{
#' \item\code{$paired} - are data paired or not 
#' \item\code{$raw.thresh} - value of \code{fcthresh} argument
#' \item\code{$max.thresh} - upper log-threshold of fold change
#' \item\code{$min.thresh} - lower log-threshold of fold change
#' \item\code{$fc.all} - fold changes of all features
#' \item\code{$fc.mat} - matrix of fold changes (for paired data only)
#' \item\code{$fc.log} - log2 of fold changes (for unpaired data only)
#' \item\code{$inx.up} - logical vector of increasing features (for paired data only)
#' \item\code{$inx.down} - logical vector of decreasing features (for paired data only)
#' \item\code{$inx.imp} - logical vector of features with significant difference (for unpaired data only)
#' \item\code{$sig.mat} - data frame of significant features
#' }
#' @seealso \code{\link{PlotFC}} for plotting functions
#' @export
FC.Anal <- function(dataSet, analSet, fcthresh=2, percent.thresh=0.75, cmpType=0){
	if(dataSet$paired==FALSE) {
		analSet <- FC.Anal.unpaired(dataSet, analSet, fcthresh=2, cmpType=0)
	} else {
		analSet <- FC.Anal.paired(dataSet, analSet, fcthresh=2, percent.thresh=0.75, cmpType=0)
	}
	return(analSet);
}


# fold change analysis, method can be mean or median
# note: since the interface allow user to change all parameters
# the fold change has to be re-calculated each time
FC.Anal.paired<-function(dataSet, analSet, fcthresh=2, percent.thresh=0.75, cmpType=0){

    # make sure threshold is above 1
    fcthresh = ifelse(fcthresh>1, fcthresh, 1/fcthresh);
    max.thresh = fcthresh;
    min.thresh = 1/fcthresh;

    fc.mat <-GetFC(dataSet, T, cmpType);

    count.thresh<-round(nrow(dataSet$norm)/2*percent.thresh);
	mat.up <- fc.mat >= log(max.thresh,2);
	mat.down <- fc.mat <= log(min.thresh,2);

	count.up<-apply(mat.up, 2, sum);
 	count.down<-apply(mat.down, 2, sum);
	fc.all<-rbind(count.up, count.down);

 	inx.up <- count.up>=count.thresh;
 	inx.down <- count.down>=count.thresh;

    colnames(fc.all)<-colnames(dataSet$norm);
    rownames(fc.all)<-c("Count (up)", "Count (down)");
    sig.var <- t(fc.all[,(inx.up|inx.down), drop=F]);

    # sort sig.var using absolute difference between count(up)-count(down)
    sig.dff<-abs(sig.var[,1]-sig.var[,2])
    inx<-order(sig.dff, decreasing=T);
    sig.var<-sig.var[inx,,drop=F];

    fileName <- "fold_change.csv";
    write.csv(signif(sig.var,5),file=fileName);

    # create a list object to store fc
    fc<-list (
        paired = TRUE,
        fc.mat = fc.mat,
        raw.thresh = fcthresh,
        max.thresh = count.thresh,
        min.thresh = -count.thresh,
        fc.all = fc.all, # note: a 2-row matrix!
        inx.up = inx.up,
        inx.down = inx.down,
        sig.mat = sig.var
    );
    analSet$fc<-fc;
	return(analSet);
}


FC.Anal.unpaired<-function(dataSet, analSet, fcthresh=2, cmpType = 0){
    
    # make sure threshold is above 1
    fcthresh = ifelse(fcthresh>1, fcthresh, 1/fcthresh);
    max.thresh = fcthresh;
    min.thresh = 1/fcthresh;

    res <-GetFC(dataSet, F, cmpType);
    fc.all <- res$fc.all;
    fc.log <- res$fc.log;

    imp.inx <- fc.all > max.thresh | fc.all < min.thresh;
    sig.mat <- cbind(fc.all[imp.inx, drop=F], fc.log[imp.inx, drop=F]);
    colnames(sig.mat)<-c("Fold Change", "log2(FC)");

    # order by absolute log value (since symmetrical in pos and neg)
    inx.ord <- order(abs(sig.mat[,2]), decreasing=T);
    sig.mat <- sig.mat[inx.ord,,drop=F];

    fileName <- "fold_change.csv";
    write.csv(sig.mat,file=fileName);

    # create a list object to store fc
    fc<-list (
            paired = FALSE,
            raw.thresh = fcthresh,
            max.thresh = max.thresh,
            min.thresh = min.thresh,
            fc.all = fc.all, # note a vector
            fc.log = fc.log,
            inx.imp = imp.inx,
            sig.mat = sig.mat
     );
     analSet$fc<-fc;
	 return(analSet);
}


#' Plot fold change 
#' 
#' Plot fold change analysis results.
#' @param dataSet List, data set object generated by \code{\link[MSdata]{MS_to_MA}} function.
#' @param analSet List, containing the results of statistical analysis (can be just an empty list). 
#' @param imgName Image file name prefix.
#' @param format Image format, one of: "png", "tiff", "pdf", "ps", "svg"
#' @param dpi Image resolution.
#' @param width Image width.
#' @seealso \code{\link{FC.Anal}} for analytical function
#' @export
PlotFC<-function(dataSet, analSet, imgName="fc_", format="png", dpi=72, width=NA){
    imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
    if(is.na(width)){
        w <- 8;
    }else if(width == 0){
        w <- 7;
        analSet$imgSet$fc<-imgName;
    }else{
        w <- width;
    }
    h <- w*6/8;

    Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");

    par(mar=c(5,5,2,3));

    fc = analSet$fc;
    if(fc$paired){
        ylim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2);
        xlim<-c(0, ncol(dataSet$norm));
        plot(NULL, xlim=xlim, ylim=ylim, xlab = GetVariableLabel(dataSet),
                ylab=paste("Count with FC >=", fc$max.thresh, "or <=", fc$min.thresh));
        for(i in 1:ncol(fc$fc.all)){
            segments(i,0, i, fc$fc.all[1,i], col= ifelse(fc$inx.up[i],"magenta", "darkgrey"),
                     lwd= ifelse(fc$inx.up[i], 2, 1));
            segments(i,0, i, -fc$fc.all[2,i], col= ifelse(fc$inx.down[i], "magenta", "darkgrey"),
                     lwd= ifelse(fc$inx.down[i], 2, 1));
        }
        abline(h=fc$max.thresh, lty=3);
        abline(h=fc$min.thresh, lty=3);
        abline(h=0, lwd=1);
    }else{
        if(fc$raw.thresh > 0){
            # be symmetrical
            topVal <- max(abs(fc$fc.log));
            ylim <- c(-topVal, topVal);
            plot(fc$fc.log,  ylab="Log2 (FC)", ylim = ylim, xlab = GetVariableLabel(dataSet), pch=19, axes=F,
                col= ifelse(fc$inx.imp, "magenta", "darkgrey"));
            axis(2);
            axis(4); # added by Beomsoo
            abline(h=log(fc$max.thresh,2), lty=3);
            abline(h=log(fc$min.thresh,2), lty=3);
            abline(h=0, lwd=1);
        }else{ # plot side by side

            dat1 <- dataSet$norm[as.numeric(dataSet$cls) == 1, ];
            dat2 <- dataSet$norm[as.numeric(dataSet$cls) == 2, ];

            mns1 <- apply(dat1, 2, mean);
            mn1 <- mean(mns1);
            sd1 <- sd(mns1);
            msd1.top <- mn1 + 2*sd1;
            msd1.low <- mn1 - 2*sd1;

            mns2 <- apply(dat2, 2, mean);
            mn2 <- mean(mns2);
            sd2 <- sd(mns2);
            msd2.top <- mn2 + 2*sd2;
            msd2.low <- mn2 - 2*sd2;

            ylims <- range(c(mns1, mns2, msd1.top, msd2.top, msd1.low, msd2.low));
            new.mns <- c(mns1, rep(NA, 5), mns2);
            cols <- c(rep("magenta", length(mns1)), rep(NA, 5), rep("blue", length(mns2)));
            pchs <- c(rep(15, length(mns1)), rep(NA, 5), rep(19, length(mns2)));
            plot(new.mns, ylim=ylims, pch = pchs, col = cols, cex = 1.25, axes=F, ylab="");
            axis(2);
            axis(4); # added by Beomsoo
            abline(h=mn1, col="magenta", lty=3, lwd=2);
            abline(h=msd1.low, col="magenta", lty=3, lwd=1);
            abline(h=msd1.top, col="magenta", lty=3, lwd=1);
            abline(h=mn2, col="blue", lty=3, lwd=2);
            abline(h=msd2.low, col="blue", lty=3, lwd=1);
            abline(h=msd2.top, col="blue", lty=3, lwd=1);
            # abline(h=mean(all.mns), col="darkgrey", lty=3);
            axis(1, at=1:length(new.mns), labels=c(1:length(mns1),rep(NA, 5),1:length(mns2)));
        }
    }
    dev.off();
	grid::grid.raster(png::readPNG(imgName));
}

GetSigTable.FC<-function(dataSet, analSet){
    GetSigTable(dataSet, analSet$fc$sig.mat, "fold change analysis");
}

GetFCSigMat<-function(analSet){
    return(CleanNumber(analSet$fc$sig.mat));
}

GetFCSigRowNames<-function(analSet){
    rownames(analSet$fc$sig.mat);
}

GetFCSigColNames<-function(analSet){
    colnames(analSet$fc$sig.mat);
}

# utility method to calculate FC
GetFC <- function(dataSet, paired=FALSE, cmpType){
    if(paired){
        if(dataSet$combined.method){
            data <- dataSet$norm;
        }else{
            data <- log(dataSet$row.norm,2);
        }

        G1 <- data[which(dataSet$cls==levels(dataSet$cls)[1]), ]
        G2 <- data[which(dataSet$cls==levels(dataSet$cls)[2]), ]

        if(cmpType == 0){
            fc.mat <- G1-G2;
        }else{
            fc.mat <- G2-G1;
        }
        return (fc.mat);
    }else{
        if(dataSet$combined.method){
            data <- dataSet$norm;
            m1 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[1]), ]);
            m2 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[2]), ]);

            # create a named matrix of sig vars for display
            if(cmpType == 0){
                fc.log <- signif (m1-m2, 5);
            }else{
                fc.log <- signif (m2-m1, 5);
            }
            fc.all <- signif(2^fc.log, 5);
        }else{
            data <- dataSet$row.norm;
            m1 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[1]), ]);
            m2 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[2]), ]);

            # create a named matrix of sig vars for display
            if(cmpType == 0){
                ratio <- m1/m2;
            }else{
                ratio <- m2/m1;
            }
            fc.all <- signif(ratio, 5);
            fc.log <- signif(log2(ratio), 5);
        }
        names(fc.all)<-names(fc.log)<-colnames(dataSet$norm);
        return(list(fc.all = fc.all, fc.log = fc.log));
    }
}
flajole/MApckg documentation built on May 16, 2019, 1:16 p.m.