#' Orthogonal partial least squares - discriminant analysis (OPLS-DA)
#'
#' Performs orthogonal partial least squares - discriminant analysis (OPLS-DA) and displays score plots and S-plots.
#' @param data Data table with variables (metabolites) in columns and samples in rows, sorted by group.
#' @param name A character string or expression giving the name of the data set. It is used in the names of all outputs.
#' @param groupnames A character vector defining the specific groups in the data. Each string must uniquely match the row names of one group, and the strings must not overlap.
#' @param tsf Data transformation, one of "clr" (default), "log", "log10", "PQN", "lnPQN", "pareto" or "none". See Details.
#' @param axx Which S-plot axis is used to select the most important variables: "p1" or "p1corr" (default).
#' @param top How many of the most important variables (by absolute value) should be highlighted in the S-plot. The default is 30.
#' @param qu Which quantile of the important variables (by absolute value) should be highlighted in the S-plot. The default is 0.75.
#' @param QCs logical. If FALSE (default), quality control samples (QCs, identified by "QC" in their row names) are automatically detected and excluded.
#' @details Data transformation: with "clr" the clr transformation is used (see References), with "log" the natural logarithm is applied, with "log10" the base-10 logarithm, with "pareto" the data are only scaled by Pareto scaling, with "PQN" probabilistic quotient normalization is performed, with "lnPQN" the natural logarithm of PQN-transformed data is taken, and with "none" no transformation is done.
#' @details S-plots can be used only for the comparison of two groups. If there are more than two groups in the data, all possible pairs of groups are evaluated.
#' @details Up to twenty different groups can be distinguished in the data (including QCs).
#' @return Score plots and S-plots of OPLS-DA.
#' @return Semicolon-separated CSV files (readable by Excel) with the S-plot summary for every variable.
#' @import car
#' @import rrcov
#' @import utils
#' @importFrom stats cov median pf prcomp quantile sd
#' @importFrom graphics axis barplot legend pairs plot points text
#' @importFrom grDevices dev.copy2pdf dev.off pdf
#' @importFrom robCompositions cenLR
#' @references Aitchison, J. (1986) The Statistical Analysis of Compositional Data. Monographs on Statistics and Applied Probability. Chapman & Hall Ltd., London (UK). p. 416.
#' @references Gaude, E. et al. (2012) muma: Metabolomics Univariate and Multivariate Analysis. R package version 1.4. \url{https://CRAN.R-project.org/package=muma}
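#' @examples
#' # A minimal usage sketch. "metab" is a hypothetical data frame with
#' # metabolites in columns and row names containing the group labels
#' # "Control" and "Case"; adjust to your own data.
#' \dontrun{
#' GraphsOPLSDA(data = metab, name = "Study1",
#'              groupnames = c("Control", "Case"),
#'              tsf = "clr", axx = "p1corr", top = 30, qu = 0.75)
#' }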
#' @export
GraphsOPLSDA=function(data,name,groupnames,tsf="clr",axx="p1corr",top=30,qu=0.75,QCs=FALSE){
dirout=getwd()
##########################################################################################################################
if (tsf=="clr"){
dataM=cenLR(data)$x.clr
}
if (tsf=="log"){
dataM=log(data)
}
if (tsf=="log10"){
dataM=log10(data)
}
if (tsf=="pareto"){
dataM=scale(data, scale=TRUE, center=FALSE)
}
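# Probabilistic quotient normalization (PQN): each sample is divided by the
# median quotient of its variables against a reference spectrum (here the
# column medians), which estimates the sample's dilution factor.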
PQN1=function (x){
xref=apply(x,2,median) # reference spectrum = column medians
podil=x
for (i in 1:ncol(x)){
podil[,i]=x[,i]/xref[i] # quotients of each variable against the reference
}
s=apply(podil,1,median) # per-sample dilution factor = median quotient
PQN=x
for (j in 1:nrow(x)){
PQN[j,]=x[j,]/s[j] # divide each sample by its dilution factor
}
return(PQN)
}
if (tsf=="PQN"){
dataM=PQN1(data)
}
if (tsf=="lnPQN"){
dataM=log(PQN1(data))
}
if (tsf=="none"){
dataM=data
}
#################################################################################################
count=length(groupnames)
basecolor=c("blue","magenta","forestgreen","darkorange","deepskyblue","mediumaquamarine","lightslateblue","saddlebrown",
"gray40","darkslateblue","firebrick","darkcyan","darkmagenta", "deeppink1","limegreen","gold2","bisque2",
"lightcyan3","red","darkolivegreen3") # Basic colours from: http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
basemarks=c(15,17,18,8,11,2,0,16,5,6,4,10,3,7,9,12,13,14,1,25) # 20 plotting symbols (pch), one per possible group
Group=matrix(rep(NA,count*3),ncol=3)
colnames(Group)=c("min","max","length")
rownames(Group)=groupnames
groups=NULL
marks=NULL
color=NULL
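# Detect each group by matching its name against the row names; Group stores
# the first row, last row and size of each group (rows of one group are
# assumed to be contiguous, see the 'data' argument).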
for (i in 1:count){
Gr=grep(groupnames[i],rownames(dataM))
Group[i,1]=min(Gr)
Group[i,2]=max(Gr)
Group[i,3]=length(Gr)
gr=rep(i,length(Gr))
groups=c(groups,gr)
zn=rep(basemarks[i],length(Gr))
marks=c(marks,zn)
cl=rep(basecolor[i],length(Gr))
color=c(color,cl)
}
##########################################################################################################################
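# Unless QCs=TRUE, drop quality control samples (rows whose names contain
# "QC") and rebuild the group table accordingly.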
if (QCs==FALSE){
QC=grep("QC",rownames(dataM))
if (length(QC)!=0){
dataM=dataM[-QC,]
color=color[-QC]
marks=marks[-QC]
groups=groups[-QC]
groupnames=groupnames[unique(groups)]
count=length(groupnames)
Group=matrix(rep(NA,count*3),ncol=3)
colnames(Group)=c("min","max","length")
rownames(Group)=groupnames
for (i in 1:count){
Gr=grep(groupnames[i],rownames(dataM))
Group[i,1]=min(Gr)
Group[i,2]=max(Gr)
Group[i,3]=length(Gr)
}
}
}
##########################################################################################################################
dataSetM2=dataM
colors=matrix(unique(color),ncol=1)
#################################################################################################
dirout2 = paste(dirout,"/","OPLSDA",sep = "")
dir.create(dirout2, recursive=TRUE, showWarnings = FALSE) # create an "OPLSDA" folder in the working directory
setwd(dirout2)
#################################################################################################
explore.data2=function (file, scaling, scal = TRUE, normalize = FALSE, imputation = FALSE,
imput,color=colors2)
{
comp <- file
comp.x = comp[, 3:ncol(comp)]
comp.x = cbind(comp[, 2], comp[, 1], comp.x)
x <- comp.x
x.x <- x[, 3:ncol(x)]
rownames(x.x) <- x[, 2]
if (!scal) {
scaling = ""
}
dirout = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/", sep = "")
dir.create(dirout)
if (imputation) {
y = x.x
r = is.na(y)
# Drop variables with more than 80% missing values; columns are collected
# first and removed afterwards so that indices do not shift inside the loop
drop.cols = c()
for (k in 1:ncol(r)) {
vec = matrix(r[, k], ncol = 1)
who.miss.rows = which(apply(vec, 1, function(i) {
any(i)
}))
if (length(who.miss.rows) > nrow(y) * 0.8) {
warning(paste("The variable -", colnames(y)[k],
"- has a number of missing values > 80%, therefore has been eliminated",
sep = " "))
drop.cols = c(drop.cols, k)
}
}
if (length(drop.cols) > 0) {
y = y[, -drop.cols]
}
r = is.na(y)
who.miss.columns = c()
for (i in 1:nrow(y)) {
for (j in 1:ncol(y)) {
if (r[i, j]) {
if (imput == "mean") {
v2 = matrix(r[, j], ncol = 1)
who.miss.rows = which(apply(v2, 1, function(i) {
any(i)
}))
y[i, j] = mean(y[-who.miss.rows, j])
print(paste("Imputing missing value of variable -",
colnames(y)[j], "- for the observation -",
rownames(y)[i], "- with", imput, "value",
sep = " "))
}
else if (imput == "minimum") {
v2 = matrix(r[, j], ncol = 1)
who.miss.rows = which(apply(v2, 1, function(i) {
any(i)
}))
y[i, j] = min(y[-who.miss.rows, j])
print(paste("Imputing missing value of variable -",
colnames(y)[j], "- for the observation -",
rownames(y)[i], "- with", imput, "value",
sep = " "))
}
else if (imput == "half.minimum") {
v2 = matrix(r[, j], ncol = 1)
who.miss.rows = which(apply(v2, 1, function(i) {
any(i)
}))
y[i, j] = min(y[-who.miss.rows, j])/2
print(paste("Imputing missing value of variable -",
colnames(y)[j], "- for the observation -",
rownames(y)[i], "- with", imput, "value",
sep = " "))
}
else if (imput == "zero") {
v2 = matrix(r[, j], ncol = 1)
who.miss.rows = which(apply(v2, 1, function(i) {
any(i)
}))
y[i, j] = 0
print(paste("Imputing missing value of variable -",
colnames(y)[j], "- for the observation -",
rownames(y)[i], "- with", imput, "value",
sep = " "))
}
}
}
}
pwdi = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ImputedMatrix.csv", sep = "")
write.csv(y, pwdi)
x.x = y
}
x.x = cbind(comp[, 2], x.x)
write.csv(x.x, paste(dirout, "CorrectedTable.csv", sep = ""))
pwd.c = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP, "/CorrectedTable.csv",
sep = "")
x <- read.csv(pwd.c, sep = ",", header = TRUE)
x.x <- x[, 3:ncol(x)]
rownames(x.x) <- x[, 1]
k = matrix(x[, 2], ncol = 1)
if (normalize) {
x.t <- t(x.x)
x.s <- matrix(colSums(x.t), nrow = 1)
uni = matrix(rep(1, nrow(x.t)), ncol = 1)
area.uni <- uni %*% x.s
x.areanorm <- x.t/area.uni
x.areanorm = t(x.areanorm)
write.csv(x.areanorm, paste(dirout, "/ProcessedTable.csv",
sep = ""))
}
else {
write.csv(x.x, paste(dirout, "/ProcessedTable.csv", sep = ""))
}
if (scal) {
if (scaling == "Pareto" | scaling == "pareto" | scaling ==
"P" | scaling == "p") {
pwd.n = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.n, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
x.areanorm.tc <- scale(x.x, center = TRUE, scale = FALSE)
all.sd <- matrix(apply(x.areanorm.tc, 2, sd), nrow = 1)
uni.exp.all = matrix(rep(1, nrow(x.areanorm.tc)),
ncol = 1)
all.sdm = uni.exp.all %*% all.sd
all.sqsd = sqrt(all.sdm)
all.pareto <- x.areanorm.tc/all.sqsd
write.csv(all.pareto, paste(dirout, "/ProcessedTable.csv",
sep = ""))
}
else if (scaling == "Auto" | scaling == "auto" | scaling ==
"A" | scaling == "a") {
pwd.n = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.n, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
x.areanorm.tc <- scale(x.x, center = TRUE, scale = FALSE)
all.sd <- matrix(apply(x.areanorm.tc, 2, sd), nrow = 1)
uni.exp.all = matrix(rep(1, nrow(x.areanorm.tc)),
ncol = 1)
all.sdm = uni.exp.all %*% all.sd
all.auto <- x.areanorm.tc/all.sdm
write.csv(all.auto, paste(dirout, "/ProcessedTable.csv",
sep = ""))
}
else if (scaling == "Vast" | scaling == "vast" | scaling ==
"V" | scaling == "v") {
pwd.n = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.n, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
x.areanorm.tc <- scale(x.x, center = TRUE, scale = FALSE)
all.sd <- matrix(apply(x.areanorm.tc, 2, sd), nrow = 1)
uni.exp.all = matrix(rep(1, nrow(x.areanorm.tc)),
ncol = 1)
all.sdm = uni.exp.all %*% all.sd
sdm2 = all.sdm^2
colm = matrix(colMeans(x.x), nrow = 1)
colm.m = uni.exp.all %*% colm
num = x.areanorm.tc * colm.m
vast = num/sdm2
write.csv(vast, paste(dirout, "/ProcessedTable.csv",
sep = ""))
}
else if (scaling == "Range" | scaling == "range" | scaling ==
"R" | scaling == "r") {
pwd.n = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.n, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
x.areanorm.tc <- scale(x.x, center = TRUE, scale = FALSE)
range = c()
for (i in 1:ncol(x.x)) {
den = c()
den = max(x.x[, i]) - min(x.x[, i])
range = matrix(c(range, den), nrow = 1)
}
uni.exp.all = matrix(rep(1, nrow(x.areanorm.tc)),
ncol = 1)
range.m = uni.exp.all %*% range
all.range = x.areanorm.tc/range.m
write.csv(all.range, paste(dirout, "/ProcessedTable.csv",
sep = ""))
}
else if (scaling == "Median" | scaling == "median" |
scaling == "M" | scaling == "m") {
pwd.n = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.n, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
x.areanorm.tc <- scale(x.x, center = TRUE, scale = FALSE)
all.med <- matrix(apply(x.areanorm.tc, 2, median),
nrow = 1)
uni.exp.all = matrix(rep(1, nrow(x.areanorm.tc)),
ncol = 1)
all.sdm = uni.exp.all %*% all.med
all.med <- x.areanorm.tc/all.sdm
write.csv(all.med, paste(dirout, "/ProcessedTable.csv",
sep = ""))
}
}
else {
pwd.n = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.n, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
x.c = scale(x.x, scale = FALSE)
write.csv(x.c, paste(dirout, "/ProcessedTable.csv", sep = ""))
}
pwd.scal = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,
"/ProcessedTable.csv", sep = "")
x <- read.csv(pwd.scal, sep = ",", header = TRUE)
x.x <- x[, 2:ncol(x)]
rownames(x.x) <- x[, 1]
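# PCA of the preprocessed table: write out explained variance, scores and
# loadings, and draw a screeplot.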
pc.all <- prcomp(x.x, center = FALSE, scale = FALSE)
p.v <- matrix(((pc.all$sdev^2)/(sum(pc.all$sdev^2))), ncol = 1)
p.i <- round(p.v * 100, 1)
p.z <- matrix(1, nrow(p.i), 1)
p.f <- cbind(p.i, p.z)
dirout.pca = paste(getwd(), "/PCA_Data_", scaling, noteOP,"/", sep = "")
dir.create(dirout.pca)
write.csv(p.f, paste(dirout.pca, "PCA_P", sep = ""))
write.csv(pc.all$x, paste(dirout.pca, "PCA_ScoreMatrix.csv",
sep = ""))
write.csv(pc.all$rotation, paste(dirout.pca, "PCA_LoadingsMatrix.csv",
sep = ""))
pwd.score = paste(getwd(), "/PCA_Data_", scaling, noteOP, "/", "PCA_ScoreMatrix.csv",
sep = "")
Score <- read.csv(pwd.score, sep = ",", header = TRUE)
Score.x <- Score[, 2:ncol(Score)]
rownames(Score.x) <- Score[, 1]
pwd.load = paste(getwd(), "/PCA_Data_", scaling, noteOP,"/", "PCA_LoadingsMatrix.csv",
sep = "")
Loading <- read.csv(pwd.load, sep = ",", header = TRUE)
Loading.x <- Loading[, 2:ncol(Loading)]
rownames(Loading.x) <- Loading[, 1]
pwd.pvar = paste(getwd(), "/PCA_Data_", scaling, noteOP,"/", "PCA_P",
sep = "")
Pvar <- read.csv(pwd.pvar, sep = ",", header = TRUE)
Pvar.x <- Pvar[, 2:ncol(Pvar)]
rownames(Pvar.x) <- Pvar[, 1]
barplot(Pvar.x[, 1], xlab = "Principal Components", ylab = "Proportion of Variance explained",
main = "Screeplot", ylim = c(0, 100))
scree = paste(dirout.pca, "Screeplot", scaling, noteOP, ".pdf", sep = "")
dev.copy2pdf(file = scree)
tutticolors = as.matrix(colors2)
col = c()
for (i in 1:nrow(k)) {
col = c(col, tutticolors[k[i, ], ])
}
n.pairs = c()
if (ncol(Score.x) >= 10) {
n.pairs = 10
}
else {
n.pairs = ncol(Score.x)
}
pairs(Score.x[, 1:n.pairs], col = col) # pairs plot of the leading components
pairs.file = paste(dirout.pca, "First_10_Components_", scaling, noteOP,
".pdf", sep = "")
dev.copy2pdf(file = pairs.file)
K = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP,"/class.csv",
sep = "")
write.csv(k, K)
x.nn = cbind(k, pc.all$x)
sorted = x.nn[order(x.nn[, 1]), ]
g = c()
# collect the distinct class codes, in order of appearance
for (i in 1:nrow(sorted)) {
if (!any(g == sorted[i, 1])) {
g = matrix(c(g, sorted[i, 1]), ncol = 1)
}
}
dirout.g = paste(getwd(), "/Groups_",noteOP, sep = "")
dir.create(dirout.g)
for (i in 1:nrow(g)) {
vuota <- c()
fin = matrix(rep(NA, ncol(sorted)), nrow = 1)
for (j in 1:nrow(sorted)) {
if (sorted[j, 1] == i) {
vuota <- matrix(sorted[j, ], nrow = 1)
rownames(vuota) = rownames(sorted)[j]
fin = rbind(fin, vuota)
}
}
nam = paste("r", i, sep = ".")
n = matrix(fin[-1, ], ncol = ncol(sorted))
n.x = matrix(n[, -1], ncol = ncol(sorted) - 1)
name = as.matrix(assign(nam, n.x))
outputfileg = paste("r.", i, ".csv", sep = "")
write.csv(name, paste(dirout.g, outputfileg, sep = "/"),
row.names = FALSE)
}
all.F = c()
NoF = nrow(g)
for (i in 1:NoF) {
for (j in 1:NoF) {
if (i < j) {
ni = paste("r.", i, ".csv", sep = "")
nj = paste("r.", j, ".csv", sep = "")
pwdi = paste(getwd(), "/Groups_",noteOP,"/", ni, sep = "")
pwdj = paste(getwd(), "/Groups_",noteOP,"/", nj, sep = "")
I = read.csv(pwdi, header = TRUE)
J = read.csv(pwdj, header = TRUE)
fin = ncol(I) - 1
ntest = factorial(fin)/(2 * (factorial(fin -
2)))
T2 = c()
nam = c()
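# For every pair of principal components, compare the two groups in that
# 2-D score subspace with Hotelling's T2 test (rrcov::T2.test).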
for (k in 1:fin) {
for (l in 1:fin) {
if (k < l) {
Ikl = cbind(I[, k], I[, l])
Jkl = cbind(J[, k], J[, l])
t1 = matrix(T2.test(Ikl, Jkl)$statistic,
ncol = 2)
t2 = c(t1[, 1])
T2 = matrix(c(T2, t2), ncol = 1)
rownam = paste("PC", k, "vsPC", l, sep = "")
nam = matrix(c(nam, rownam), ncol = 1)
}
}
}
pair = paste("T2statistic_", i, "vs", j, sep = "")
rownames(T2) = nam
colnames(T2)[1] = pair
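# Convert Hotelling's T2 to an F statistic: with p = 2 dimensions,
# F = T2 * (n1 + n2 - p - 1) / (p * (n1 + n2 - 2)) ~ F(p, n1 + n2 - p - 1).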
num = nrow(I) + nrow(J) - 3
den = 2 * (nrow(I) + nrow(J) - 2)
coeff = num/den
Fval = T2 * coeff
Fvalname = paste("F_statistic_", i, "vs", j,
sep = "")
colnames(Fval)[1] = Fvalname
Fpval = pf(Fval, 2, num)
Fname = paste("F_pvalue_", i, "vs", j, sep = "")
colnames(Fpval)[1] = Fname
Fpvalfin = 1 - Fpval
all.F = matrix(c(all.F, Fpvalfin))
}
}
}
varp = c()
for (k in 1:fin) {
for (l in 1:fin) {
if (k < l) {
varp = matrix(c(varp, p.f[k, 1] + p.f[l, 1]),
ncol = 1)
}
}
}
ncomparison = factorial(nrow(g))/(2 * (factorial(nrow(g) -
2)))
all.F = matrix(all.F, ncol = ncomparison)
rownames(all.F) = nam
allFpwd = paste(getwd(), "/PCA_Data_", scaling, noteOP, "/PCs_Fstatistic.csv",
sep = "")
write.csv(all.F, allFpwd, row.names = FALSE)
sum = matrix(rowSums(all.F), ncol = 1)
all = data.frame(nam, sum, varp)
colnames(all)[3] = "Variance(%)"
colnames(all)[2] = "Sum_p_values"
colnames(all)[1] = "Pair_of_PCs"
ord.sum = all[order(all[, 2]), ]
colnames(ord.sum)[3] = "Variance(%)"
colnames(ord.sum)[2] = "Sum_p_values(F_statistics)"
colnames(ord.sum)[1] = "Pair_of_PCs"
rownames(ord.sum) = 1:nrow(ord.sum)
rankFpwd = paste(getwd(), "/PCA_Data_", scaling, noteOP, "/PCs_ranked_Fpvalue.csv",
sep = "")
write.csv(ord.sum, rankFpwd, row.names = FALSE)
print("Pairs of Principal Components giving highest statistical cluster separation are:")
print(ord.sum[1:5, ])
}
##############################################################################################################################################
OPLSDA2=function (scaling,axx="",top="",qu="",color=colors2,marks,groupnames)
{
pwd.x = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP, "/ProcessedTable.csv",
sep = "")
x = read.csv(pwd.x, header = TRUE)
x.x = x[, 2:ncol(x)]
rownames(x.x) = x[, 1]
pwdK = paste(getwd(), "/Preprocessing_Data_", scaling, noteOP, "/class.csv",
sep = "")
k = read.csv(pwdK, header = TRUE)
k.x = matrix(k[, -1], ncol = 1)
x.n = cbind(k.x, x.x)
sorted = x.n[order(x.n[, 1]), ]
k = matrix(sorted[, 1], ncol = 1)
g = c()
# collect the distinct class codes, in order of appearance
for (i in 1:nrow(sorted)) {
if (!any(g == sorted[i, 1])) {
g = matrix(c(g, sorted[i, 1]), ncol = 1)
}
}
# Binary response: samples of the second class are coded 0, the first class 1
Y = matrix(ifelse(sorted[, 1] == 2, 0, 1), ncol = 1)
X = as.matrix(sorted[, -1])
nf = 1
T = c()
P = c()
C = c()
W = c()
Tortho = c()
Portho = c()
Wortho = c()
Cortho = c()
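# One predictive O-PLS component, computed NIPALS-style (Trygg & Wold 2002):
# w = X weights from the covariance with Y, t = predictive scores,
# c = Y loadings, p = X loadings; the orthogonal weights are wortho = p - w
# (normalized), giving orthogonal scores/loadings tortho/portho, and X is
# deflated by the orthogonal variation tortho %*% t(portho).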
for (j in 1:nf) {
w = (t(X) %*% Y) %*% solve(t(Y) %*% Y)
w1 = t(w) %*% w
w2 = abs(sqrt(w1))
w = w %*% solve(w2)
t = (X %*% w) %*% solve(t(w) %*% w)
t1 = t(t) %*% t
c = t(Y) %*% t %*% solve(t1)
c1 = t(c) %*% c
u = Y %*% c %*% solve(c1)
u1 = t(u) %*% u
u2 = abs(sqrt(u1))
p = (t(X) %*% t) %*% solve(t1)
wortho = p - w
wortho1 = t(wortho) %*% wortho
wortho2 = abs(sqrt(abs(wortho1)))
wortho = wortho %*% solve(wortho2)
tortho = X %*% wortho %*% solve(t(wortho) %*% wortho)
tortho1 = t(tortho) %*% tortho
portho = t(X) %*% tortho %*% solve(tortho1)
cortho = t(Y) %*% tortho %*% solve(tortho1)
X = X - tortho %*% t(portho)
T = matrix(c(T, t))
P = matrix(c(P, p))
C = matrix(c(C, c))
W = matrix(c(W, w))
Tortho = matrix(c(Tortho, tortho))
Portho = matrix(c(Portho, portho))
Wortho = matrix(c(Wortho, wortho))
Cortho = matrix(c(Cortho, cortho))
}
T = matrix(T, ncol = nf)
T = scale(T, scale = FALSE, center = TRUE)
P = matrix(P, ncol = nf)
C = matrix(C, ncol = nf)
W = matrix(W, ncol = nf)
Tortho = matrix(Tortho, ncol = nf)
Portho = matrix(Portho, ncol = nf)
Cortho = matrix(Cortho, ncol = nf)
Wortho = matrix(Wortho, ncol = nf)
Xortho = Tortho %*% t(Portho)
max.pc1 = 1.3 * (max(abs(T[, nf])))
max.pc2 = 1.3 * (max(abs(Tortho[, nf])))
lim = c()
if (max.pc1 > max.pc2) {
lim = c(-max.pc1, max.pc1)
}else{
lim = c(-max.pc2, max.pc2)
}
#tutticolors = matrix(c("darkgreen","blue","magenta","darkorange3", 5, 6, "rosybrown4",
# "green4", "navy", "purple2", "orange", "pink", "chocolate2",
# "coral3", "khaki3", "thistle", "turquoise3", "palegreen1",
# "moccasin", "olivedrab3", "azure4", "gold3", "deeppink",7, 8),
# ncol = 1)
tutticolors = as.matrix(colors2)
col = c()
for (i in 1:nrow(k)) {
col = c(col, tutticolors[k[i, ], ])
}
plot(T[, nf], Tortho[, 1], col = col, pch = marks2 , cex=2, xlim = lim,
ylim = lim, xlab = "T score [1]", ylab = "Orthogonal T score [1]",
main = "OPLS-DA score scatter plot")
text(T[, nf], Tortho[, 1], col = col, labels = rownames(sorted),
cex = 0.5, pos = 1)
axis(1, at = lim * 2, pos = c(0, 0), labels = FALSE, col = "grey",
lwd = 0.7)
axis(2, at = lim * 2, pos = c(0, 0), labels = FALSE, col = "grey",
lwd = 0.7)
legend("topleft",legend = groupnames2, pch = unique(marks2), col=color,cex=1)
dataEllipse(T[, nf], Tortho[, 1], levels = c(0.95), add = TRUE,
col = "black", lwd = 0.4, plot.points = FALSE, center.cex = 0.2)
dirout = paste(getwd(), "/OPLS-DA", scaling, " - ",noteOP,"/", sep = "")
dir.create(dirout)
pwdxdef = paste(dirout, "X_deflated.csv", sep = "")
write.csv(X, pwdxdef)
scor = paste(dirout, "ScorePlot_OPLS-DA_", scaling, " - ",noteOP, ".pdf",
sep = "")
dev.copy2pdf(file = scor)
pwdT = paste(dirout, "TScore_Matrix.csv", sep = "")
write.csv(T, pwdT)
pwdTortho = paste(dirout, "TorthoScore_Matrix.csv", sep = "")
write.csv(Tortho, pwdTortho)
s = as.matrix(sorted[, -1])
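# S-plot coordinates: p1 = covariance of each variable with the predictive
# score vector T (modelled magnitude), pcorr1 = the corresponding correlation
# (reliability); variables extreme in both are the most influential.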
p1 = c()
for (i in 1:ncol(s)) {
scov = cov(s[, i], T)
p1 = matrix(c(p1, scov), ncol = 1)
}
pcorr1 = c()
for (i in 1:nrow(p1)) {
den = apply(T, 2, sd) * sd(s[, i])
corr1 = p1[i, ]/den
pcorr1 = matrix(c(pcorr1, corr1), ncol = 1)
}
pwdp1 = paste(dirout, "p1_Matrix.csv", sep = "")
write.csv(p1, pwdp1)
pwdpcorr1 = paste(dirout, "pcorr1_Matrix.csv", sep = "")
write.csv(pcorr1, pwdpcorr1)
MSn=cbind(p1,abs(p1),pcorr1,abs(pcorr1))
rownames(MSn)=colnames(x.x) # should match the variable names of dataSetM2
colnames(MSn)=c("p1","abs_p1","p1_corr","abs_p1_corr")
pwdpMSn = paste(dirout, "S-plot_list of mz - ",noteOP,".csv", sep = "")
write.table(MSn,pwdpMSn,sep=";")
# Select the top variables by absolute value of the chosen S-plot axis:
sorMSnt=NULL
if(axx=="p1"){sorMSnt=MSn[order(-MSn[,2]),]}
if(axx=="p1corr"){sorMSnt=MSn[order(-MSn[,4]),]}
sortMSnt=sorMSnt[1:top,]
pwdpMSnsortt = paste(dirout, "S-plot_list of mz_top - ",noteOP,".csv", sep = "")
write.table(sortMSnt,pwdpMSnsortt,sep=";")
p1t=sortMSnt[,1]
pcorr1t=sortMSnt[,3]
# Select variables above the chosen quantile of the absolute values:
MSnq=NULL
sorMSnq=NULL
if(axx=="p1"){MSnq=quantile(MSn[,2],qu)
sorMSnq=which(sorMSnt[,2]>=MSnq)}
if(axx=="p1corr"){MSnq=quantile(MSn[,4],qu)
sorMSnq=which(sorMSnt[,4]>=MSnq)}
sortMSnq=sorMSnt[sorMSnq,]
pwdpMSnsortq = paste(dirout, "S-plot_list of mz_quantile - ",noteOP,".csv", sep = "")
write.table(sortMSnq,pwdpMSnsortq,sep=";")
p1q=sortMSnq[,1]
pcorr1q=sortMSnq[,3]
#dev.new()
plot(p1, pcorr1, xlab = "p[1]", ylab = "p(corr)[1]", main = paste("S-plot (OPLS-DA) ",
scaling, sep = ""), pch=16, cex=0.5)
text(p1, pcorr1, labels = colnames(s), cex = 0.5, pos = 1, offset=0.2, col="navy")
splot = paste(dirout, "SPlot_OPLS-DA_", scaling," - ",noteOP, ".pdf",
sep = "")
dev.copy2pdf(file = splot,width=15, height=10)
#barevny S_plot - top:
#dev.new()
plot(p1, pcorr1, xlab = "p[1]", ylab = "p(corr)[1]", main = paste("S-plot (OPLS-DA) ",scaling, sep = ""), pch=16, cex=0.5)
points(p1t,pcorr1t,col="red",pch=16,cex=0.6)
legend("topleft",legend = paste("n=", top ),cex=1)
text(p1, pcorr1, labels = colnames(s), cex = 0.5, pos = 1, offset=0.2, col="navy")
text(p1t, pcorr1t, labels = rownames(sortMSnt), cex = 0.5, pos = 1, offset=0.2, col="red")
splotsortt = paste(dirout, "SPlot_OPLS-DA_top_", scaling," - ",noteOP, ".pdf",
sep = "")
dev.copy2pdf(file = splotsortt,width=15, height=10)
#barevny S_plot - quantile:
#dev.new()
plot(p1, pcorr1, xlab = "p[1]", ylab = "p(corr)[1]", main = paste("S-plot (OPLS-DA) ",scaling, sep = ""), pch=16, cex=0.5)
points(p1q,pcorr1q,col="red",pch=16,cex=0.6)
legend("topleft",legend = c(paste("n=", length(rownames(sortMSnq))),paste(qu*100,"% quantile")),cex=1)
text(p1, pcorr1, labels = colnames(s), cex = 0.5, pos = 1, offset=0.2, col="navy")
text(p1q, pcorr1q, labels = rownames(sortMSnq), cex = 0.5, pos = 1, offset=0.2, col="red")
splotsortq = paste(dirout, "SPlot_OPLS-DA_quantile_", scaling," - ",noteOP, ".pdf",
sep = "")
dev.copy2pdf(file = splotsortq,width=15, height=10)
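# Diagnostic PCA on the orthogonal-deflated X: after removing the orthogonal
# variation, the remaining structure should reflect the between-group
# separation.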
pc.all <- prcomp(X, center = FALSE, scale = FALSE)
p.v <- matrix(((pc.all$sdev^2)/(sum(pc.all$sdev^2))), ncol = 1)
p.i <- round(p.v * 100, 1)
p.z <- matrix(1, nrow(p.i), 1)
p.f <- cbind(p.i, p.z)
dirout.pca = paste(dirout, "PCA_OPLS/", sep = "")
dir.create(dirout.pca)
write.csv(p.f, paste(dirout.pca, "PCA_P_OPLS", sep = ""))
write.csv(pc.all$x, paste(dirout.pca, "PCA_OPLS_ScoreMatrix.csv",
sep = ""))
write.csv(pc.all$rotation, paste(dirout.pca, "PCA_OPLS_LoadingsMatrix.csv",
sep = ""))
cum = p.f[1, 1] + p.f[2, 1]
lim = c()
max.pc1 = 1.3 * (max(abs(pc.all$x[, 1])))
max.pc2 = 1.3 * (max(abs(pc.all$x[, 2])))
if (max.pc1 > max.pc2) {
lim = c(-max.pc1, max.pc1)
}
else {
lim = c(-max.pc2, max.pc2)
}
pca <- paste("PC1 (", p.f[1, 1], ") %")
pcb <- paste("PC2 (", p.f[2, 1], ") %")
xlab = c(pca)
ylab = c(pcb)
D <- paste(dirout.pca, "PCA_OPLS_ScorePlot.pdf", sep = "")
pdf(D)
plot(pc.all$x[, 1], pc.all$x[, 2], col = col, xlab = xlab,
ylab = ylab, xlim = lim, ylim = lim, pch = marks2, cex=2, sub = paste("Cumulative Proportion of Variance Explained = ",
cum, "%", sep = ""), main = "PCA Score Plot on orthogonal-deflated X")
axis(1, at = lim * 2, pos = c(0, 0), labels = FALSE, col = "grey",
lwd = 0.7)
axis(2, at = lim * 2, pos = c(0, 0), labels = FALSE, col = "grey",
lwd = 0.7)
legend("topleft",legend = groupnames2, pch = unique(marks2), col = unique(col),cex=1)
dataEllipse(pc.all$x[, 1], pc.all$x[, 2], levels = c(0.95),
add = TRUE, col = "black", lwd = 0.4, plot.points = FALSE,
center.cex = 0.2)
text(pc.all$x[, 1], pc.all$x[, 2], col = col, cex = 0.5,
labels = rownames(sorted), pos = 1)
dev.off()
pca.load <- paste("Loading PC1 (", p.f[1, 1], " %)", sep = "")
pcb.load <- paste("Loading PC2 (", p.f[2, 1], " %)", sep = "")
Max.pc1 = 1.1 * (max(pc.all$rotation[, 1]))
Min.pc1 = 1.1 * (min(pc.all$rotation[, 1]))
Mpc1 = c(Min.pc1 * 2, Max.pc1 * 2)
Max.pc2 = 1.1 * (max(pc.all$rotation[, 2]))
Min.pc2 = 1.1 * (min(pc.all$rotation[, 2]))
Mpc2 = c(Min.pc2 * 2, Max.pc2 * 2)
E = paste(dirout.pca, "PCA_OPLS_LoadingPlot.pdf", sep = "")
pdf(E)
plot(pc.all$rotation[, 1], pc.all$rotation[, 2], xlab = pca.load,
ylab = pcb.load, xlim = c(Min.pc1, Max.pc1), ylim = c(Min.pc2,
Max.pc2), main = "PCA Loading Plot on orthogonal-deflated X",
sub = paste("Cumulative Proportion of Variance Explained = ",
cum, "%", sep = ""))
axis(1, at = Mpc1, pos = c(0, 0), labels = FALSE, col = "grey",
lwd = 0.7)
axis(2, at = Mpc2, pos = c(0, 0), labels = FALSE, col = "grey",
lwd = 0.7)
text(pc.all$rotation[, 1], pc.all$rotation[, 2], labels = rownames(pc.all$rotation),
cex = 0.6, pos = 1)
dev.off()
}
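# Driver loop: for every pair of groups, build a two-class data set (classes
# coded 1 and 2), run the preprocessing (Pareto scaling, "p") and the
# OPLS-DA with S-plots; noteOP labels all outputs for this pair.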
for (i in 1:(count-1)){
for (j in (i+1):count){
OP=c(rep(1,Group[i,3]),rep(2,Group[j,3]))
dataOP=cbind(OP,dataSetM2[c((Group[i,1]:Group[i,2]),(Group[j,1]:Group[j,2])),])
colors2=color[c(Group[i,1],Group[j,1])]
marks2=marks[c((Group[i,1]:Group[i,2]),(Group[j,1]:Group[j,2]))]
groupnames2=groupnames[c(i,j)]
r.names=row.names(dataOP)
dataOP=data.frame(cbind(r.names,dataOP))
row.names(dataOP)=NULL
noteOP=paste(name,"_",groupnames[i],"_",groupnames[j],sep="")
explore.data2(dataOP, scaling="p",scal=TRUE, normalize = FALSE, imputation = FALSE,color=colors2)
OPLSDA2("p",axx=axx,top=top,qu=qu,color=colors2,marks2,groupnames2)
#assign(paste("OP",i,j,sep=""),get("OP"), inherits=T)
#assign(paste("dataOP",i,j,sep=""),get("dataOP"), inherits=T)
remove(OP)
remove(dataOP)
remove(noteOP)
remove(r.names)
dev.off()
}
}
setwd(dirout)
}