R/amaretto_run.R

Defines functions Lambda_Sequence AMARETTO_CreateRegulatorPrograms AMARETTO_CreateModuleData AMARETTO_ReassignGenesToClusters AMARETTO_LearnRegulatoryProgramsLarsen AMARETTO_LarsenBased

Documented in AMARETTO_CreateModuleData AMARETTO_CreateRegulatorPrograms AMARETTO_LarsenBased AMARETTO_LearnRegulatoryProgramsLarsen AMARETTO_ReassignGenesToClusters Lambda_Sequence

#' AMARETTO_LarsenBased
#' 
#' @param Data 
#' @param Clusters 
#' @param RegulatorData 
#' @param Parameters 
#' @param NrCores 
#' @param random_seeds 
#' @param convergence_cutoff 
#'
#' @import MultiAssayExperiment
#' @import graphics
#' @importFrom Matrix nnzero
#' @importFrom doParallel registerDoParallel
#' 
#' @return result
#' @keywords internal
AMARETTO_LarsenBased <- function(Data, Clusters, RegulatorData, Parameters, NrCores, random_seeds, convergence_cutoff) {
    registerDoParallel(cores = NrCores)
    ptm1 <- proc.time()
    RegulatorData_rownames = rownames(RegulatorData)
    Data_rownames = rownames(Data)
    AutoRegulation = Parameters$AutoRegulation
    RegulatorSign = array(0, length(RegulatorData_rownames))
    Lambda = Parameters$Lambda2
    OneRunStop = Parameters$OneRunStop
    random_seeds = Parameters$random_seeds
    convergence_cutoff = Parameters$convergence_cutoff
    if (AutoRegulation == 1) {
        cat("\tAutoregulation is turned ON.\n")
    } else if (AutoRegulation == 2) {
        cat("\tAutoregulation is turned ON.\n")
    } else {
        cat("\tAutoregulation is turned OFF.\n")
    }
    NrReassignGenes = length(Data_rownames)
    NrReassignGenes_history <- NrReassignGenes
    error_history<-list()
    index=1
    while (NrReassignGenes > convergence_cutoff * length(Data_rownames)) {
        ptm <- proc.time()
        switch(Parameters$Mode, larsen = {
            regulatoryPrograms <- AMARETTO_LearnRegulatoryProgramsLarsen(Data, Clusters, RegulatorData, RegulatorSign, Lambda, AutoRegulation, alpha = Parameters$alpha, pmax = Parameters$pmax, random_seeds)
        })
        error_history[[index]]<-(rowMeans(regulatoryPrograms$error * regulatoryPrograms$error))^0.5
        index<-index+1
        ptm <- proc.time() - ptm
        printf("Elapsed time is %f seconds\n", ptm[3])
        NrClusters = length(unique(Clusters))
        sum = 0
        for (i in 1:NrClusters) {
            sum = sum + Matrix::nnzero(regulatoryPrograms$Beta[i,])
        }
        avg = sum/NrClusters
        printf("Average nr of regulators per module: %f \n", avg)
        PreviousClusters = Clusters
        if (OneRunStop == 1) {
            break
        }
        ptm <- proc.time()
        ReassignGenesToClusters <- AMARETTO_ReassignGenesToClusters(Data, RegulatorData, regulatoryPrograms$Beta, Clusters, AutoRegulation)
        ptm <- proc.time() - ptm
        printf("Elapsed time is %f seconds\n", ptm[3])
        NrReassignGenes = ReassignGenesToClusters$NrReassignGenes
        Clusters = ReassignGenesToClusters$Clusters
        printf("Nr of reassignments is: %i \n", NrReassignGenes)
        NrReassignGenes_history <- c(NrReassignGenes_history, NrReassignGenes)
    }
    ptm1 <- proc.time() - ptm1
    printf("Elapsed time is %f seconds\n", ptm1[3])
    ModuleMembership = as.matrix(PreviousClusters)
    rownames(ModuleMembership) = rownames(Data)
    colnames(ModuleMembership) = c("ModuleNr")
    result <- list(NrModules = length(unique(Clusters)), RegulatoryPrograms = regulatoryPrograms$Beta, AllRegulators = rownames(RegulatorData),
                   AllGenes = rownames(Data), ModuleMembership = ModuleMembership, AutoRegulationReport = regulatoryPrograms$AutoRegulationReport,
                   run_history = list(NrReassignGenes_history = NrReassignGenes_history, error_history = error_history))
    return(result)
}

#' AMARETTO_LearnRegulatoryProgramsLarsen
#' @importFrom foreach foreach
#' @importFrom glmnet cv.glmnet
#' @importFrom stats coef
#' @return result
#' @keywords internal
AMARETTO_LearnRegulatoryProgramsLarsen <- function(Data, Clusters, RegulatorData, RegulatorSign, Lambda, AutoRegulation, alpha, pmax, random_seeds) {
    `%dopar%` <- foreach::`%dopar%`
    RegulatorData_rownames = rownames(RegulatorData)
    Data_rownames = rownames(Data)
    trace = 0
    NrFolds = 10
    NrClusters = length(unique(Clusters))
    NrGenes = nrow(Data)
    NrSamples = ncol(Data)
    NrInterpolateSteps = 100
    if (AutoRegulation >= 1) {}
    else if (AutoRegulation == 0) {
        BetaSpecial = list(NrClusters, 1)
        RegulatorPositions = list(NrClusters, 1)
    }
    y_all = mat.or.vec(NrClusters, NrSamples)
    ClusterIDs = unique(Clusters)
    ClusterIDs = sort(ClusterIDs, decreasing = FALSE)
    cnt <- 1:NrClusters
    ptm1 <- proc.time()
    BetaY_all <- foreach(i = 1:NrClusters, .combine = cbind, .init = list(list(), list(), list()), .packages = "glmnet") %dopar% {
            if (length(which(Clusters == ClusterIDs[i])) > 1) {
                y = apply((Data[which(Clusters == ClusterIDs[i]),]), 2, mean)
            } else {
                y = Data[which(Clusters == ClusterIDs[i]),]
            }
            CurrentClusterPositions = which(Clusters %in% ClusterIDs[i])
            nrGenesInClusters = length(CurrentClusterPositions)
            if (AutoRegulation >= 1) {
                X = RegulatorData
            } else if (AutoRegulation == 0) {
                X = RegulatorData[setdiff(RegulatorData_rownames, Data_rownames[CurrentClusterPositions]),]
            }
            suppressWM = function(...) suppressWarnings(suppressMessages(...))
            
            if(!is.null(random_seeds)){
              seed<-ifelse (length(random_seeds)==2,random_seeds[2],random_seeds[1])
              set.seed(seed)
            }
            fit = suppressWM(cv.glmnet(t(X), y, alpha = alpha, pmax = pmax, lambda = Lambda_Sequence(t(X), y)))
            nonZeroLambdas <- fit$lambda[which(fit$nzero > 0)]
            nonZeroCVMs <- fit$cvm[which(fit$nzero > 0)]
            if (length(which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))) == 0) {
                warnMessage <- paste0("\nOn cluster ", i, " there were no cv.glm results that gave non-zero coefficients.")
                message(warnMessage)
            }
            bestNonZeroLambda <- nonZeroLambdas[which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))]
            b_o = coef(fit, s = bestNonZeroLambda)
            b_opt <- c(b_o[2:length(b_o)])
            if (AutoRegulation == 2) {
                CurrentUsedRegulators = RegulatorData_rownames[which(b_opt != 0, arr.ind = TRUE)]
                CurrentClusterMembers = Data_rownames[CurrentClusterPositions]
                nrIterations = 0
                while (length(CurrentClusterMembers[CurrentClusterMembers %in% CurrentUsedRegulators]) != 0) {
                  CurrentClusterMembers = setdiff(CurrentClusterMembers, CurrentUsedRegulators)
                  nrCurrentClusterMembers = length(CurrentClusterMembers)
                  if (nrCurrentClusterMembers > 0) {
                    names = Data_rownames %in% CurrentClusterMembers
                    if (length(which(names == TRUE)) > 1) {
                      y = apply((Data[names, ]), 2, mean)
                    } else {
                      y = Data[names, ]
                    }
                    if(!is.null(random_seeds)){
                      seed<-ifelse (length(random_seeds)==2,random_seeds[2],random_seeds[1])
                      set.seed(seed)
                    }
                    fit = suppressWM(cv.glmnet(t(X), y, alpha = alpha, pmax = pmax, lambda = Lambda_Sequence(t(X), y)))
                    nonZeroLambdas <- fit$lambda[which(fit$nzero > 0)]
                    nonZeroCVMs <- fit$cvm[which(fit$nzero > 0)]
                    if (length(which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))) == 0) {
                      warnMessage <- paste0("\nOn cluster ", i, " there were no cv.glm results that gave non-zero coefficients during the Autoregulation step.")
                      message(warnMessage)
                    }
                    bestNonZeroLambda <- nonZeroLambdas[which(nonZeroCVMs == min(nonZeroCVMs, na.rm = TRUE))]
                    new_b_o = coef(fit, s = bestNonZeroLambda)
                    new_b_opt <- c(new_b_o[2:length(b_o)])
                    CurrentUsedRegulators = RegulatorData_rownames[which(new_b_opt != 0)]
                    nrIterations = nrIterations + 1
                    b_opt = new_b_opt
                  } else {
                    b_opt = rep(0, length(RegulatorData_rownames))
                  }
                }
                Report <- c(length(CurrentClusterPositions), length(CurrentClusterMembers), nrIterations)
            }
            if (sum(RegulatorSign[which(RegulatorSign != 0)]) > 0) {
                RegulatorCheck = RegulatorSign * t(b_opt)
                WrongRegulators = which(RegulatorCheck < 0)
                if (length(WrongRegulators) == 0) {
                  b_opt[WrongRegulators] = 0
                }
            }
            if (AutoRegulation >= 1) {
            } else {
                BetaSpecial[i] = b_opt
                RegulatorPositions[i] = (RegulatorData_rownames %in% setdiff(RegulatorData_rownames, Data_rownames[CurrentClusterPositions]))
            }
            list(b_opt, y, Report)
        }
    if (AutoRegulation == 0) {
        for (i in 1:NrClusters) {
            Beta[i, RegulatorPositions[i]] = BetaSpecial[i]
        }
    }
    tmpPos = NrClusters + 1
    Beta <- do.call(cbind, BetaY_all[1, 2:tmpPos])
    Beta = t(Beta)
    colnames(Beta) = RegulatorData_rownames
    rownames(Beta) = gsub("result.", "Module_", rownames(Beta))
    y_all <- do.call(cbind, BetaY_all[2, 2:tmpPos])
    y_all = t(y_all)
    rownames(y_all) = gsub("result.", "Module_", rownames(y_all))
    AutoRegulationReport <- do.call(cbind, BetaY_all[3, 2:tmpPos])
    AutoRegulationReport = t(AutoRegulationReport)
    rownames(AutoRegulationReport) = gsub("result.", "Module_", rownames(AutoRegulationReport))
    error = y_all - (Beta %*% RegulatorData)
    result <- list(Beta = Beta, error = error, AutoRegulationReport = AutoRegulationReport)
    return(result)
}

#' AMARETTO_ReassignGenesToClusters
#'
#' @return result
#' @importFrom Matrix nnzero
#' @importFrom stats cor
#' @keywords internal
AMARETTO_ReassignGenesToClusters <- function(Data, RegulatorData, Beta, Clusters, AutoRegulation) {
    `%dopar%` <- foreach::`%dopar%`
    RegulatorData_rownames = rownames(RegulatorData)
    Data_rownames = rownames(Data)
    NrGenes = nrow(Data)
    NrSamples = ncol(Data)
    NrReassignGenes = 0
    X = RegulatorData
    X1 = data.matrix(X)
    ModuleVectors = Beta %*% X1
    GeneNames = rownames(Data)
    ptm1 <- proc.time()
    i <- NULL
    nc <- foreach(i = 1:NrGenes, .combine = c) %dopar%
        {
            OldModule = Clusters[i]
            CurrentGeneVector = Data[i, , drop = FALSE]
            Correlations = cor(t(CurrentGeneVector), t(ModuleVectors))
            corr = data.matrix(Correlations, rownames.force = NA)
            MaxCorrelation = max(corr, na.rm = TRUE)
            MaxPosition = which(signif(corr, digits = 7) == signif(MaxCorrelation, digits = 7))
            MaxPosition = MaxPosition[1]
            if (AutoRegulation > 0) {
                if (MaxPosition != OldModule) {
                  NrReassignGenes = NrReassignGenes + 1
                }
                NewClusters = MaxPosition
            } else {
                if (nnzero(rownames(RegulatorData_rownames) %in% GeneNames[i]) != 0) {
                  if (nnzero(which(which(GeneNames %in% rownames(RegulatorData_rownames)) %in% i) %in% which(Beta[MaxPosition,] != 0)) != 0) {
                    if (MaxPosition != OldModule) {
                      NrReassignGenes = NrReassignGenes + 1
                    }
                    NewClusters = MaxPosition
                  } else {
                    NewClusters = OldModule
                  }
                } else {
                  if (MaxPosition != OldModule) {
                    NrReassignGenes = NrReassignGenes + 1
                  }
                  NewClusters = MaxPosition
                }
            }
        }
    ptm1 <- proc.time() - ptm1
    NrReassignGenes = length(which(nc != Clusters))
    result <- list(NrReassignGenes = NrReassignGenes, Clusters = nc)
    return(result)
}

#' AMARETTO_CreateModuleData
#'
#' @param AMARETTOinit List output from AMARETTO_Initialize().
#' @param AMARETTOresults List output from AMARETTO_Run()
#'
#' @return result
#' @export
#' @examples
#' data('ProcessedDataLIHC')
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
#'                                     NrModules = 2, VarPercentage = 50)
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
#' AMARETTO_MD <- AMARETTO_CreateModuleData(AMARETTOinit, AMARETTOresults)
AMARETTO_CreateModuleData <- function(AMARETTOinit, AMARETTOresults) {
    ModuleData = matrix(0, AMARETTOresults$NrModules, length(colnames(AMARETTOinit$MA_matrix_Var)))
    rownames(ModuleData) = rownames(AMARETTOresults$AutoRegulationReport)
    colnames(ModuleData) = colnames(AMARETTOinit$MA_matrix_Var)
    for (ModuleNr in 1:AMARETTOresults$NrModules) {
        currentModuleData = AMARETTOinit$MA_matrix_Var[AMARETTOresults$ModuleMembership[, 1] == ModuleNr, ]
        if (length(which(AMARETTOresults$ModuleMembership[, 1] == ModuleNr)) > 1) {
            ModuleData[ModuleNr, ] = colMeans(currentModuleData)
        } else {
            ModuleData[ModuleNr, ] = currentModuleData
        }
    }
    return(ModuleData)
}

#' AMARETTO_CreateRegulatorPrograms
#'
#' @param AMARETTOinit  List output from AMARETTO_Initialize().
#' @param AMARETTOresults List output from AMARETTO_Run()
#'
#' @return result
#' @export
#' @examples
#' data('ProcessedDataLIHC')
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
#'                                     NrModules = 2, VarPercentage = 50)
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
#' AMARETTO_RP <- AMARETTO_CreateRegulatorPrograms(AMARETTOinit,AMARETTOresults)
AMARETTO_CreateRegulatorPrograms <- function(AMARETTOinit, AMARETTOresults) {
    RegulatorProgramData = matrix(0, AMARETTOresults$NrModules, length(colnames(AMARETTOinit$MA_matrix_Var)))
    rownames(RegulatorProgramData) = rownames(AMARETTOresults$AutoRegulationReport)
    colnames(RegulatorProgramData) = colnames(AMARETTOinit$MA_matrix_Var)
    RegulatorNames = rownames(AMARETTOinit$RegulatorData)
    for (ModuleNr in 1:AMARETTOresults$NrModules) {
        currentRegulators = RegulatorNames[which(AMARETTOresults$RegulatoryPrograms[ModuleNr, ] != 0)]
        weights = AMARETTOresults$RegulatoryPrograms[ModuleNr, currentRegulators]
        RegulatorData = AMARETTOinit$RegulatorData[currentRegulators, ]
        RegulatorProgramData[ModuleNr, ] = weights %*% RegulatorData
    }
    return(RegulatorProgramData)
}


#' Lambda_Sequence
#'
#' @return result
#' @keywords internal
Lambda_Sequence <- function(sx, sy) {
    n <- nrow(sx)
    lambda_max <- max(abs(colSums(sx * sy)))/n
    epsilon <- 1e-04
    K <- 100
    lambdaseq <- round(exp(seq(log(lambda_max), log(lambda_max * epsilon), length.out = K)), digits = 10)
    return(lambdaseq)
}
gevaertlab/AMARETTO documentation built on April 6, 2023, 2:43 p.m.