R/aracne.r

Defines functions readAracneAdj summary.regulon print.regulon aracne2regulon4cnv aracne2regulon

Documented in aracne2regulon aracne2regulon4cnv

#' Regulon object generation from ARACNe results
#' 
#' This function generates a regulon object from ARACNe results and the corresponding expression dataset
#' 
#' @param afile Character string indicating the name of the ARACNe network file
#' @param eset Either a character string indicating the name of the expression-dataset file, a ExpressionSet object or a gene expression matrix with genes (probes) in rows and samples in columns
#' @param gene Logical, whether the probes should be collapsed at the gene level
#' @param format Character string, indicating the format of the aracne file, either adj for adjacency matrixes generated by aracne, or 3col when the interactome is represented by a 3 columns text file, with regulator in the first column, target in the second and mutual information in the third column
#' @param verbose Logical, whether progression messages should be printed in the terminal.
#' @return Regulon object
#' @seealso \code{\link{msviper}}, \code{\link{viper}}
#' @examples
#' data(bcellViper, package="bcellViper")
#' adjfile <- file.path(find.package("bcellViper"), "aracne", "bcellaracne.adj")
#' regul <- aracne2regulon(adjfile, dset)
#' print(regul)
#' @export
aracne2regulon <- function(afile, eset, gene = FALSE, format=c("adj", "3col"), verbose = TRUE) {
    format <- match.arg(format)
    if (is(eset, "ExpressionSet")) eset <- exprs(eset)
    if (verbose) message("\nLoading the dataset...")
    if (length(eset)==1) {
        tmp <- strsplit(readLines(eset), "\t")
        dset <- t(sapply(tmp[-1], function(x) as.numeric(x[-(1:2)])))
        colnames(dset) <- tmp[[1]][-(1:2)]
        rownames(dset) <- sapply(tmp[-1], function(x) x[1])
        annot <- t(sapply(tmp[-1], function(x) x[1:2]))
    }
    else {
        dset <- eset
        annot <- rownames(eset)
        names(annot) <- rownames(eset)
        rm(eset)
    }
#Collapsing interactomes
    switch(format,
    adj={
        aracne <- readAracneAdj(afile)
    },
    "3col"={
        tmp <- t(sapply(strsplit(readLines(afile), "\t"), function(x) x[1:3]))
        aracne <- data.frame(tf=tmp[, 1], target=tmp[, 2], mi=as.numeric(tmp[, 3])/max(as.numeric(tmp[, 3])))
    })
    if (gene) {
        if (verbose) message("Collapsing the interactomes to the gene level...")
        tmp <- aracne[order(aracne$mi, decreasing=TRUE), ]
        tmp$tf <- annot[match(tmp$tf, annot[, 1]), 2]
        tmp$target <- annot[match(tmp$target, annot[, 1]), 2]
        aracne <- tmp[!duplicated(paste(tmp$tf, tmp$target, sep="_")), ]
#Generating the gene centric datasets
        rownames(dset) <- annot[match(rownames(dset), annot[, 1]), 2]
        dset <- filterCV(dset)
    }
    if (verbose) message("Generating the regulon objects...")
    tmp <- aracne[!is.na(aracne$mi), ]
    tmp <- tmp[rowSums(matrix(as.matrix(tmp[, 1:2]) %in% rownames(dset), nrow(tmp), 2))==2, ]
    aracne <- tapply(1:nrow(tmp), as.vector(tmp$tf), function(pos, tmp) {
        tfmode <- rep(0, length(pos))
        names(tfmode) <- tmp$target[pos]
        list(tfmode=tfmode, likelihood=tmp$mi[pos])
    }, tmp=tmp)
    names(aracne) <- levels(factor(as.vector(tmp$tf)))
    aracne <- TFmode1(aracne, dset)
    rm(dset)
# removing missing data from the aracne regulon
    aracne <- aracne[names(aracne) != "NA"]
    aracne <- lapply(aracne, function(x) {
        filtro <- !(names(x$tfmode)=="NA" | is.na(x$tfmode) | is.na(x$likelihood))
        x$tfmode <- x$tfmode[filtro]
        x$likelihood <- x$likelihood[filtro]
        return(x)
    })
    aracne <- aracne[sapply(aracne, function(x) length(names(x$tfmode)))>0]
    regul <- TFscore(aracne, verbose=verbose)
    class(regul) <- "regulon"
    return(regul)
}

#' Regulon object generation from ARACNe results corrected by cnv
#' 
#' This function generates a regulon object from ARACNe results and the corresponding expression dataset when correction for CNV have been applied
#' 
#' @param afile Character string indicating the name of the ARACNe network file
#' @param eset Either a character string indicating the name of the expression-dataset file, a ExpressionSet object or a gene expression matrix with genes (probes) in rows and samples in columns, where the expression was corrected by CNV
#' @param regeset Either a character string indicating the name of the expression-dataset file, a ExpressionSet object or a gene expression matrix with genes (probes) in rows and samples in columns
#' @param gene Logical, whether the probes should be collapsed at the gene level
#' @param format Character string, indicating the format of the aracne file, either adj for adjacency matrixes generated by aracne, or 3col when the interactome is represented by a 3 columns text file, with regulator in the first column, target in the second and mutual information in the third column
#' @param verbose Logical, whether progression messages should be printed in the terminal.
#' @return Regulon object
#' @seealso \code{\link{msviper}}, \code{\link{viper}}
#' @examples
#' data(bcellViper, package="bcellViper")
#' adjfile <- file.path(find.package("bcellViper"), "aracne", "bcellaracne.adj")
#' regul <- aracne2regulon(adjfile, dset)
#' print(regul)
#' @export
aracne2regulon4cnv <- function(afile, eset, regeset, gene = FALSE, format=c("adj", "3col"), verbose = TRUE) {
    format <- match.arg(format)
    if (is(eset, "ExpressionSet")) eset <- exprs(eset)
    if (verbose) message("\nLoading the dataset...")
    if (length(eset)==1) {
        tmp <- strsplit(readLines(eset), "\t")
        dset <- t(sapply(tmp[-1], function(x) as.numeric(x[-(1:2)])))
        colnames(dset) <- tmp[[1]][-(1:2)]
        rownames(dset) <- sapply(tmp[-1], function(x) x[1])
        annot <- t(sapply(tmp[-1], function(x) x[1:2]))
        rm(eset)
    }
    else {
        dset <- eset
        annot <- rownames(eset)
        names(annot) <- rownames(eset)
        rm(eset)
    }
    if (is(regeset, "ExpressionSet")) regeset <- exprs(regeset)
    if (verbose) message("\nLoading the dataset...")
    if (length(regeset)==1) {
        tmp <- strsplit(readLines(regeset), "\t")
        regdset <- t(sapply(tmp[-1], function(x) as.numeric(x[-(1:2)])))
        colnames(regdset) <- tmp[[1]][-(1:2)]
        rownames(regdset) <- sapply(tmp[-1], function(x) x[1])
        annot <- t(sapply(tmp[-1], function(x) x[1:2]))
        rm(regeset)
    }
    else {
        regdset <- regeset
        annot <- rownames(regeset)
        names(annot) <- rownames(regeset)
        rm(regeset)
    }
    # Making dset and regdset compatible
    genes <- intersect(rownames(dset), rownames(regdset))
    samp <- intersect(colnames(dset), colnames(regdset))
    dset <- dset[match(genes, rownames(dset)), ][, match(samp, colnames(dset))]
    regdset <- regdset[match(genes, rownames(regdset)), ][, match(samp, colnames(regdset))]
    #Collapsing interactomes
    switch(format,
           adj={
               aracne <- readAracneAdj(afile)
           },
           "3col"={
               tmp <- t(sapply(strsplit(readLines(afile), "\t"), function(x) x[1:3]))
               aracne <- data.frame(tf=tmp[, 1], target=tmp[, 2], mi=as.numeric(tmp[, 3])/max(as.numeric(tmp[, 3])))
           })
    if (gene) {
        if (verbose) message("Collapsing the interactomes to the gene level...")
        tmp <- aracne[order(aracne$mi, decreasing=TRUE), ]
        tmp$tf <- annot[match(tmp$tf, annot[, 1]), 2]
        tmp$target <- annot[match(tmp$target, annot[, 1]), 2]
        aracne <- tmp[!duplicated(paste(tmp$tf, tmp$target, sep="_")), ]
        #Generating the gene centric datasets
        rownames(dset) <- rownames(regdset) <- annot[match(rownames(dset), annot[, 1]), 2]
        dset <- filterCV(dset)
        regdset <- filterCV(regdset)        
    }
    if (verbose) message("Generating the regulon objects...")
    tmp <- aracne[!is.na(aracne$mi), ]
    tmp <- tmp[rowSums(matrix(as.matrix(tmp[, 1:2]) %in% rownames(dset), nrow(tmp), 2))==2, ]
    aracne <- tapply(1:nrow(tmp), tmp$tf, function(pos, tmp) {
        tfmode <- rep(0, length(pos))
        names(tfmode) <- tmp$target[pos]
        list(tfmode=tfmode, likelihood=tmp$mi[pos])
    }, tmp=tmp)
    names(aracne) <- levels(tmp$tf)
    aracne <- TFmode2(aracne, dset, regdset)
    rm(dset)
    # removing missing data from the aracne regulon
    aracne <- aracne[names(aracne) != "NA"]
    aracne <- lapply(aracne, function(x) {
        filtro <- !(names(x$tfmode)=="NA" | is.na(x$tfmode) | is.na(x$likelihood))
        x$tfmode <- x$tfmode[filtro]
        x$likelihood <- x$likelihood[filtro]
        return(x)
    })
    aracne <- aracne[sapply(aracne, function(x) length(names(x$tfmode)))>0]
    regul <- TFscore(aracne, verbose=verbose)
    class(regul) <- "regulon"
    return(regul)
}

#' @method print regulon
#' @export
print.regulon <- function(x, ...) {
    targets <- unlist(lapply(x, function(x) names(x$tfmode)), use.names=FALSE)
    cat("Object of class regulon with ", length(x), " regulators, ", length(unique(targets)), " targets and ", length(targets), " interactions\n", sep="")
}

#' @method summary regulon
#' @export
summary.regulon <- function(object, ...) {
    targets <- unlist(lapply(object, function(x) names(x$tfmode)), use.names=FALSE)
    c(Regulators=length(object), Targets=length(unique(targets)), Interactions=length(targets))
}

TFmode1 <- function (regulon, expset, method = "spearman") {
    expset <- filterCV(expset)
    regulon <- updateRegulon(regulon)
    regulon <- regulon[names(regulon) %in% rownames(expset)]
    regulon <- lapply(regulon, function(x, genes) {
        filtro <- names(x$tfmode) %in% genes
        x$tfmode <- x$tfmode[filtro]
        if (length(x$likelihood) == length(filtro)) 
        x$likelihood <- x$likelihood[filtro]
        return(x)
    }, genes = rownames(expset))
    tf <- unique(names(regulon))
    tg <- unique(unlist(lapply(regulon, function(x) names(x$tfmode)), use.names = FALSE))
    cmat <- cor(t(expset[rownames(expset) %in% tf, ]), t(expset[rownames(expset) %in% tg, ]), method = method)
    reg <- lapply(1:length(regulon), function(i, regulon, cmat) {
        tfscore <- cmat[which(rownames(cmat) == names(regulon)[i]), match(names(regulon[[i]]$tfmode), colnames(cmat))]
        list(tfmode = tfscore, likelihood = regulon[[i]]$likelihood)
    }, regulon = regulon, cmat = cmat)
    names(reg) <- names(regulon)
    return(reg)
}

TFmode2 <- function (regulon, expset, regexpset, method = "spearman") {
    tf <- unique(names(regulon))
    tg <- unique(unlist(lapply(regulon, function(x) names(x$tfmode)), use.names = FALSE))
    cmat <- cor(t(regexpset[rownames(regexpset) %in% tf, ]), t(expset[rownames(expset) %in% tg, ]), method = method)
    reg <- lapply(1:length(regulon), function(i, regulon, cmat) {
        tfscore <- cmat[which(rownames(cmat) == names(regulon)[i]), match(names(regulon[[i]]$tfmode), colnames(cmat))]
        list(tfmode = tfscore, likelihood = regulon[[i]]$likelihood)
    }, regulon = regulon, cmat = cmat)
    names(reg) <- names(regulon)
    return(reg)
}

TFscore <- function (regul, mu = NULL, sigma = NULL, verbose=TRUE) {
    if (length(mu) == 3 & length(sigma) == 3)
        fit <- list(mu = mu, sigma = sigma)
    else {
        tmp <- unlist(lapply(regul, function(x) x$tfmode), use.names = FALSE)
        fit <- list(mu = c(-0.5, 0, 0.5), sigma = c(0.15, 0.25, 0.15), lambda = c(0.2, 0.4, 0.4), all.loglik = rep(0, 1001))
        count <- 0
        while (length(fit$all.loglik) > 1000 & count < 3) {
            fit <- normalmixEM(tmp, mu = fit$mu, sigma = fit$sigma, lambda = fit$lambda, mean.constr = c(NA, 0, NA), maxit = 1000, verb = FALSE)
            count <- count + 1
        }
    }
    if (verbose) message("mu: ", paste(fit$mu, collapse = ", "), ". sigma: ", paste(fit$sigma, collapse = ", "))
    regul <- lapply(regul, function(x, fit) {
        g2 <- pnorm(x$tfmode, fit$mu[3], fit$sigma[3], lower.tail = TRUE)
        g1 <- pnorm(x$tfmode, fit$mu[1], fit$sigma[1], lower.tail = FALSE)
        g0 <- pnorm(x$tfmode, fit$mu[2], fit$sigma[2], lower.tail = FALSE)
        g00 <- pnorm(x$tfmode, fit$mu[2], fit$sigma[2], lower.tail = TRUE)
        x$tfmode <- g2/(g1 + g0 + g2) * (x$tfmode >= 0) - g1/(g1 + g00 + g2) * (x$tfmode < 0)
        return(x)
    }, fit = fit)
    return(regul)
}

readAracneAdj <- function(fname) {
    tmp <- readLines(fname)
    pos <- grep(">", tmp)
    if (length(pos)>0) tmp <- tmp[-pos]
    tmp <- strsplit(tmp, "\t")
    nom <- sapply(tmp, function(x) x[1])
    tmp <- lapply(tmp, function(x) matrix(x[-1], length(x[-1])/2, 2, byrow=TRUE))
    aracne <- data.frame(tf=rep(nom, sapply(tmp, nrow)), target=unlist(lapply(tmp, function(x) x[, 1]), use.names=FALSE), mi=as.numeric(unlist(lapply(tmp, function(x) x[, 2]), use.names=FALSE)))
    return(aracne)
}

Try the viper package in your browser

Any scripts or data that you put into this service are public.

viper documentation built on Nov. 8, 2020, 7:37 p.m.