#'
#' data es un dataframe donde la primera columna son loa symbol y la segunda
#' los entrez id
customKegg <- function(data, universe = NULL, restrict.universe = FALSE,
species = "Hs", species.KEGG = "hsa", convert = FALSE, gene.pathway = NULL,
pathway.names = NULL, prior.prob = NULL, covariate = NULL,
plot = FALSE, ...) {
if( !is.data.frame(data)){
stop("de should be a data.frame with firt column as symbol Id and second
column as entrez Id.")
}
if( dim(data)[2] != 2){
stop("de should be a data.frame with firt column as symbol
Id and second column as entrez Id.")
}
names(data) <- c("SYMBOL","ENTREZID")
de <- data
de <- as.vector(de[,2])
de <- list(DE = de)
nsets <- length(de)
if (!all(vapply(de, is.vector, TRUE))) {
stop("components of de should be vectors")
}
for (s in 1:nsets) de[[s]] <- unique(as.character(de[[s]]))
names(de) <- trimWhiteSpace(names(de))
NAME <- names(de)
i <- which(NAME == "" | is.na(NAME))
NAME[i] <- paste0("DE", i)
names(de) <- makeUnique(NAME)
if (is.null(species.KEGG)) {
species <- match.arg(species, c("Ag", "At", "Bt", "Ce",
"Dm", "Dr", "EcK12", "EcSakai", "Gg", "Hs", "Mm",
"Mmu", "Pf", "Pt", "Rn", "Ss", "Xl"))
species.KEGG <- switch(species, Ag = "aga", At = "ath",
Bt = "bta", Ce = "cel", Cf = "cfa", Dm = "dme", Dr = "dre",
EcK12 = "eco", EcSakai = "ecs", Gg = "gga", Hs = "hsa",
Mm = "mmu", Mmu = "mcc", Pf = "pfa", Pt = "ptr",
Rn = "rno", Ss = "ssc", Xl = "xla")
}
if (is.null(gene.pathway)) {
GeneID.PathID <- getGeneKEGGLinks(species.KEGG, convert = convert)
} else {
GeneID.PathID <- gene.pathway
d <- dim(GeneID.PathID)
if (is.null(d)) {
stop("gene.pathway must be data.frame or matrix")
}
if (d[2] < 2) {
stop("gene.pathway must have at least 2 columns")
}
isna <- rowSums(is.na(GeneID.PathID[, 1:2])) > 0.5
GeneID.PathID <- GeneID.PathID[!isna, ]
ID.ID <- paste(GeneID.PathID[, 1], GeneID.PathID[, 2],
sep = ".")
if (anyDuplicated(ID.ID)) {
d <- duplicated(ID.ID)
GeneID.PathID <- GeneID.PathID[!d, ]
}
}
if (is.null(pathway.names)) {
PathID.PathName <- getKEGGPathwayNames(species.KEGG,
remove.qualifier = TRUE)
} else {
PathID.PathName <- pathway.names
d <- dim(PathID.PathName)
if (is.null(d)) {
stop("pathway.names must be data.frame or matrix")
}
if (d[2] < 2) {
stop("pathway.names must have at least 2 columns")
}
isna <- rowSums(is.na(PathID.PathName[, 1:2])) > 0.5
PathID.PathName <- PathID.PathName[!isna, ]
}
if (is.null(universe)) {
universe <- unique(GeneID.PathID[, 1])
prior.prob <- covariate <- NULL
} else {
universe <- as.character(universe)
lu <- length(universe)
if (!lu) {
stop("No genes in universe")
}
if (!is.null(prior.prob) && length(prior.prob) != lu) {
stop("universe and prior.prob must have same length")
}
if (!is.null(covariate) && length(covariate) != lu) {
stop("universe and covariate must have same length")
}
if (restrict.universe) {
i <- universe %in% GeneID.PathID[, 1]
universe <- universe[i]
if (!is.null(prior.prob)) {
prior.prob <- prior.prob[i]
}
if (!is.null(covariate)) {
covariate <- covariate[i]
}
}
}
if (anyDuplicated(universe)) {
d <- duplicated(universe)
if (!is.null(covariate)) {
covariate <- rowsum(covariate, group = universe,
reorder = FALSE)
n <- rowsum(rep_len(1L, length(universe)), group = universe,
reorder = FALSE)
covariate <- covariate/n
}
if (!is.null(prior.prob)) {
prior.prob <- rowsum(prior.prob, group = universe,
reorder = FALSE)
n <- rowsum(rep_len(1L, length(universe)), group = universe,
reorder = FALSE)
prior.prob <- prior.prob/n
}
universe <- universe[!d]
}
NGenes <- length(universe)
if (NGenes < 1L) {
stop("No annotated genes found in universe")
}
for (s in 1:nsets) de[[s]] <- de[[s]][de[[s]] %in% universe]
i <- GeneID.PathID[, 1] %in% universe
if (sum(i) == 0L) {
stop("Pathways do not overlap with universe")
}
GeneID.PathID <- GeneID.PathID[i, ]
if (!is.null(covariate)) {
if (!is.null(prior.prob)) {
message("prior.prob being recomputed from covariate")
}
covariate <- as.numeric(covariate)
isDE <- (universe %in% unlist(de))
o <- order(covariate)
prior.prob <- covariate
span <- approx(x = c(20, 200), y = c(1, 0.5), xout = sum(isDE),
rule = 2)$y
prior.prob[o] <- tricubeMovingAverage(isDE[o], span = span)
if (plot) {
barcodeplot(covariate, index = isDE, worm = TRUE,
span.worm = span, main = "DE status vs covariate")
}
}
if (is.null(prior.prob)) {
X <- matrix(1, nrow(GeneID.PathID), nsets + 1)
colnames(X) <- c("N", names(de))
} else {
names(prior.prob) <- universe
X <- matrix(1, nrow(GeneID.PathID), nsets + 2)
X[, nsets + 2] <- prior.prob[GeneID.PathID[, 1]]
colnames(X) <- c("N", names(de), "PP")
}
for (s in 1:nsets) {
X[, s + 1] <- (GeneID.PathID[, 1] %in% de[[s]])
}
S <- rowsum(X, group = GeneID.PathID[, 2], reorder = FALSE)
PValue <- matrix(0, nrow = nrow(S), ncol = nsets)
colnames(PValue) <- paste("P", names(de), sep = ".")
nde <- lengths(de, use.names = FALSE)
if (!is.null(prior.prob)) {
SumPP <- sum(prior.prob)
M2 <- NGenes - S[, "N"]
Odds <- S[, "PP"]/(SumPP - S[, "PP"]) * M2/S[, "N"]
if (!requireNamespace("BiasedUrn", quietly = TRUE)) {
stop("BiasedUrn package required but is not installed (or can't be loaded)")
}
for (j in seq_len(nsets)) {
for (i in seq_len(nrow(S))) {
PValue[i, j] <- BiasedUrn::pWNCHypergeo(S[i,
1L + j], S[i, "N"], M2[i], nde[j], Odds[i],
lower.tail = FALSE) + BiasedUrn::dWNCHypergeo(S[i,
1L + j], S[i, "N"], M2[i], nde[j], Odds[i])
}
}
S <- S[, -ncol(S)]
} else {
for (j in seq_len(nsets)) {
PValue[, j] <- phyper(S[, 1L + j] - 0.5, nde[j],
NGenes - nde[j], S[, "N"], lower.tail = FALSE)
}
}
g <- rownames(S)
m <- match(g, PathID.PathName[, 1])
path <- PathID.PathName[m, 2]
##
kk <- GeneID.PathID
data <- data.frame(ENTREZID = data[,2])
data$ENTREZID <- as.character(data$ENTREZID)
kkb <- inner_join(kk, data, by = c(GeneID = "ENTREZID"))
kkb <- kkb[!duplicated(kkb), ]
# kkb <- kkb[ which( !is.na( kkb$SYMBOL ) ), ]
kkk <- kkb %>% group_by(PathwayID) %>% summarize(genes = paste(GeneID,
collapse = ","))
##
Results <- data.frame(Pathway = path, S, PValue, stringsAsFactors = FALSE)
rownames(Results) <- g
Results$pathID <- rownames(Results)
resultado <- left_join(Results, kkk, by = c(pathID = "PathwayID"))
resultado <- resultado[which(resultado$P.DE < 0.05), ]
return(resultado)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.