date generated: r Sys.Date()

Background

Mitch performs unidimensional and multidimensional gene set enrichment analysis. The concept behind this dates to work by Cox and Mann (https://doi.org/10.1186/1471-2105-13-S16-S12). This implementation is suited to R based workflows of multi-omics datasets. This software was developed by Antony Kaspi and Mark Ziemann. Learn more about Mitch at the website: https://github.com/markziemann/Mitch

Input profiles

Here is the first few lines of the input profile.

suppressPackageStartupMessages({
    library("echarts4r")
    library("dplyr")
    library("tibble")
    library("gtools")
    library("gplots")
    library("beeswarm")
    library("reshape2")
    library("ggplot2")
    library("GGally")
    library("pkgload")
    library("kableExtra")
})
res <- readRDS(DATANAME)

# capture the dimensionality of the data                                               
d=ncol(res$input_profile)

# if working with >5 dimensions, then substitute the dimension (colnames) names with a number
if ( d>5 ) {
    mydims<-data.frame(seq_len(d))
    mydims$colnames<-attributes(res)$profile_dimensions
    colnames(mydims)<-c("dimension","contrast_name")
    print(kbl( mydims, caption = "Profile dimensions" ) %>%
    kable_styling("hover", full_width = FALSE))
    colnames(res$input_profile)<- paste("d",seq_len(d),sep="")
    colnames(res$ranked_profile)<- paste("d",seq_len(d),sep="")
    ss<-res$ranked_profile
}

head(res$input_profile) %>%
  kbl(caption="The profiling data being passed to mitch") %>%
  kable_styling("hover", full_width = FALSE)

Here are some metrics about the input data profile:

if (d==1) {
    formatted<-t(as.data.frame(res$analysis_metrics[ c(1,2,3,4,5 ) ]))
} else if (d==2) {
    unformatted<-t(as.data.frame(res$analysis_metrics[ c(2,3,4,5,11,12 )  ]))
    formatted<-unformatted
    formatted[1:4]<-as.character(round(as.numeric(unformatted[1:4]) , digits=0))
    formatted[5:6]<-as.character(round(as.numeric(unformatted[5:6]) , digits=5))
} else if (d>2) {
    formatted<-t(as.data.frame(res$analysis_metrics[ c(2,3,4,5 )  ]))
}

colnames(formatted)="Profile metrics"

formatted %>%
  kbl(caption="Profiling data metrics") %>%
  kable_styling("hover", full_width = FALSE)

Here is a plot of the input profiles. Note the dynamic ranges.

if ( d==1 ) {
    par(mfrow=c(2,1))
    hist(res$input_profile[,1],breaks=50,main="Distribution of DE scores",xlab=paste("DE score for ",colnames(res$input_profile)))
    plot(res$input_profile[,1],xlab=paste("DE score for ",colnames(res$input_profile)),
    pch="|",frame.plot=FALSE)
    UPS=length(which(res$input_profile[,1]>0))
    DNS=length(which(res$input_profile[,1]<0))
    TOTAL=nrow(res$input_profile)
    mtext(paste(TOTAL,"genes in total,",UPS,"trending up-regulated,",DNS,"trending down-regulated"))
} else if ( d<3 ) {
    plot(res$input_profile, pch=19, col=rgb(red = 0, green = 0, blue = 0, alpha = 0.15), main="Input profiles"  )
} else {
    ggpairs_points_plot <- function(data ,mapping, ...){
        p <- ggplot(data = data, mapping = mapping) +
        geom_point(alpha=0.05) +
        geom_vline(xintercept=0,linetype="dashed") +
        geom_hline(yintercept=0,linetype="dashed")
    }
    p<-ggpairs(as.data.frame(res$input_profile), title="Scatterplot of all genes" , lower  = list(continuous = ggpairs_points_plot ))
    print( p +  theme_bw() )
}

Here is the contour plot of the profile including all detected genes.

palette <- colorRampPalette(c("white", "yellow","orange" ,"red","darkred","black"))

#Contour of all the data
ss<-res$ranked_profile

if (d==1) {
    message("Contour plot does not apply to unidimensional analysis.")
} else if (d==2) {
    xmin=min(ss[,1])
    xmax=max(ss[,1])
    ymin=min(ss[,2])
    ymax=max(ss[,2])
    ss<-res$ranked_profile
    k<-MASS:::kde2d(ss[,1],ss[,2])
    X_AXIS=paste("Rank in contrast",colnames(ss)[1])
    Y_AXIS=paste("Rank in contrast",colnames(ss)[2])
    filled.contour(k, xlim=c(xmin,xmax),ylim=c(ymin,ymax),
    color=palette ,
    plot.title={ abline(v=0,h=0,lty=2,lwd=2,col="blue")
    title( main="Rank-rank plot of all genes",xlab=X_AXIS,ylab=Y_AXIS ) } )
} else if (d>2) {
    #pairs contour plot function
    ggpairs_func <- function(data, mapping, ...){
        p <- ggplot(data = data, mapping = mapping) +
        stat_density2d(aes(fill=..density..), geom="tile", contour = FALSE) +
        geom_vline(xintercept=0,linetype="dashed") +
        geom_hline(yintercept=0,linetype="dashed") +
        scale_fill_gradientn(colours=palette(25))

        p

    }

    #pairs contour plot
    p<-ggpairs(as.data.frame(ss), title="Contour plot of all genes after ranking" ,
    lower=list(continuous=ggpairs_func),
    diag=list(continuous=wrap("barDiag", binwidth=nrow(ss)/100)))

    print( p + theme_bw() )

    #subset contour plot
    ggpairs_contour_limit_range <- function(data ,mapping, ...){

        p <- ggplot(data = data, mapping = mapping) +
        stat_density2d(aes(fill=..density..), geom="tile", contour = FALSE) +
        geom_vline(xintercept=0,linetype="dashed") +
        geom_hline(yintercept=0,linetype="dashed") +
        scale_fill_gradientn(colours=palette(25)) +
        scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) +
        scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) )

        p

    }

    #subset points plot
    ggpairs_points_limit_range <- function(data ,mapping, ...){

        p <- ggplot(data = data, mapping = mapping) +
        geom_point(alpha=0.1) +
        geom_vline(xintercept=0,linetype="dashed") +
        geom_hline(yintercept=0,linetype="dashed") +
        scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) +
        scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) )

        p

    }

}

Input genesets

Here are some metrics about the gene sets used:

ORIGINFILE=attributes(res$input_genesets)$originfile
cat(paste("GMT file of genesets:",ORIGINFILE,"<br>"))
unformatted<-t(as.data.frame(res$analysis_metrics[c(1,6,7)]))
formatted<-as.data.frame(as.character( unformatted[1:3]) )
rownames(formatted)=rownames(unformatted)
colnames(formatted)="Gene sets metrics"

formatted %>%
  kbl(caption="Gene set metrics") %>%
  kable_styling("hover", full_width = F)
par(mfrow=c(3,1))
geneset_counts<-res$analysis_metrics$geneset_counts
boxplot(geneset_counts$count,horizontal=TRUE,frame=FALSE,main="Gene set size",xlab="number of member genes included in profile")
hist(geneset_counts$count,100,xlab="geneset size",main="Histogram of geneset size")
hist(geneset_counts$count,100,xlim=c(0,500),xlab="geneset size",main="Trimmed histogram of geneset size")

if ( d==2 ) {
    uu=length(which(res$input_profile[,1]>0 & res$input_profile[,2]>0))
    ud=length(which(res$input_profile[,1]>0 & res$input_profile[,2]<0))
    dd=length(which(res$input_profile[,1]<0 & res$input_profile[,2]<0))
    du=length(which(res$input_profile[,1]<0 & res$input_profile[,2]>0))
    a<-as.data.frame(c(uu,ud,dd,du))
    rownames(a)=c("top-right","bottom-right","bottom-left","top-left")
    colnames(a)="a"
    par(mfrow=c(1,1))
    xx<-barplot(a$a,names.arg=rownames(a),main="number of genes in each quadrant")
    text(x = xx, y = a$a, label = a$a , pos = 1, cex = 1)
} else if (d>2) {
    if (d<6) {
        sig<-sign(ss)
        sector_count<-aggregate(seq_len(nrow(sig)) ~ ., sig, FUN = length)
        colnames(sector_count)[ncol(sector_count)]<-"Count"

         sector_count %>%
         kbl(caption="Genes by sector") %>%
         kable_styling("hover", full_width = FALSE)
    }

}
if (d<2) {
    nsig=length(which(res$enrichment_result$p.adjustANOVA<0.05))
} else {
    nsig=length(which(res$enrichment_result$p.adjustMANOVA<0.05))
}

if (d==1) {
    cat("<h2>Differential pathway expression</h2><br>")
    par(mfrow=c(1,1))
    sig<-subset(res$enrichment_result,p.adjustANOVA<=0.05)

    plot(res$enrichment_result$s.dist,-log10(res$enrichment_result$pANOVA),
      xlab="s score",ylab="-log10(p-value)",
      main="volcano plot of gene set enrichments",pch=19,cex=0.8)

    points(sig$s.dist,-log10(sig$pANOVA),pch=19,cex=0.85,col="red")
    TOTAL=nrow(res$enrichment_result)
    SIG=nrow(sig)
    UP=length(which(sig$s.dist>0))
    DN=length(which(sig$s.dist<0))
    SUBHEADER=paste(TOTAL,"gene sets in total,",UP,"upregulated and ",DN,"downregulated (FDR<=0.05)")
    mtext(SUBHEADER)
}

if (d==2) {
    cat("<h2>Gene sets by quadrant</h2><br>")
    cat(paste("Number of significant gene sets (FDR<0.05)=", res$analysis_metrics$num_sets_significant,"<br>" ))
    a<-res$analysis_metrics[14]
    a<-as.data.frame(as.numeric(unlist(strsplit(as.character(a),','))),stringsAsFactors=FALSE)
    rownames(a)=c("top-right","bottom-right","bottom-left","top-left")
    colnames(a)="a"
    xx<-barplot(a$a,names.arg=rownames(a),main="number of genesets FDR<0.05")
    text(x = xx, y = a$a, label = a$a , pos = 1, cex = 1)
}

if ( nsig > 0 ) {
    if ( d>2 ) {
        if ( d<6 ) {
            cat(paste("Number of significant gene sets (FDR<0.05)=", res$analysis_metrics$num_sets_significant,"<br>" ))
            cat("<h2>Gene sets by sector</h2><br>")
            sig<-sign(res$enrichment_result[which(res$enrichment_result$p.adjustMANOVA<0.05),4:(4+d-1)])
            sector_count<-aggregate(seq_len(nrow(sig)) ~ ., sig, FUN = length)
            colnames(sector_count)[ncol(sector_count)]<-"Count"

            sector_count %>%
            kbl(caption="Gene sets by sector") %>%
            kable_styling("hover", full_width = FALSE)
        }
    }
}

Interactive enrichment scatterplot

if (d==1) {
    numsets=nrow(subset(res$enrichment_result,p.adjustANOVA<0.05))
    p=NULL
    if (numsets==0){
        message("No significant enrichments found.")
    } else {
        # volcano with echarts4r
        cat("Significance is calculated by -log10(p-value). All points shown are FDR<0.05.<br>")
        myres2<-subset(res$enrichment_result,p.adjustANOVA<0.05)
        myres2$significance<--log10(myres2$pANOVA)
        myres2$set<-sub(",","",myres2$set)
        XCOL=colnames(myres2)[4]
        YCOL=colnames(myres2)[6]
        colnames(myres2)[4]<-"xx"
        colnames(myres2)[6]<-"yy"

        p2<-myres2 %>%
            tibble::rownames_to_column("model") %>%
            dplyr::mutate(set = paste(set, setSize, p.adjustANOVA , sep = ",")) %>%
            e_charts(x = xx) %>%
            e_legend(show = FALSE) %>%
            e_x_axis(name = XCOL) %>%  # add x axis name
            e_y_axis(name = YCOL) %>%  # add y axis name
            e_scatter( serie = yy , bind = set , symbolSize = 10  ) %>%
            e_tooltip(formatter = htmlwidgets::JS("
                function(params){
                var vals = params.name.split(',')
                return('<strong>' + vals[0] +
                '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) +
                '<br />s.y-axis: ' +  parseFloat(params.value[1]).toFixed(2)) +
                '<br />setSize: ' + vals[1] +
                '<br />p-adjust ANOVA: ' + Number(vals[2]).toPrecision(2)
            }"))
    p2
    }
}

if (d==1) {
    resrows=length(res$detailed_sets)
    if (resrows>1) {
        myres2<-head(res$enrichment_result,resrows)
        myres2$significance<--log10(myres2$pANOVA)
        myres2$set<-sub(",","",myres2$set)
        XCOL=colnames(myres2)[4]
        YCOL=colnames(myres2)[6]
        colnames(myres2)[4]<-"xx"
        colnames(myres2)[6]<-"yy"

        p2<-myres2 %>%
            tibble::rownames_to_column("model") %>%
            mutate(set = paste(set, setSize, p.adjustANOVA , sep = ",")) %>%
            e_charts(x = xx) %>%
            e_legend(show = FALSE) %>%
            e_x_axis(name = XCOL) %>%  # add x axis name
            e_y_axis(name = YCOL) %>%  # add y axis name
            e_scatter( serie = yy , bind = set , symbolSize = 10  ) %>%
            e_tooltip(formatter = htmlwidgets::JS("
                function(params){
                var vals = params.name.split(',')
                return('<strong>' + vals[0] +
                '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) +
                '<br />s.y-axis: ' +  parseFloat(params.value[1]).toFixed(2)) +
                '<br />setSize: ' + vals[1] +
                '<br />p-adjust ANOVA: ' + Number(vals[2]).toPrecision(2)
            }"))

        cat("Significance is calculated by -log10(p-value). Top N sets shown irrespective of FDR.<br>")
    p2
    }
}

```r

echartscatter<-function(i){
    my_x=plan[i,1]
    my_y=plan[i,2]
    XCOL=colnames(myres)[3+my_x]
    YCOL=colnames(myres)[3+my_y]
    myres$set<-sub(","," ",as.character(myres$set))
    colnames(myres)[3+my_x]<-"xx"
    colnames(myres)[3+my_y]<-"yy"

    p2 <- myres %>%
        tibble::rownames_to_column("model") %>%
        mutate(set = paste(set, setSize, p.adjustMANOVA , s.dist , sep = ",")) %>%
        e_charts(x = xx , height = 300 ) %>%
        e_legend(show = FALSE) %>%
        e_x_axis(name = XCOL) %>%  # add x axis name
        e_y_axis(name = YCOL) %>%  # add y axis name
        e_scatter( serie = yy , bind = set , symbolSize = 10  ) %>%
        e_tooltip(formatter = htmlwidgets::JS("
            function(params){
            var vals = params.name.split(',')
            return('<strong>' + vals[0] +
            '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) +
            '<br />s.y-axis: ' +  parseFloat(params.value[1]).toFixed(2)) +
            '<br />setSize: ' + vals[1] +
            '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) +
            '<br />s.dist: ' + Number(vals[3]).toPrecision(2)
        }"))
    p2
}

if (d!=1) {
    cat("All sets with FDR<0.05. Try hovering over the points.<br>")

    numsets=nrow(subset(res$enrichment_result,p.adjustMANOVA<0.05))

    p=NULL

    if (numsets<1){

        message("No significant enrichments found.")
        p2<-list()

    } else {

        myres<-subset(res$enrichment_result,p.adjustMANOVA<0.05)

        if ( d<3 ) {

            myres2<-myres[,c(1,which(names(myres) %in% "p.adjustMANOVA"),2, 4:(4+d-1) , which(names(myres) %in% "s.dist") )]
            myres2$set<-sub(",","",myres2$set)
            XCOL=colnames(myres2)[4]
            YCOL=colnames(myres2)[5]
            colnames(myres2)[4]<-"xx"
            colnames(myres2)[5]<-"yy"

            p2 <- myres2 %>%
                tibble::rownames_to_column("model") %>%
                mutate(set = paste(set, setSize, p.adjustMANOVA , s.dist , sep = ",")) %>%
                e_charts(x = xx) %>%
                e_legend(show = FALSE) %>%
                e_x_axis(name = XCOL) %>%  # add x axis name
                e_y_axis(name = YCOL) %>%  # add y axis name
                e_scatter( serie = yy , bind = set , symbolSize = 10  ) %>%
                e_tooltip(formatter = htmlwidgets::JS("
                    function(params){
                    var vals = params.name.split(',')
                    return('<strong>' + vals[0] +
                    '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) +
                    '<br />s.y-axis: ' +  parseFloat(params.value[1]).toFixed(2)) +
                    '<br />setSize: ' + vals[1] +
                    '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) +
                    '<br />s.dist: ' + Number(vals[3]).toPrecision(2)
                }"))
        } else {
            plan<-combinations(n = d, r = 2, v = seq_len(d), repeats.allowed = FALSE)
            myres<-myres[,c(1, which(names(myres) %in% "p.adjustMANOVA") ,2,4:(4+d-1),
                which(names(myres) %in% "s.dist") ), which(names(myres) %in% "SD")]
            p2<-list()
            p2<-lapply( seq_len(nrow(plan)) , echartscatter)
        }
    htmltools::tagList(p2)
    }
}

if (d!=1) {
    cat("Top N sets irrespective of FDR. Try hovering over the points.<br>")
    resrows=length(res$detailed_sets)
    p=NULL

    if (resrows<1){
        message("No results found.")
    } else {
        myres<-head(res$enrichment_result,resrows)

        if ( d<3 ) {
            myres2<-myres[,c(1,which(names(myres) %in% "p.adjustMANOVA"),2, 4:(4+d-1) ,
                which(names(myres) %in% "s.dist") )]

            myres2$set<-sub(",","",myres2$set)
            XCOL=colnames(myres2)[4]
            YCOL=colnames(myres2)[5]
            colnames(myres2)[4]<-"xx"
            colnames(myres2)[5]<-"yy"

            p2<-myres2 %>%
                tibble::rownames_to_column("model") %>%
                mutate(set = paste(set, setSize, p.adjustMANOVA , s.dist , sep = ",")) %>%
                e_charts(x = xx) %>%
                e_legend(show = FALSE) %>%
                e_x_axis(name = XCOL) %>%  # add x axis name
                e_y_axis(name = YCOL) %>%  # add y axis name
                e_scatter( serie = yy , bind = set , symbolSize = 10  ) %>%
                e_tooltip(formatter = htmlwidgets::JS("
                    function(params){
                    var vals = params.name.split(',')
                    return('<strong>' + vals[0] +
                    '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) +
                    '<br />s.y-axis: ' +  parseFloat(params.value[1]).toFixed(2)) +
                    '<br />setSize: ' + vals[1] +
                    '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2) +
                    '<br />s.dist: ' + Number(vals[3]).toPrecision(2)
                }"))
        } else {
            plan<-combinations(n = d, r = 2, v = seq_len(d), repeats.allowed = FALSE)

            myres<-myres[,c(1, which(names(myres) %in% "p.adjustMANOVA") ,2,4:(4+d-1),
                which(names(myres) %in% "s.dist") ), which(names(myres) %in% "SD")]

            p<-list()
            lapply( seq_len(nrow(plan)) , echartscatter)
        }
    htmltools::tagList(p)
    htmltools::tagList(p2)
    }
}
if (d!=1) {
    if (resrows>2) {
        cat("<h2> A heatmap of S values for top results</h2><br>")
        hmapx<-head( res$enrichment_result[,4:(4+d-1)] ,resrows)
        rownames(hmapx)<-head(res$enrichment_result$set,resrows)
        colnames(hmapx)<-gsub("^s.","",colnames(hmapx))
        my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)

        if(resrows<25){
            CEXROW=1
        } else if (resrows<51) {
            CEXROW=0.8
        } else if (resrows<76) {
            CEXROW=0.6
        } else if (resrows<101) {
            CEXROW=0.4
        } else {
            CEXROW=0.25
        }
        heatmap.2(as.matrix(hmapx),scale="none",margin=c(10, 25),cexRow=CEXROW,trace="none",cexCol=1,col=my_palette)
    }
}
if (d!=1) {
    cat('<h2> A plot of effect size versus significance</h2><br>')
    cat('Significance is the -log2(p.adjustMANOVA) and effect size is the s.dist which is the hypotenuse of the s scores.<br>')
    colnames(myres)<-gsub("\\.","_",colnames(myres))
    myres<-res$enrichment_result
    myres$significance<--log2(myres$p.adjustMANOVA)
    myres$set<-sub(",","",myres$set)
    XCOL=colnames(myres)[ncol(myres)-3]
    YCOL=colnames(myres)[ncol(myres)]
    colnames(myres)[ncol(myres)-3]<-"xx"
    colnames(myres)[ncol(myres)]<-"yy"

    p2<-myres %>%
        tibble::rownames_to_column("model") %>%
        mutate(set = paste( set , setSize , p.adjustMANOVA , sep = ",")) %>%
        e_charts( x = xx) %>%
        e_legend(show = FALSE) %>%
        e_x_axis(name = XCOL) %>%  # add x axis name
        e_y_axis(name = YCOL) %>%  # add y axis name
        e_scatter( serie = yy , bind = set , symbolSize = 10  ) %>%
        e_tooltip(formatter = htmlwidgets::JS("
            function(params){
            var vals = params.name.split(',')
            return('<strong>' + vals[0] +
            '</strong><br />s.x-axis: ' + parseFloat(params.value[0]).toFixed(2) +
            '<br />s.y-axis: ' +  parseFloat(params.value[1]).toFixed(2)) +
            '<br />setSize: ' + vals[1] +
            '<br />p-adjust MANOVA: ' + Number(vals[2]).toPrecision(2)
        }"))
    p2
}

Results table

if ( d==1 ) {
    resrows=length(res$detailed_sets)
    myres<-head(res$enrichment_result,resrows)
    myres[,c(3:ncol(myres))]<-signif(myres[,c(3:ncol(myres))],3)
} else if (d>1) {
    resrows=length(res$detailed_sets)
    myres<-head(res$enrichment_result,resrows)
    myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)), which(names(myres) %in% "SD")]
    myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3)
}

formatted<-myres
formatted[,1]<-gsub("_"," ",myres[,1])

kbl( formatted , format="markdown", row.names=FALSE, caption = cat(paste("Top N=",resrows,"gene sets")) ,digits=100) %>%
kable_styling("hover", full_width = FALSE)

cat("<hr><br>")

Results (complete table)

myres<-res$enrichment_result

if ( d==1 ) {
    myres[,c(3:ncol(myres))]<-signif(myres[,c(3:ncol(myres))],3)
    formatted<-myres
    formatted[,1]<-gsub("_"," ",myres[,1])
} else if (d>1) {
    myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)), which(names(myres) %in% "SD")]
    myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3)
    formatted<-myres
    formatted[,1]<-gsub("_"," ",myres[,1])
    formatted[,3:ncol(myres)]<-format(myres[,3:ncol(myres)],digits=3)
}

HEADER=paste("<br><details><summary><b>","Click HERE to show results for all gene sets","</b></summary><br><p>",sep=" " )

cat(HEADER)

kbl(formatted, format="html", row.names=FALSE, caption = "Complete results",digits=100) %>%
kable_styling("hover", full_width = FALSE)

cat("<br></p></details>")
cat("<hr><br>")
if (d==1) {
    cat("<h2>Detailed Gene set reports</h2><br>")
    cat('<br>')
    cat('\n')
    ss<-res$ranked_profile

    for ( i in seq_len(resrows) ) {
        mydat = NULL
        mydat<-t(myres[i,])

        cat(paste("<b>",as.character(myres[i,1]) ,"</b><br>"))

        tbl <- kbl(mydat,col.names = NULL,format='pipe',caption=NULL ) %>%
            kable_styling("hover", full_width = FALSE)

        cat(tbl)
        cat('\n\n<!-- -->\n\n')
        cat("<br>")

        # plots
        sss<-res$detailed_sets[[i]]
        set<-names(res$detailed_sets[i])
        size<-length(sss)
        par(mfrow=c(3,1))

        beeswarm(sss,vertical = FALSE,cex=0.75,xlim=c(min(ss),
        max(ss)),col="darkgray",pch=19,main=set,cex.main=1.5,
        xlab=paste("ranked DE score in:",colnames(ss)))

        mtext("beeswarm plot",cex=0.8)

        hist(sss,xlim=c(min(ss),max(ss)),breaks=15,col="darkgray",main=NULL,
        border="black",xlab=paste("ranked DE score in:",colnames(ss)))

        mtext("histogram",cex=0.8)

        plot(sss,rep(1,length(sss)),type="n",xlim=c(min(ss),max(ss)),
        frame=FALSE,axes=FALSE,ylab="",
        xlab=paste("ranked DE score in:",colnames(ss)))

        rug(sss, ticksize = 0.9)
        axis(1)
        mtext("rugplot",cex=0.8)
        cat("<br>")

        # top gene list
        setSign=sign(res$enrichment_result[i,4])

        if (setSign==-1) {
            tops<-as.data.frame(res$detailed_sets[[i]][order(res$detailed_sets[[i]])])
        } else if (setSign==1) {
            tops<-as.data.frame(res$detailed_sets[[i]][order(-res$detailed_sets[[i]])])
        }

        cat("Top enriched genes\\n")
        tops$GeneID<-rownames(tops)
        tops<-tops[,c(2,1)]
        colnames(tops)=c("GeneID","Gene Rank")
        cat("<br>")

        tbl <- kbl(head(tops,n=20L), format='pipe', row.names=FALSE,caption="Top 20 genes",digits=100) %>%
                kable_styling("hover", full_width = FALSE)

        cat(tbl)
        cat('\n\n<!-- -->\n\n')
        HEADER=paste("<br><details><summary><b>","Click HERE to show all gene set members","</b></summary><br><p>",sep=" " )
        cat(HEADER)

        tbl <- kbl(tops, format='pipe', row.names=FALSE, caption="All member genes",digits=100) %>%
                kable_styling("hover", full_width = FALSE)

        cat(tbl)
        cat('\n\n<!-- -->\n\n')
        cat("<br></p></details>")
        cat("<br><hr>")
    }
}
twodimplot<-function(i) {
    ll<-res$enrichment_result[i,]
    size<-ll$setSize
    sss<-res$detailed_sets[[i]]
    k<-MASS:::kde2d(sss[,1],sss[,2])
    filled.contour( k, color = palette, xlim=c(xmin,xmax),ylim=c(ymin,ymax),
    plot.title={ abline(v=0,h=0,lty=2,lwd=2,col="blue")
    title( main=ll$set , xlab=X_AXIS,ylab=Y_AXIS )})

    plot(sss, pch=19, col=rgb(red = 0, green = 0, blue = 0, alpha = 0.2),
    main=ll$set , xlim=c(xmin,xmax),ylim=c(ymin,ymax),
    xlab=X_AXIS,ylab=Y_AXIS)

    abline(v=0,h=0,lty=2,lwd=2,col="blue")
    sss_long<-melt(sss)
    colnames(sss_long) <- c("gene","contrast","value")
    colnames(ss_long) <- c("gene","contrast","value")

    p<-ggplot(ss_long,aes(contrast,value)) +
    geom_violin(data=ss_long,fill = "grey", colour = "grey") +
    geom_boxplot(data=ss_long,width=0.9,fill="grey",outlier.shape = NA) +
    geom_violin(data=sss_long,fill = "black", colour = "black") +
    geom_boxplot(data=sss_long,width=0.1,outlier.shape = NA) +
    labs(y = "Position in rank",title = ll[,1] )

    print( p + theme_bw() +
    theme( axis.text=element_text(size=14),
    axis.title=element_text(size=15),
    plot.title = element_text(size = 14)))
}


# subset contour plot
ggpairs_contour_limit_range <- function(data ,mapping, ...){
    p <- ggplot(data = data, mapping = mapping) +
    stat_density2d(aes(fill=..density..), geom="tile", contour = FALSE) +
    geom_vline(xintercept=0,linetype="dashed") +
    geom_hline(yintercept=0,linetype="dashed") +
    scale_fill_gradientn(colours=palette(25)) +
    scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) +
    scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) )
    p
}


# subset points plot
ggpairs_points_limit_range <- function(data ,mapping, ...){
    p <- ggplot(data = data, mapping = mapping) +
    geom_point(alpha=0.1) +
    geom_vline(xintercept=0,linetype="dashed") +
    geom_hline(yintercept=0,linetype="dashed") +
    scale_x_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[1]))]),max(ss[,gsub("~","",as.character(mapping[1]))])) ) +
    scale_y_continuous( limits = range(min(ss[,gsub("~","",as.character(mapping[2]))]),max(ss[,gsub("~","",as.character(mapping[2]))])) )
    p
}


ndplot<-function(i) {
    ll<-res$enrichment_result[i,]
    size<-ll$setSize
    ss<-res$ranked_profile
    sss<-res$detailed_sets[[i]]

    if ( d>5 ) {
        colnames(sss)<- paste("d",seq_len(d),sep="")
    }

    empty_cnt <- length(which(is.na(cor(sss,use="pairwise.complete.obs"))))
    if ( empty_cnt == 0 ) {

        p<-ggpairs(as.data.frame(sss), title=ll[,1], lower=list(continuous=ggpairs_contour_limit_range),
        diag=list(continuous=wrap("barDiag", binwidth=nrow(ss)/10)) )

        print( p + theme_bw() )

        p<-ggpairs(as.data.frame(sss), title=ll[,1], lower= list(continuous = ggpairs_points_limit_range ),
        diag=list(continuous=wrap("barDiag", binwidth=nrow(ss)/10)))

        print( p + theme_bw() )
    }

    sss_long<-melt(sss)
    colnames(sss_long) <- c("gene","contrast","value")
    colnames(ss_long) <- c("gene","contrast","value")

    empty_cnt <- apply(sss,2,function(x) sum(as.numeric(is.finite(x))))
    empty_cnt <- length(which(empty_cnt==0))
    if ( empty_cnt == 0 ) {

        p<-ggplot(ss_long,aes(contrast,value)) +
        geom_violin(data=ss_long,fill = "grey", colour = "grey") +
        geom_boxplot(data=ss_long,width=0.9,fill="grey",outlier.shape = NA) +
        geom_violin(data=sss_long,fill = "black", colour = "black") +
        geom_boxplot(data=sss_long,width=0.1,outlier.shape = NA) +
        labs(y = "Position in rank",title = ll[,1] )

        print( p + theme_bw() +
        theme( axis.text=element_text(size=14),
        axis.title=element_text(size=15),
        plot.title = element_text(size = 14)))
    }
}


topgenelist<-function(i) {
    ss<-res$ranked_profile
    sss<-res$detailed_sets[[i]]

    if ( d>5 ) {
        colnames(sss)<- paste("d",seq_len(d),sep="")
    }

    tl=bl=tr=br=0
    myres<-head(res$enrichment_result,resrows)
    myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)) , which(names(myres) %in% "s.dist")]
    myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3)

    #select 2 strongest columns to highlight genes
    if( d>2 ) {
        cols<-(ncol(myres)-(2*d)+1):(ncol(myres)-(2*d)+d)
        COLS<-order(-abs(unlist((myres[i,cols]))))[1:2]
        sss<-sss[,COLS]
        cols<-cols[COLS]
        mysx=signif(myres[i,cols[1]],3)
        mysy=signif(myres[i,cols[2]],3)
    } else {
        cols=c(ncol(myres)-3,ncol(myres)-2)
        mysx=signif(myres[i,cols[1]],3)
        mysy=signif(myres[i,cols[2]],3)
    }

    if ( mysy>0 ) { tl=tl+1 ; tr=tr+1  } else { bl=bl+1 ; br=br+1 }
    if ( mysx>0 ) { tr=tr+1 ; br=br+1  } else { tl=tl+1 ; bl=bl+1 }

    if (bl==2) {
        myquad<-sss[which(sss[,1]<0 & sss[,2]<0),,drop=FALSE]
        topgenes<-myquad[order(-rank(myquad[,1]*myquad[,2])),,drop=FALSE]
    }

    if (tr==2) {
        myquad<-sss[which(sss[,1]>0 & sss[,2]>0),,drop=FALSE]
        topgenes<-myquad[order(-rank(myquad[,1]*myquad[,2])),,drop=FALSE]
    }

    if (br==2) {
        myquad<-sss[which(sss[,1]>0 & sss[,2]<0),,drop=FALSE]
        topgenes<-myquad[order(rank(myquad[,1]*myquad[,2])),,drop=FALSE]
    }

    if (tl==2) {
        myquad<-sss[which(sss[,1]<0 & sss[,2]>0),,drop=FALSE]
        topgenes<-myquad[order(rank(myquad[,1]*myquad[,2])),,drop=FALSE]
    }

    cat("<br>")
    topgenes<-as.data.frame(topgenes)
    topgenes$Gene<-as.character(rownames(topgenes))
    topgenes<-topgenes[,c(3,1,2)]

    tbl <- kbl(head(topgenes,n=20L), format='pipe', row.names=FALSE, caption="Top 20 genes", digits=100) %>%
        kable_styling("hover", full_width = FALSE)

    cat(tbl)
    cat('\n\n<!-- -->\n\n')
    HEADER=paste("<br><details><summary><b>","Click HERE to show all gene set members","</b></summary><br><p>",sep=" " )
    cat(HEADER)
    sss<-res$detailed_sets[[i]]

    tbl <- kbl(sss, format='pipe', row.names=TRUE, caption="All member genes", digits=100) %>%
        kable_styling("hover", full_width = FALSE)

    cat(tbl)
    cat('\n\n<!-- -->\n\n')
    cat("<br></p></details>")
    cat("<br><hr>")
}


#functions end here

if (d!=1) {
    cat("<h2> Detailed Gene set reports</h2><br>")
    resrows=length(res$detailed_sets)
    ss<-res$ranked_profile
    myres<-head(res$enrichment_result,resrows)
    myres<-myres[,c(1:3, which(names(myres) %in% "p.adjustMANOVA"), which(names(myres) %in% "s.dist") ,4:((d*2)+3)), which(names(myres) %in% "SD")]
    myres[,3:ncol(myres)]<-signif(myres[,3:ncol(myres)],3)
    palette <- colorRampPalette(c("white", "yellow","orange" ,"red","darkred","black"))
    ss_long<-melt(ss)
    colnames(ss_long) <- c("gene","contrast","value")

    if ( d<3 ) {
        xmin=min(ss[,1])
        xmax=max(ss[,1])
        ymin=min(ss[,2])
        ymax=max(ss[,2])

            for ( i in seq_len(resrows )) {
                mydat = NULL
                mydat$metrics<-names(myres[2:ncol(myres)])
                mydat$values<-unname(t(myres[i,2:ncol(myres)]))
                mydat<-as.data.frame(cbind(mydat$metrics,mydat$values))
                colnames(mydat)<-c("metric","value")
                cat(paste("<b>",as.character(myres[i,1]) ,"</b><br>"))

                tbl <- kbl(mydat,format='pipe', caption=as.character(t(myres[i,1]))) %>%
                    kable_styling("hover", full_width = FALSE)

                cat(tbl)
                cat('\n\n<!-- -->\n\n')
                cat("<br>")
                twodimplot(i)
                cat("<br>")
                topgenelist(i)
                cat("<hr><br>")
            }
        } else {

        for ( i in seq_len(resrows )) {
            mydat = NULL
            mydat$metrics<-names(myres[2:ncol(myres)])
            mydat$values<-unname(t(myres[i,2:ncol(myres)]))
            mydat<-as.data.frame(cbind(mydat$metrics,mydat$values))
            colnames(mydat)<-c("metric","value")
            cat(paste("<b>",as.character(myres[i,1]) ,"</b><br>"))

            tbl <- kbl(mydat,format='pipe',digits=5,caption=as.character(t(myres[i,1]))) %>%
                    kable_styling("hover", full_width = FALSE)

            cat(tbl)
            cat('\n\n<!-- -->\n\n')
            cat("<br>")
            ndplot(i)
            cat("<br>")
            topgenelist(i)
            cat("<hr><br>")
        }
    }
}

Here is the session info with all the versions of packages used.

sessionInfo()

END of report



markziemann/mitch documentation built on Nov. 3, 2024, 7:28 p.m.