R/deseq2helpers.R

Defines functions LOG extractComparisonsList createGroupComparisons zipall outputFilesTable generateMDLink knitLabels extractMultiLFCSignif extractMultiResultsSignif extractUpDownSignif extractSignif ncbi_url to_link ensembl_url saveTables getNegExplanation getPosExplanation getComparisonString getComparisonTitle getComparison getComparisonFoldChange getFilterThreshold toSortedTibble deseqResult getCounts

#' gets normalised counts as a tibble from DeseqDataset
#'
#'
#' @return  a normalised counts tibble with geneid as last column
#'
#' @export
getCounts <- function(dds.r, groups=NULL, grouping=NULL){
  co <- DESeq2::counts(dds.r, normalized=TRUE)
  if(! is.null(groups) & ! is.null(grouping)){
     checkCountColumnsDesignSamples(dds.r,grouping)
     gi <-  which(grouping$group %in% groups)
     co <- co[,gi]
  }
  cot <- tibble::as_tibble(co)
  cot$geneid <- rownames(co)
  cot
}



#' deseq result for condition (default: group)
#'
#' group condition1 vs condition2
#'
#' normal deseq result with "contrast", adds condition names to metadata
#' could not find it, and parsing with getComparisonXXXX works but is brittle
#'
#' @param dds.r normalised DESeq dataset
#' @param condition1 the 1. condition
#' @param condition2 the 2. condition
#' @param condition (group to compare)
#'
#' @return the deseq2 result
#'
#' @export
deseqResult <- function(dds.r, condition1, condition2, condition="group"){
   comparison <- c(condition, condition1, condition2)
   deseq.result <- DESeq2::results(dds.r, contrast=comparison)
   S4Vectors::metadata(deseq.result)$comparison <- comparison
   deseq.result
}

#' converts deseq result to a sorted tibble joined with annotation
#'
#' annotation should be a tab file with geneid, %gc, length columns
#' other columns are optional
#' creation of annotation with ensemblFA2tab.py
#' ensemblFA2tab input should consist of:
#'
#' zcat build.ncRNA.fa.gz build.cDNA.fa.gz > rna.fa  from ensembl
#' Ids and build should be identical to build.GTF
#'
#' @param deseq.result deseq result dataset
#' @param ensembl annotation
#' @param countsdata the normalised expression values counts(dds.r, normalized=TRUE)
#' @param filterThreshold the mean value cutoff from deseq2 analysis
#' @param urlfun function to create links e.g. ensembl_url, ncbi_url, can be NULL
#'
#' @export
toSortedTibble <- function(deseq.result, annotation, filterThreshold, urlfun=NULL, countsdata=NULL){
    comp.all <-  as.data.frame(deseq.result) %>%
    dplyr::mutate(geneid = rownames(deseq.result), absFC = abs(log2FoldChange), meanFilter=baseMean < filterThreshold)
    if(! is.null(urlfun)){
      comp.all <- comp.all %>% dplyr::mutate(url=urlfun(geneid), link=to_link(url, geneid))
    }
    if(! is.null(countsdata)){
      comp.all <- comp.all %>% dplyr::left_join(countsdata)
    }
    if(! is.null(annotation)){
        comp.all <- comp.all %>%
        dplyr::left_join(annotation, by=c(geneid="geneid")) %>%
        dplyr::arrange(padj, absFC) %>%
        dplyr::select(-absFC) %>%
        dplyr::select(geneid, dplyr::everything())
        if(nrow(deseq.result) != nrow(comp.all)){
          error <- paste("we lost some rows in toSortedTibble, should be left_join!: ", nrow(deseq.result), " -> ", nrow(comp.all))
          stop(error)
        }
    }else{
      comp.all <- comp.all %>%
        dplyr::arrange(padj, absFC) %>%
        dplyr::select(-absFC) %>%
        dplyr::select(geneid, dplyr::everything())
    }
    comp.all
}


#' extracts the independent filtering threshold from deseq result
#'
#' @export
getFilterThreshold <- function(deseq.result){
  S4Vectors::metadata(deseq.result)$filterThreshold
}

#' extracts the complete comparison string including log2FoldChange
#'
#' @export
getComparisonFoldChange <- function(deseq.result){
  meta <- S4Vectors::elementMetadata(deseq.result)
  lfcrow <- rownames(meta) == "log2FoldChange"
  meta$description[lfcrow]
}

#' extracts the comparison string
#' the comparison must be done on the group column
#'
#' @export
getComparison <- function(deseq.result){
  cfc <- getComparisonFoldChange(deseq.result)
  gsub(".*group (\\.*)", "\\1", cfc)
}

#' extracts the comparison
#'
#' @export
getComparisonTitle <- function(deseq.result){
  cn <- getComparisonFoldChange(deseq.result)
  trimws(strsplit(cn, ":")[[1]][2])
}

#' extracts a string from the comparison that is usable as a file path
#'
#' @export
getComparisonString <- function(deseq.result){
    # \\& \\ not working?
   #gsub("/| |\\||\\+|-|\\|\\&", "_", "as|d\\+f/sfd asdf& aer$|% ")
   REPLACER <- "/| |\\||\\+|-|\\|\\&"
   tit <- getComparisonTitle(deseq.result)
   gtit <- gsub(".*group (\\.*)", "\\1", tit)
   titu <- gsub(REPLACER, "_", gtit)
   titu
}


#' a meaningful explanation for the direction of the comparison
#' when log2 fold change > 0
#'
#' @export
getPosExplanation <- function(comparison){
   conds <- sapply(strsplit(comparison, "vs")[[1]], trimws)
   paste("log2 fold change > 0: ", "higher in ", conds[1], "lower in ", conds[2])
}

#' a meaningful explanation for the direction of the comparison
#' when log2 fold change < 0
#'
#' @export
getNegExplanation <- function(comparison){
  conds <- sapply(strsplit(comparison, "vs")[[1]], trimws)
  paste("log2 fold change < 0: ", "higher in ", conds[2], "lower in ", conds[1])
}

#' saving the result table in tab and xls
#'
#' @param comp.all a tibble with the data
#' @param pathbase the file name to save as will be appended with .diff.norm.[tab|xlsx]
#'
#' @export
saveTables <- function(comp.all, pathbase){
  LOG(paste("saving tables to ", pathbase))
  readr::write_tsv(comp.all, paste(pathbase, ".diff.norm.tab",sep=""))
  writexl::write_xlsx(comp.all, paste(pathbase, ".diff.norm.xlsx",sep=""))
}

#' create an ensemble url
#' @param create a url to ensembl gene id
#'
#' @export
ensembl_url <- function(ensemblid){
  sprintf("https://www.ensembl.org/id/%s",ensemblid)
}

#' create a link
#' @param url http://...
#' @param txt link text
#'
#' @export
to_link <- function(url, txt){
  paste('<a href="', url, '">', txt, '</a>', sep="")
}

#' create an ncbi gene link url
#' @param create a url to ncbi gene id
#'
#' @export
ncbi_url <- function(ncbi_gene_id){
  sprintf("https://www.ncbi.nlm.nih.gov/gene/?term=%s", ncbi_gene_id)
}

#' extract row indices of significanlty deregulated genes
#' @param result list of deseq2 results
#' @param pcut adjusted p-value cutoff
#' @param lfc log fold change cutoff
#'
#' @export
extractSignif <- function(result, pcut, lfc){
  nm <- lapply(result, function(r){
    which(r$padj < pcut & abs(r$log2FoldChange) > lfc)
  })
  names(nm) <- lapply(result, getComparison)
  nm
}

#' extract counts of significanlty deregulated genes for one comparison
#' @param comparison deseq2 result tibble
#' @param pcut adjusted p-value cutoff
#' @param lfc log fold change cutoff
#'
#' @export
extractUpDownSignif <- function(comparison, pcut, lfc){
   up <- sum(comparison$padj < pcut & comparison$log2FoldChange > lfc, na.rm = TRUE)
   down <- sum(comparison$padj < pcut & comparison$log2FoldChange < -lfc, na.rm = TRUE)
   dereg <- up + down
   tibble::tibble(p.adj=pcut, log2FC=lfc, dereg=dereg, up=up, down=down)
}

#' extract counts of significanlty deregulated genes for a list of comparisons
#'
#' @param result comarison result list
#' @param pcut the adjusted p-value cutoff
#' @param lfc the absolute log 2 fold change cutoff
#'
#' @export
extractMultiResultsSignif <- function(result, pcut, lfc){
  do.call("rbind", lapply(result, function(r){
      sig <- extractUpDownSignif(r, pcut, lfc)
      cbind(tibble::tibble(comparison=getComparison(r)), sig)
  }))
}

#' extract counts of significanlty deregulated genes for one of comparison
#' but multiple log2 fold changes
#'
#' @param compasison the comparison data table
#'
#' @export
extractMultiLFCSignif <- function(comparison){
  do.call("rbind", lapply(c(0, 1, 2, 3 ,4), function(lfc){
     extractUpDownSignif(comparison, pcut, lfc)
  }))
}


#' adds labels to chunks in deseq2_pairwise.Rmd (tpl)
#'
#' https://github.com/yihui/knitr-examples/blob/master/041-label-i.Rmd
#'
#' @param titfile the group comparison name suitable for a path
#' @param knitdir the directory where knitr runs
#'
#' @export
knitLabels <- function(titfile, knitdir){
      knitr::pat_brew() ## switching to pat_brew
      infile <- system.file('deseq2_pairwise.tpl',package="deseq2analysis")
      titfiles <- gsub("_","", titfile)
      withLabels <- knitr::knit(text = readLines(infile))
      outpath <- paste(knitdir,"/comp_",titfile,".Rmd",sep="")
      fileConn<-file(outpath)
      writeLines(withLabels, fileConn)
      close(fileConn)
      knitr::pat_md()
      outpath
}

#' generates a link for md
#'
#' @param title the tile of the link
#' @param link the url
#'
#' @export
generateMDLink <- function(title, link){
   paste("[",title,"]", "(", link,")", sep="") # {:target='_blank'}???
}

#' returns a table with output files based on grouping and config
#'
#' @param deseqconfig the deseq config
#' @param deseq.r the list with all comparisons
#'
#' @export
outputFilesTable <- function(deseqconfig, deseq.r){
    purrr::map_df(deseq.r, function(deseq.result){
        #comparison was added by me to metadata(deseq.result)
        comparisongroups <- S4Vectors::metadata(deseq.result)$comparison[2:3]
        tit <- getComparison(deseq.result)
        titfile <- getComparisonString(deseq.result)
        resv <- c(comparison=tit)
        if(deseqconfig$output$savetables){
            exl <- paste(titfile, ".diff.norm.xlsx",sep="")
            tab <- paste(titfile, ".diff.norm.tab",sep="")
            exll <- generateMDLink("excel file", exl)
            tabl <- generateMDLink("tab delimited", tab)
            comb <- paste(exll, tabl)
            resv <- c(resv, `DGE tables`=comb)
        }
        if(!is.null(deseqconfig$functional)){
            func <- paste(titfile,"_functional_analysis.html",sep="")
            funcz <-  paste(titfile,"_functional_analysis_tables.zip",sep="")
            funcl <- generateMDLink("html", func)
            funczl <- generateMDLink("zipped tab delimited", funcz)
            comb <- paste(funcl, funczl)
            resv <- c(resv, `functional analysis`=comb)
        }
        dplyr::bind_rows(resv)
    })
}


#' zips all output files
#'
#' @param deseqconfig the deseqconfig
#' @param knitdir the dir where its being knitted
#'
#' @export
zipall <- function(deseqconfig, knitdir){
    zipbase <- gsub(".html",".zip", deseqconfig$outputname)
    zipfile <- paste(knitdir, "/", zipbase ,sep="")
    zips <- dir(knitdir, pattern="*functional_analysis_tables.zip", full.names = TRUE)
    html <- dir(knitdir, pattern="*html", full.names = TRUE)
    dn <- dir(knitdir, pattern="*diff.norm*", full.names=TRUE)
    allz <- c(zips, html, dn)
    zip::zipr(zipfile = zipfile, files = allz)
}


#' process group comparisons in loop
#'
#' its identical to the portion in deseq2.Rmd
#' it might not have worked as a function ... must check
#'
#' @param deseqconfig the deseq config
#' @param deseq.r the list with all comparisons
#' @param dds.r the normalised values experimentset
#' @param grouping the groping table
#' @param ensembl the object containing ensembl annotation
#'
#' @export
createGroupComparisons <- function(deseqconfig, deseq.r, dds.r, grouping, ensembl){
      deseqout <- NULL
      for(deseq.result in deseq.r){

            #comparison was added by me to metadata(deseq.result)
            comparisongroups <- S4Vectors::metadata(deseq.result)$comparison[2:3]
            normcounts <- getCounts(dds.r, comparisongroups, grouping)
            filterThreshold <- getFilterThreshold(deseq.result)
            urlfun <- deseqconfig$output$urlfunction
            if(! is.null(urlfun)){
              urlfun <- get(urlfun)
            }
            comp <- toSortedTibble(deseq.result, ensembl, filterThreshold, urlfun, normcounts)
            comparisonFoldchange <- getComparisonFoldChange(deseq.result)
            tit <- getComparison(deseq.result)
            titfile <- getComparisonString(deseq.result)

            LOG(paste("creating files for: ", tit, titfile, deseqconfig$output$savetables ))

            if(deseqconfig$output$savetables){
                saveTables(comp, titfile)
            }

            if(!is.null(deseqconfig$functional)){
              if(! is.null(deseqconfig$debug) && !is.null(deseqconfig$debug$loadrds) && deseqconfig$debug$loadrds ){ #check deseqconfig for loadrds: TRUE
                LOG("skipping functional analysis but creating plots from RDS")
              }else{
                testfunctionalFromConfig(comp, titfile, deseqconfig, tit)
              }
            }else{
              LOG(paste("skipping funcitonal analysis because of config"))
            }


            ADVENTUROUS <- TRUE
            if(ADVENTUROUS){
                pairRmd <- knitLabels(titfile, KNITDIR)
            }else{
                pairRmd <- system.file('deseq2_pairwise.Rmd', package="deseq2analysis")
            }
            deseqout <- c(deseqout, knitr::knit_child(pairRmd, envir=globalenv(), options = list(echo = FALSE, warning = FALSE, results = 'hide')))
      }
      deseqout
}

#' extract the list of comparisons
#'
#'
#' @export
extractComparisonsList <- function(comparisons, dds.r){
    purrrlyr::by_row(comparisons, function(row){ deseqResult(dds.r, row$cond1, row$cond2) }) %>% dplyr::pull(.out)
}




#' LOG to console in knitr
#'
#' @param message
#'
#' @export
LOG <- function(message){
    write(message, stderr())
}
idot/deseq2analysis documentation built on Aug. 14, 2021, 5:12 a.m.