iPSCpoweR: iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling

Documented in aggByClone fpkm fpkm2tpm getGeneExpr getSamplesInfo getTxExpr initPar multiProbSpurious probSpurious

.checkPkg <- function(package){
  return(tryCatch(require(package, character.only=TRUE),error = function(e) FALSE))
}

#' initPar
#'
#' Initializes multithreading. This does not need to be called directly; the package's functions will call this.
#'
#' @param ncores The number of cores to use, default detectedCores()-1.
#'
#' @return A makeCluster object if parallel is supported and more than one core is used; otherwise FALSE.
#'
#' @export
initPar <- function(ncores=NULL){
    hasP <- .checkPkg("parallel")
    if(is.null(ncores) & hasP){
        library(parallel)
        ncores <- detectCores() - 1
    }else{
        if(ncores>1){
		if(!hasP){
			warning("Could not find the `parallel` package. Continuing with single thread.")
			ncores <- 1
		}else{
			library(parallel)
		}
	}
    }
    if(ncores>1)	return(makeCluster(ncores))
    return(FALSE)
}

#' getTxExpr
#'
#' Returns the transcript-level expression of the HipSci dataset.
#'
#' @param unit The expression unit in which to return the data; either "counts" (default), "fpkm", or "tpm".
#'
#' @export
getTxExpr <- function(unit="counts"){
    unit <- match.arg(unit, c("tpm","fpkm","counts"))
    data("hipsci_txCounts")
    if(unit != "counts") txCounts <- fpkm(txCounts)
    if(unit=="tpm") txCounts <- fpkm2tpm(txCounts)
    txCounts <- txCounts[which(rowSums(txCounts)>0),]
    return(txCounts)
}


#' getGeneExpr
#'
#' Returns the gene-level expression of the HipSci dataset.
#'
#' @param unit The expression unit in which to return the data; either "counts" (default), "fpkm", or "tpm".
#'
#' @export
getGeneExpr <- function(unit="counts", quiet=F){
    unit <- match.arg(unit, c("tpm","fpkm","counts"))
    txCounts <- getTxExpr(unit)
    data("tx2gene")
    if(!quiet)  message(paste("Aggregating transcript",unit,"to gene-level..."))
    gc <- aggregate(txCounts,by=list(gene=tx2gene[row.names(txCounts)]),FUN=sum)
    row.names(gc) <- gc$gene;
    gc$gene <- NULL
    return(gc)
}

#' Convert FPKM values to TPM values
#'
#' Convert FPKM values to transcripts per million (TPM).
#'
#' @param fpkm A dataframe, matrix or numeric vector of fpkm values.
#'
#' @return The corresponding TPM values.
#'
#' @export
fpkm2tpm <- function(fpkm){
  if(is.matrix(fpkm) | is.data.frame(fpkm)){
    for(i in 1:ncol(fpkm))    fpkm[,i] <- fpkm2tpm(as.numeric(fpkm[,i]))
    return(fpkm)
  }
  tpm <- fpkm/sum(fpkm)*10^6
  tpm[which(is.na(tpm))] <- 0
  return(tpm)
}

#' Convert counts to FPKM values
#'
#' Convert fragment counts to FPKM values, using the sum as library size. By default, effective length is used; set mean.frag.size to 0 to use total (given) length.
#'
#' @param counts A numeric dataframe or matrix, with transcripts as rows.
#' @param mean.frag.size A numeric value indicating the mean fragment size (default 220) used for effective length calculation.
#' @param removeShorter Logical; whether to remove transcripts whose length is shorter than the mean fragment size (default FALSE).
#'
#' @return A dataframe containing the corresponding FPKM values.
#'
#' @export
fpkm <- function(counts, mean.frag.size=500, removeShorter=F){
    data("txLengths")
    r <- intersect(names(txLengths),row.names(counts))
    if(length(r)==0)    stop("Could not find lengths for these transcripts. Is the data really transcript counts?")
    if(length(r) < nrow(counts))    message(paste("Could not find length for",nrow(counts)-length(r),"transcripts; these will be excluded."))
    txLengths <- txLengths[r]-mean.frag.size
    counts <- counts[r,]
    if(removeShorter) counts[which(txLengths < 0),] <- 0
    txLengths[which(txLengths < mean.frag.size)] <- mean.frag.size
    txLengths <- txLengths/1000
    for(i in 1:ncol(counts))    counts[,i] <- 10^6*counts[,i]/(txLengths*sum(counts[,i]))
    as.data.frame(counts)
}



#' aggByClone
#'
#' Aggregate expression data from technical replicates
#'
#' @param dat The expression data. If NULL (default), transcript-level count data are used.
#' @param removeFeeder Logical; whether to exclude samples from lines grown on feeder cells (default TRUE).
#' @param ag.fun Aggregation function; default `sum`
#'
#' @return A list with two slots: `dat` containing the aggregated data, and `annotation` containing the corresponding metadata.
#'
#' @export
aggByClone <- function(dat=NULL, removeFeeder=T, ag.fun=sum){
    data("hipsci_annotation")
    if(is.null(dat)){
        data("hipsci_txCounts")
        dat <- txCounts
    }else{
        annotation <- annotation[colnames(dat),]
    }
    if(removeFeeder){
        w <- which(!annotation$feeder)
        annotation <- annotation[w,]
        dat <- dat[,w]
    }
    ag <- aggregate(t(dat),by=list(clone=annotation$line),na.rm=T,FUN=ag.fun)
    row.names(ag) <- ag$clone
    ag$clone <- NULL;
    ag <- as.data.frame(t(ag))
    annotation <- annotation[!duplicated(annotation$line),]
    row.names(annotation) <- annotation$line
    return(list(annotation=annotation, dat=ag))
}


#' probSpurious
#'
#' Returns the probability of a given gene and/or foldchange in the permutation analyses.
#'
#' @param geneSymbol A HGNC gene symbol
#' @param log2FC The log2(foldchange) in your test (optional, but either geneSymbol or log2FC must be given)
#'
#' @return The probability of the gene and/or foldchange in the permutation analyses.
#'
#' @export
probSpurious <- function(geneSymbol=NULL, log2FC=NULL){
	if(is.null(geneSymbol) & is.null(log2FC)) stop("At least one of geneSymbol and log2FC must be given.")
	data("probSpurious")
	p1 <- ifelse( !is.null(geneSymbol) & geneSymbol %in% row.names(probGeneDE), probGeneDE[geneSymbol,"prob"], NA) 
	if(!is.na(p1))	print(paste("Gene's probability of differential expression across permutations:",format(p1, scientific=T)))
	p2 <- ifelse( is.null(log2FC), NA, probSpuriousFC(abs(log2FC)) )
	if(!is.na(p2))	print(paste("Probability of the foldchange in permutations:",format(p2, scientific=T)))
	if(is.na(p1) & is.na(p2))	message(paste("Could not find information for gene '",geneSymbol,"'",sep=""))
	return(p1,p2)
}

#' multiProbSpurious
#'
#' Annotates a data.frame of DEA results (such as produced by edgeR's `topTags` function) with the probability of each gene and foldchange in the DEA permutations.
#'
#' @param d A data.frame of differential expression results, but HGNC symbols as row.names and a column indicating the log2 foldchange.
#' @param FCfield The name of the column in `d` containing the log2(foldchange) (default `logFC`).
#' @param FDRfield Optional; the name of the column in `d` containing the FDR. If given, a column will be added with the maximum of the FDR and the probability of DE in the permuations. (default disabled)
#'
#' @return The data.frame d, with the addition columns probDEinPermutations and probFCinPermutations
#'
#' @export
multiProbSpurious <- function(d, FCfield="logFC", FDRfield=NULL){
	data("probSpurious")
	d <- as.data.frame(d)
	if(!(FCfield %in% colnames(d)))	stop("Could not find the foldchange column.")
	d$probDEinPermutations <- NA
	i <- intersect(row.names(d),row.names(probGeneDE))
	d[i,"probDEinPermutations"] <- probGeneDE[i,"prob"]
	d$probFCinPermutations <- probSpuriousFC(abs(d[i,FCfield]))
	if(!is.null(FDRfield)){
		if(!(FDRfield %in% colnames(d)))	stop("Could not find the column specified by the `FDRfield` argument.")
		w <- which(d$probFCinPermutations > 0.05 & d$probDEinPermutations > 0.01)
		d$maxFDR <- d[[FDRfield]]
		d[w,"maxFDR"] <- apply(cbind(d[w,FDRfield],d[w,"probDEinPermutations"]),1,na.rm=T,FUN=max)
	}
	return(d)
}


#' getSamplesInfo
#'
#' Returns an annotation of the samples used.
#'
#' @param removeFeeder Logical; whether to remove the samples cultivated on feeders, which should be exluded from the permutation analysis (Default TRUE).
#' @param doPlot Whether to plot summary information on the samples (default TRUE).
#'
#' @return A data.frame.
#'
#' @export
getSamplesInfo <- function(removeFeeder=T, doPlot=T){
	data("hipsci_annotation")
	if(removeFeeder){
		a <- annotation[which(!annotation$feeder),]
	}else{
		a <- annotation
	}
	a$Ethnicity <- as.factor(as.character(a$Ethnicity))
	if(!doPlot)	return(a[,c(1:4,6,8)])
	layout(matrix(1:4,nrow=2))
	nbClone <- table(aggregate(a$clone,by=list(indiv=a$individual),FUN=function(x){ length(unique(x))})$x)
	sex <- table(aggregate(a$sex,by=list(indiv=a$individual),FUN=function(x){ x[[1]] })$x)
	eth <- table(aggregate(a$Ethnicity,by=list(indiv=a$individual),FUN=function(x){ x[[1]] })$x)
	barplot(sex,ylab="Number of individuals",main="Sex")
	barplot(eth,ylab="Number of individuals",main="'Ethnicity'")
	barplot(nbClone,ylab="Number of individuals",main="iPSC clone per individual")
	return(a[,c(1:4,6,8)])
}

plger/iPSCpoweR documentation built on Feb. 2, 2022, 1:37 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

plger/iPSCpoweR
iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling

R/getFunctions.R
In plger/iPSCpoweR: iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling

Defines functions getSamplesInfo multiProbSpurious probSpurious aggByClone fpkm fpkm2tpm getGeneExpr getTxExpr initPar .checkPkg

Documented in aggByClone fpkm fpkm2tpm getGeneExpr getSamplesInfo getTxExpr initPar multiProbSpurious probSpurious

R Package Documentation

Browse R Packages

We want your feedback!

plger/iPSCpoweR iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling

R/getFunctions.R In plger/iPSCpoweR: iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling

Defines functions getSamplesInfo multiProbSpurious probSpurious aggByClone fpkm fpkm2tpm getGeneExpr getTxExpr initPar .checkPkg

Documented in aggByClone fpkm fpkm2tpm getGeneExpr getSamplesInfo getTxExpr initPar multiProbSpurious probSpurious

R Package Documentation

Browse R Packages

We want your feedback!

plger/iPSCpoweR
iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling

R/getFunctions.R
In plger/iPSCpoweR: iPSCpoweR: transcriptomics-based evaluation of experimental designs for iPSC disease modeling