#' @eval get_description('fold_change')
#' @examples
#' D = MTBLS79_DatasetExperiment()
#' M = fold_change(factor_name='Class')
#' M = model_apply(M,D)
#' @import stats
#' @export fold_change
fold_change = function(
factor_name,
paired=FALSE,
sample_name=character(0),
threshold=2,
control_group=character(0),
method = "geometric",
conf_level = 0.95,
...) {
out=struct::new_struct('fold_change',
factor_name=factor_name,
paired=paired,
sample_name=sample_name,
threshold=threshold,
control_group=control_group,
method = method,
conf_level=conf_level,
...)
return(out)
}
.fold_change<-setClass(
"fold_change",
contains=c('model'),
slots=c(
# INPUTS
factor_name='entity',
paired='entity',
sample_name='entity',
threshold='entity',
control_group='entity',
method='entity',
conf_level='entity',
# OUTPUTS
fold_change='entity',
upper_ci='entity',
lower_ci='entity',
significant='entity'
),
prototype = list(name='Fold change',
description=paste0('Fold change is the relative change in mean (or ',
'non-parametric equivalent) intensities of a feature between all pairs of levels ',
'in a factor.'),
type="univariate",
predicted='fold_change',
citations=list(
bibentry(
bibtype='Article',
year=2020,
volume=45,
number=6,
pages='750-770',
author=as.person('Robert M. Price Jr and Douglas G. Bonett'),
title='Confidence Intervals for Ratios of Means and Medians',
journal='Journal of Educational and Behavioral Statistics'
)
),
# ontology="STATO:0000304",
.params=c('factor_name','sample_name','paired','threshold','control_group','method','conf_level'),
.outputs=c('fold_change','lower_ci','upper_ci','significant'),
factor_name=ents$factor_name,
sample_name=entity(name='Sample names',
type='character',
description='The name of a sample_meta column containing sample identifiers for paired sampling.'
),
paired=entity(name='Paired fold change',
value=FALSE,
type='logical',
description=c(
'TRUE' = 'Fold change is calculated taking into account paired sampling.',
'FALSE'= 'Fold change is calculated assuming there is no paired sampling.'),
),
fold_change=entity(name='Fold change',
type='data.frame',
description='The fold change between groups',
value=data.frame()
),
lower_ci=entity(name='Lower confidence interval',
type='data.frame',
description='Lower confidence interval for fold change',
value=data.frame()
),
upper_ci=entity(name='Upper confidence interval',
type='data.frame',
description='Upper confidence interval for fold change.',
value=data.frame()
),
method=enum(name='Fold change method',
type='character',
description=c(
'geometric' = paste0('A log transform is applied before using ',
'group means to calculate fold change. In the non-tranformed',
'space this is equivalent to using geometric group means. ',
'Confidence intervals for independant and paired sampling ',
'are estimated using standard error of the mean in log ',
'transformed space before being transformed back to the ',
'original space.'),
"median" = paste0('The group medians and the method described ',
'by Price and Bonett is used to ',
'estimate confidence intervals. For paired data standard ',
'error of the median is used to estimate confidence ',
'intervals from the median fold change of all pairs.'),
"mean" = paste0('The group means and the method described by ',
'Price and Bonnet is used to estimate confidence ',
'intervals. For paired data standard error of the mean is ',
'used to estimate confidence intervals from the mean ',
'fold change of all pairs.')
),
value="geometric",
allowed=c("geometric","median","mean")
),
control_group=entity(
name='Control group',
description = paste0('The level name of the group used in ',
'the denominator (where possible) when computing fold change.'
),
type='character',
max_length = 1,
value=character(0)
),
threshold=entity(
name='threshold',
description = paste0('The fold change threshold for labelling ',
'features as significant.'
),
type='numeric',
max_length = 1,
value=2
),
significant = entity(
name='Significant features',
description=paste0('A logical indictor of whether the calculated ',
'fold change including the estimated confidence limits is greater ',
'than the selected threshold.'),
type='data.frame',
value=data.frame()
),
conf_level = entity(
name = 'Confidence level',
description = 'The confidence level of the interval.',
type='numeric',
value=0.95,
max_length = 1
)
)
)
#' @export
#' @template model_apply
setMethod(f="model_apply",
signature=c("fold_change",'DatasetExperiment'),
definition=function(M,D)
{
# log transform
if (M$method=='geometric') {
# log transform
D$data=log2(D$data)
}
X=D$data
Y=D$sample_meta
# levels for factor of interest
L=levels(as.factor(Y[[M$factor_name]]))
L=rev(L)
# put control group last, if provided
if (length(M$control_group)>0) {
w=which(L==M$control_group)
if (length(w)>0) {
L=c(L[-w],L[w])
}
}
# number of pairwise comparisons
n=((length(L)^2)-length(L))/2
# prep some matrices
FC=matrix(NA,nrow=ncol(X),ncol=n)
rownames(FC)=colnames(D)
LCI=FC
UCI=FC
comp=character(0)
counter=1
D$sample_meta[[M$factor_name]]=factor(D$sample_meta[[M$factor_name]])
# for all pairs of groups
for (A in 1:(length(L)-1)) {
for (B in (A+1):(length(L))) {
# filter groups to A and B
FG=filter_smeta(factor_name=M$factor_name,mode='include',levels=L[c(A,B)])
FG=model_apply(FG,D)
# change to ordered factor so that we make use of control group
#FG$filtered$sample_meta[[M$factor_name]]=ordered(FG$filtered$sample_meta[[M$factor_name]],levels=L[c(A,B)])
if (M$method=='geometric') {
control_group = NULL
if (length(M$control_group)>0) {
if (L[B] == M$control_group) {
control_group = M$control_group
}
}
# apply t-test
TT=ttest(
alpha=0.05,
mtc='none',
factor_names=M$factor_name,
paired=M$paired,
paired_factor=M$sample_name,
conf_level=M$conf_level,
control_group=control_group)
TT=model_apply(TT,predicted(FG))
# log2(fold change) is the difference in estimate.mean from ttest
if (M$paired) {
fc=TT$estimates[,1]
} else {
fc=TT$estimates[,1]-TT$estimates[,2] # log2 fold change
}
FC[,counter]=fc
LCI[,counter]=TT$conf_int[,1]
UCI[,counter]=TT$conf_int[,2]
counter=counter+1
comp=c(comp,paste0(L[A],'/',L[B]))
} else if (M$method == 'median') {
D2 = predicted(FG)
# check for pairs in features
if (M$paired) {
FF=pairs_filter(
factor_name=M$factor_name,
sample_id=M$sample_name)
FF=model_apply(FF,D2)
D2=predicted(FF)
}
# calculate medians and confidence intervals for all features
out=lapply(D2$data,function(x) {
y1=x[D2$sample_meta[[M$factor_name]]==L[A]]
y2=x[D2$sample_meta[[M$factor_name]]==L[B]]
if (M$paired) {
# if one of pair is NA, set both to NA
y1[is.na(y2)]=NA
y2[is.na(y1)]=NA
}
if (all(is.na(y1)) | all(is.na(y2))) {
val=rep(NA,3) # report NA if a group is missing
} else {
val=ci_delta_nu(y1,y2,alpha=1-M$conf_level,paired=M$paired)
}
val=matrix(val,nrow=1,ncol=3)
colnames(val)=c('median','lci','uci')
return(val)
})
nmes=names(out)
out=do.call(rbind,out)
rownames(out)=nmes
# merge with original names
temp=merge(
FC[,counter,drop=FALSE],
out,
by = 'row.names',
all.x=TRUE,
)
rownames(temp)=temp$Row.names
# sort back into original order
temp=temp[rownames(FC),]
FC[,counter]=temp$median
LCI[,counter]=temp$lci
UCI[,counter]=temp$uci
counter=counter+1
comp=c(comp,paste0(L[A],'/',L[B]))
} else if (M$method == 'mean') {
D2 = predicted(FG)
# check for pairs in features
if (M$paired) {
FF=pairs_filter(
factor_name=M$factor_name,
sample_id=M$sample_name)
FF=model_apply(FF,D2)
D2=predicted(FF)
# calculate means and confidence intervals for all features
out=lapply(D2$data,function(x) {
y1=x[D2$sample_meta[[M$factor_name]]==L[A]]
y2=x[D2$sample_meta[[M$factor_name]]==L[B]]
if (M$paired) {
# if one of pair is NA, set both to NA
y1[is.na(y2)]=NA
y2[is.na(y1)]=NA
}
if (all(is.na(y1)) | all(is.na(y2))) {
ret=matrix(NA,nrow=1,ncol=3) # report NA if a group is missing
colnames(ret)=c('fold_change','lower_ci','upper_ci')
} else {
ret=ci.mean.paired(1-M$conf_level,y1,y2)
}
return(ret)
})
} else {
# calculate means and confidence intervals for all features
out=lapply(D2$data,function(x) {
y1=x[D2$sample_meta[[M$factor_name]]==L[A]]
y2=x[D2$sample_meta[[M$factor_name]]==L[B]]
if (all(is.na(y1)) | all(is.na(y2))) {
ret=matrix(NA,nrow=1,ncol=8) # report NA if a group is missing
colnames(ret)=c("Mean1", "Mean2", "df", " Mean1/Mean2", "LL", "UL", " Log-ratio", "SE")
} else {
ret=ci.mean.bs(1-M$conf_level,na.exclude(y1),na.exclude(y2))
}
return(ret)
})
}
nmes=names(out)
out=do.call(rbind,out)
rownames(out)=nmes
# merge with original names
temp=merge(
FC[,counter,drop=FALSE],
out,
by = 'row.names',
all.x=TRUE,
)
rownames(temp)=temp$Row.names
# sort back into original order
temp=temp[rownames(FC),]
if (M$paired) {
FC[,counter]=temp[,3]
LCI[,counter]=temp[,4]
UCI[,counter]=temp[,5]
} else {
FC[,counter]=temp[,6]
LCI[,counter]=temp[,7]
UCI[,counter]=temp[,8]
}
counter=counter+1
comp=c(comp,paste0(L[A],'/',L[B]))
}
}
}
colnames(FC)=comp
colnames(LCI)=comp
colnames(UCI)=comp
# store in object
if (M$method=='geometric') {
#store in object
M$fold_change=as.data.frame(2^FC)
M$lower_ci=as.data.frame(2^LCI)
M$upper_ci=as.data.frame(2^UCI)
} else {
M$fold_change = as.data.frame(FC)
M$lower_ci = as.data.frame(LCI)
M$upper_ci = as.data.frame(UCI)
}
check1 = M$lower_ci > M$threshold
check2 = M$upper_ci < 1/M$threshold
M$significant = data.frame(significant=check1 | check2)
colnames(M$significant)=comp
rownames(M$significant)=colnames(D)
return(M)
}
)
#' @eval get_description('fold_change_plot')
#' @export fold_change_plot
#' @include PCA_class.R
#' @examples
#' C = fold_change_plot()
fold_change_plot = function(number_features=20,orientation='portrait',...) {
out=struct::new_struct('fold_change_plot',
number_features=number_features,
orientation=orientation,
...)
return(out)
}
.fold_change_plot<-setClass(
"fold_change_plot",
contains='chart',
slots=c(
number_features='entity',
orientation='entity'),
prototype = list(name='Fold change plot',
description=paste0('A plot of fold changes calculated for a chosen ',
'subset of features. A predefined fold change threshold is indicated ',
'by shaded regions.'),
type="boxlot",
.params=c('number_features','orientation'),
number_features=entity(
name='Features to plot',
description = 'The number randomly selected features to plot, or
a list of column numbers.',
type='numeric',
value=10
),
orientation=enum(
name='Plot orientation',
description = c(
'landscape' = 'Features are plotted on the y-axis.',
'portrait' = 'Features are plotted on the x-axis.'
),
type='character',
value='portrait',
allowed=c('portrait','landscape'),
max_length=1
)
)
)
#' @export
#' @template chart_plot
setMethod(f="chart_plot",
signature=c("fold_change_plot",'fold_change'),
definition=function(obj,dobj)
{
# choose randomly,or use provided vector
if (length(obj$number_features)==1) {
S=sample.int(nrow(dobj$fold_change),size=obj$number_features)
} else {
S=obj$number_features
}
A=data.frame(
'fc'=dobj$fold_change[S,1],
'group'=colnames(dobj$fold_change)[1],
'lci'=dobj$lower_ci[S,1],
'uci'=dobj$upper_ci[S,1],
'feature'=rownames(dobj$fold_change)[S])
if (ncol(dobj$fold_change)>1) {
for (k in 2:ncol(dobj$fold_change)) {
B=data.frame(
'fc'=dobj$fold_change[S,k],
'group'=colnames(dobj$fold_change)[k],
'lci'=dobj$lower_ci[S,k],
'uci'=dobj$upper_ci[S,k],
'feature'=rownames(dobj$fold_change)[S])
A=rbind(A,B)
}
}
A[,c(1,3,4)]=log2(A[,c(1,3,4)])
A$x=1:nrow(A)
out=ggplot(data=A,aes(x=feature,y=fc,color=group)) +
geom_rect(xmin=-Inf,xmax=Inf,ymin=-Inf,ymax=-log2(dobj$threshold),fill='#9EC086',inherit.aes = FALSE,alpha=0.02) +
geom_rect(xmin=-Inf,xmax=Inf,ymax=Inf,ymin=log2(dobj$threshold),fill='#9EC086',inherit.aes = FALSE,alpha=0.02) +
#geom_rect(xmin=-Inf,xmax=Inf,ymax=log2(M$threshold),ymin=-log2(M$threshold),fill='#B6B6B6',inherit.aes = FALSE,alpha=0.02) +
geom_pointrange(aes(ymin=lci,ymax=uci),position=position_dodge(width=0.75)) +
#geom_hline(yintercept = log2(dobj$threshold),color='red') +
#geom_hline(yintercept = -log2(dobj$threshold),color='red') +
xlab('Feature') +
ylab('log2(Fold change)')+
scale_colour_Publication() +
theme_Publication(base_size = 12)
if (obj$orientation=='landscape') {
out=out+coord_flip()
} else {
out=out+theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5))
}
return(out)
}
)
ci_delta_nu = function(y1,y2,alpha=0.05,paired=FALSE) {
if (!paired) {
out=ci.median.bs(alpha=alpha,y1=y1,y2=y2)
return(out)
} else {
if (length(y1) != length(y2)) {
stop('the same number of samples must be present in all groups for a paired comparison')
}
r = y1/y2
r=r[!is.na(r)]
mr=mean(r)
md=median(r)
s=sd(r)/sqrt(length(r)) # standard error of mean
z=qt(1-(alpha/2),length(r)-1)
sf=sqrt(pi/2)
out=t(c(md,md-(sf*z*s),md+(sf*z*s)))
colnames(out)=c('fold_change','lower_ci','upper_ci')
return(out)
}
}
ci.mean.bs <- function(alpha, y1, y2){
# from 10.3102/1076998620934125
# Compute confidence interval for a ratio of population means of ratio-scale
# measurements in a design with a 2-level between-subjects factor.
# Arguments:
# alpha: alpha level for 1-alpha confidence
# y1 n1 x 1 vector of scores for group 1
# y2: n2 x 1 vector of scores for group 2
# Returns:
# confidence interval
y1=y1[!is.na(y1)]
y2=y2[!is.na(y2)]
n1 <- length(y1)
n2 <- length(y2)
m1 <- mean(y1)
m2 <- mean(y2)
v1 <- var(y1)
v2 <- var(y2)
var <- v1/(n1*m1^2) + v2/(n2*m2^2)
df <- var^2/(v1^2/(m1^4*(n1^3 - n1^2)) + v2^2/(m2^4*(n2^3 - n2^2)))
t <- qt(1 - alpha/2, df)
est <- log(m1/m2)
se <- sqrt(var)
ll <- exp(est - t*se)
ul <- exp(est + t*se)
out <- t(c(m1, m2, df, exp(est), ll, ul, est, se))
colnames(out) <- c("Mean1", "Mean2", "df", " Mean1/Mean2", "LL", "UL", " Log-ratio", "SE")
return(out)
}
ci.mean.paired = function(alpha,x,y) {
r = x/y
r=r[!is.na(r)]
mr=mean(r)
s=sd(r)/sqrt(length(r)) # standard error of mean
z=qt(1-(alpha/2),length(r)-1)
out=t(c(mr,mr-(z*s),mr+z*s))
colnames(out)=c('fold_change','lower_ci','upper_ci')
return(out)
}
ci.median.bs <- function(alpha, y1, y2) {
# from 10.3102/1076998620934125
# Computes confidence interval for a ratio of population medians of ratio-scale
# measurements in a design with a 2-level between-subjects factor.
# Arguments:
# alpha: alpha level for 1-alpha confidence
# y1: n1 x 1 vector of scores for group 1
# y2: n2 x 1 vector of scores for group 2
# Returns:
# confidence interval
y1=y1[!is.na(y1)]
y2=y2[!is.na(y2)]
z <- qnorm(1 - alpha/2)
n1 <- length(y1)
y1 <- sort(y1)
n2 <- length(y2)
y2 <- sort(y2)
med1 <- median(y1)
med2 <- median(y2)
o1 <- round(n1/2 - sqrt(n1))
if (o1 < 1) {o1 = 1}
o2 <- n1 - o1 + 1
l1 <- log(y1[o1])
u1 <- log(y1[o2])
p <- pbinom(o1 - 1, size = n1, prob = .5)
z0 <- qnorm(1 - p)
se1 <- (u1 - l1)/(2*z0)
o1 <- round(n2/2 - sqrt(n2))
if (o1 < 1) {o1 = 1}
o2 <- n2 - o1 + 1
l2 <- log(y2[o1])
u2 <- log(y2[o2])
p <- pbinom(o1 - 1, size = n2, prob = .5)
z0 <- qnorm(1 - p)
se2 <- (u2 - l2)/(2*z0)
se <- sqrt(se1^2 + se2^2)
logratio <- log(med1/med2)
ll <- exp(logratio - z*se)
ul <- exp(logratio + z*se)
out <- t(c(exp(logratio), ll, ul))
colnames(out) <- c("fold_change", "LL", "UL")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.