###############################################################################
#
# Utility routines
#
# created 2012-10-11
#
# Author: Pavlo Lutsik
###############################################################################
#
# rmse
#
# Root mean square error
#
# D data matrix
# T recovered profiles
# A recovered proportions
#
#
rmse<-function(D,T,A){
sqrt(sum((D-T%*%A)^2)/nrow(D)/ncol(D))
}
###############################################################################
# logLik2
#
# Calculate log-likelihood of a fitted linear regression model
#
# @param vector of the model residuals
# @return log-likelihood value
#
# @author Pavlo Lutsik
logLik2<-function(res){
N<-length(res)
val <- 0.5 * (- N * (log(2 * pi) + 1 - log(N) + log(sum(res^2))))
return(val)
}
###############################################################################
#wilcoxTestTwins<-function(betas, twin1, twin2){
#
# deltas<-betas[,twin1]-betas[,twin2]
# wilcox<-apply(deltas, 1, wilcox.test)
# results<-data.frame(meanDeltas=rowMeans(deltas), pval=sapply(wilcox, simplify=T, function(t) return(t$p.value)))
#
#}
#
###############################################################################
#tTestTwins<-function(betas, twin1, twin2){
#
# deltas<-betas[,twin1]-betas[,twin2]
# tt<-apply(deltas, 1, t.test)
# results<-data.frame(meanDeltas=rowMeans(deltas), pval=sapply(tt, simplify=T, function(t) return(t$p.value)))
#
#}
###############################################################################
# fetch.annotations
#
# collects annotations for all probe ids in rownames(dataset) from IlluminaHumanMethylation450k.db.
# @author Pavlo Lutsik
# @param dataset a matrix/data.frame object all row names of which are valid Infinium probe ids (cgXXXXXXXX).
# @param tables a character vector of IlluminaHumanMethylation450k.db tables. If NULL all existing annotations are fetched
# @return a data.frame object with annotations
# @seealso \code{\link{IlluminaHumanMethylation450k.db}}
# @export
fetch.annotations<-function(dataset, ids, tables=NULL, verbose=F)
{
require(IlluminaHumanMethylation450k.db)
if(is.null(tables)){
all.tables<-ls("package:IlluminaHumanMethylation450k.db")
meta.tables<-all.tables[c(14:65)]
}else{
meta.tables<-tables
}
#metaTables<-allTables[grep("IlluminaHumanMethylation450k[A-Z]", allTables, perl=T)]
if(!missing(dataset)) ids<-rownames(dataset)
all.results<-data.frame(ID=ids)
cn<-colnames(all.results)
for (table.name in meta.tables) {
# table2 <- get(table.name)
do.call("<-", list(as.name("table2"), as.name(table.name)))
if(!class(table2) %in% c("function","integer") ) {
if(length(table2)>1){
if(verbose) print(paste("Extracting data from:", table.name))
present<-intersect(ids,keys(table2))
result<-data.frame(ID=present, V=as.character(as.list(table2[present])))
cn<-c(cn,strsplit(table.name,"IlluminaHumanMethylation450k")[[1]][2])
all.results<-merge(all.results, result, by="ID", all.x=T, sort=F)
colnames(all.results)<-cn
}
}
}
return(all.results[match(ids,all.results$ID),])
}
###############################################################################
fetch.by.interval<-function(chromosome, start, end, strand="*", type="probes450"){
if(!type %in% c("probes450", "hg19", "mm9")){
stop("unsupported annotation type")
}
ann<-rnb.get.annotation(type)
ann<-ann[match(chromosome,names(ann))]
interval<-GRanges(chromosome,IRanges(start=start, end=end), strand=strand)
olap<-findOverlaps(ann[[1]], interval)
return(rownames(rnb.annotation2data.frame(ann))[queryHits(olap)])
}
###############################################################################
combin<-function(V,k){
#
# if(k>1){
# vv<-rep(V,k)
# Posss<-apply(combn(length(vv),k),2,function(idx) vv[idx])
# Posss<-Posss[,!duplicated(t(Posss)), drop=FALSE]
# }else{
# return(matrix(V,ncol=length(V)))
# }
#
# print("##################### DIMENSIONS ####################")
# print(dim(Posss))
# return(Posss)
t(expand.grid(list(V)[rep(1,k)]))
}
###############################################################################
randsplxmat<-function(m,n){
U = -log(matrix(runif(m*n), nrow=m))
S = colSums(U) # probably only 1 row, so specify 1 explicitly
dump<-sapply(1:n, function(j) U[,j] <<- U[,j]/S[j])
return(U)
}
###############################################################################
projectV<-function(TP,V){
TP_proj<-apply(TP, 2, function(col){
dist<-abs(repmat(matrix(V, ncol=length(V)), n=length(col), m=1)-col)
proj<-V[apply(dist,1,which.min)]
})
TP_proj
}
###############################################################################
opt.factorize.exact<-function(affine=TRUE,
nonnegative=TRUE,
aggr="no",
replace=1L,
nsamples=0L,
naggsets=5L,
chunksize=10L,
verbose=FALSE,
varargin=NULL){
options<-list(
affine=affine,
nonnegative=nonnegative,
aggr=aggr,
replace=replace,
nsamples=nsamples,
naggsets=naggsets,
chunksize=chunksize,
verbose=verbose)
if(is.null(varargin)){
return(options)
}else{
print("not supported yet")
return(options)
}
}
###############################################################################
licols<-function(X,tol=1e-10){
if(all(X==0)){ #X has no non-zeros and hence no independent columns
return(list(r<-integer(), Xsubs=matrix(0), idx<-integer()))
}
colnames(X)<-as.character(1:ncol(X))
qr.res<-qr(X, tol = tol, LAPACK=T)
Q<-qr.Q(qr.res)
R<-qr.R(qr.res)
E<-match(colnames(R), colnames(X))
diagr <- abs(diag(R));
r <-length(which(diagr >=tol*diagr[1])) #Rank estimation
idx<-sort(E[1:r])
Xsub<-X[,idx]
return(list(r=r, idx=idx, Xsub=Xsub))
}
adjust.nmf.coefs<-function(coefs){
t(t(coefs)/colSums(coefs))
}
###############################################################################
#
# Down-rank SNP-affected probes using a one-dimentional k-means
# with k=3
#
rank.snp<-function(D){
require(Ckmeans.1d.dp)
R3 <- numeric(nrow(D))
for(i in 1:nrow(D)){
res <- Ckmeans.1d.dp(D[i,], 3)
R3[i] <- sum(res$withinss)
}
R3
}
###############################################################################
get.cpg.intervals<-function(cpg.coords, ids=NULL, offset=50){
#if(annot.full<-rnb.get.annotation("probes450")
intervals<-GRanges(seq=cpg.coords$Chromosome,
IRanges(start=cpg.coords$Start, width=1),
strand=rep("*", nrow(cpg.coords)))
intervals<-flank(intervals, width=offset, both=TRUE)
intervals<-reduce(intervals)
as.data.frame(intervals)
}
###############################################################################
#
# generateExample
#
# Examples for testing factorization methods
#
# m number of genomic features
# n number of profiles
# k hidden dimension
# t.method method used to generate matrix T: "integer", "uniform" or "beta"
# a.method method used to generate matrix A: "uniform" or "dirichlet"
# V a vector of possible values for \cs{method} integer or the upper and lower
# bounds for the \cs{method} uniform
# beta1 first beta-distribution parameter for \cs{t.method} beta
# beta2 second beta-distribution parameter for \cs{t.method} beta
# proportion.prior numeric vector of length \code{r}
# noise.sd a standard deviation for additive Gaussian noize
# digits desired precision
#
# return \cs{list} with elements \cs{D}, \cs{T} and \cs{A}
#
#
generateExample<-function(
m,
n,
k,
t.method="beta",
a.method="dirichlet",
e.method="gaussian",
V=c(0,1),
beta1=0.5,
beta2=0.5,
proportion.prior=NULL,
proportion.var.facror=1,
Alower=rep(0, k),
Aupper=rep(1, k),
noise.sd=0,
digits=12
){
if(t.method=="integer"){
Tt<-matrix(Inf,ncol=k, nrow=m)
ri<-1
it<-0
while(any(Tt==Inf) && it<100){
it<-it+1
vertex<-V[sample.int(length(V), m, replace=T)]
if(all(colSums(abs(Tt-vertex))!=0)){
Tt[1:m,ri]<-vertex
ri<-ri+1
}
}
}else if(t.method=="uniform"){
Tt<-matrix(runif(m*k, min=min(V), max=max(V)), ncol=k)
}else if(t.method=="beta"){
Tt<-matrix(rbeta(m*k, shape1=beta1, shape2=beta2), ncol=k)
}else{
stop("this method for generating T is not implemented")
}
if(a.method=="uniform"){
if(max(Alower)==0 && min(Aupper)==1){
A<-matrix(-log(runif(k*n)), ncol=n, nrow=k)
A<-t(t(A)/colSums(A))
A[1,]<-A[1,]+(rep(1,n)-colSums(A))
}else{
A <- t(sapply(1:k, function(pri){
-log(runif(n, min=Alower[pri], max=Aupper[pri]))
}))
A<-t(t(A)/colSums(A))
A[1,]<-A[1,]+(rep(1,n)-colSums(A))
# for(kk in 1:n){
# A[,kk] <- RProjSplxBox(A[,kk,drop=FALSE], Alower, Aupper);
#
}
}else if(a.method=="dirichlet"){
if(is.null(proportion.prior)){
proportion.prior<-rep(1/k,k)
}
A<-t(rdirichlet(n, proportion.prior*proportion.var.facror))
}else{
stop("this method for generating A is not implemented")
}
if(e.method=="gaussian"){
if(noise.sd>0){
E=matrix(rnorm(m*n, sd=noise.sd), ncol=n)
}else{
E=matrix(0, nrow=m, ncol=n)
}
}else{
stop("this method for generating additive noise is not implemented")
}
# get the data matrix
Tt<-round(Tt, digits=digits)
A<-round(A, digits=digits)
E<-round(E, digits=digits)
D<-Tt%*%A+E
D[D<min(V)]<-min(V)
D[D>max(V)]<-max(V)
return(list("T"=Tt,"A"=A,"D"=D, "E"=E))
}
###############################################################################
RMSE_T<-function(That, Tstar, perm){
if(length(unique(perm))==length(perm)){
rmseT<-sqrt(sum((That-Tstar[,perm])^2)/ncol(Tstar)/nrow(Tstar))
}else{
rmseT<-sqrt(mean((
sapply(unique(perm), function(comp){
abs(Tstar[,comp]-rowMeans(That[,perm==comp,drop=FALSE]))
}))^2))
}
rmseT
}
###############################################################################
MAE_A<-function(Ahat, Astar, perm){
if(length(unique(perm))==length(perm)){
maeA<-sum(abs(Ahat-Astar[perm,]))/ncol(Astar)/nrow(Astar)
}else{
# maeA<-sum(abs(
# sapply(unique(perm), function(comp){
# abs(Astar[comp,,drop=FALSE]/colSums(Astar[unique(perm),,drop=FALSE])-colSums(Ahat[perm==comp,,drop=FALSE]))
# })))/ncol(Astar)/nrow(Astar)
aggrAhat<-t(sapply(unique(perm), function(comp){
colSums(Ahat[perm==comp,,drop=FALSE])
}))
aggrAstar<-sweep(Astar[unique(perm),,drop=FALSE], 2, colSums(Astar[unique(perm),,drop=FALSE]), "/")
maeA<-sum(abs(aggrAhat-aggrAstar))/nrow(aggrAstar)/ncol(aggrAstar)
}
maeA
}
###############################################################################
estimate.accuracy<-function(fr, trueT, trueA, check=FALSE){
perm<-MeDeCom:::match.components(fr$T, trueT, check=check)
if(!is.null(perm)){
if(length(unique(perm))==length(perm)){
rmseT<-sqrt(sum((trueT-fr$T[,perm])^2)/ncol(trueT)/nrow(trueT))
maeA<-sum(abs(trueA-fr$A[perm,]))/ncol(trueA)/nrow(trueA)
}else{
rmseT<-sqrt(sum((
sapply(unique(perm), function(comp){
abs(trueT[,comp]-rowMeans(fr$T[,perm==comp,drop=FALSE]))
}))^2)/ncol(trueT)/nrow(trueT))
maeA<-sum(
sapply(unique(perm), function(comp){
abs(trueA[comp,]-colMeans(fr$A[perm==comp,,drop=FALSE]))
}))/ncol(trueA)/nrow(trueA)
}
}else{
rmseT<-NA
maeA<-NA
}
return(list(rmseT=rmseT, maeA=maeA))
}
###############################################################################
get.distance.matrix<-function(mdd, measure, centered=FALSE){
if(centered){
mdd<-lapply(mdd, function(mm) sweep(mm, 1, rowMeans(mm)))
}
mdd<-do.call("cbind", mdd)
if(measure=="euclidean"){
d <- dist(t(mdd))
}else if(measure=="angular"){
dm<-matrix(NA, ncol=ncol(mdd), nrow=ncol(mdd))
colnames(dm)<-colnames(mdd)
rownames(dm)<-colnames(mdd)
for(ri in 1:ncol(mdd)){
for(ci in 1:ncol(mdd)){
dm[ri,ci]<-sum(mdd[,ci]*mdd[,ri])/sqrt(sum(mdd[,ci]^2))/sqrt(sum(mdd[,ri]^2))
}
}
d <- as.dist(1-dm)
}else if(measure=="correlation"){
d <- as.dist(1-cor(mdd, method="pearson"))
}
}
#####
RQuadHC_dummy<-function(G, W, Tk, tol, lower, upper){
outp<-Tk+matrix(runif(ncol(Tk)*nrow(Tk))*tol*10, nrow(Tk),ncol(Tk))
outp[outp>1]<-1;
dummy_loss<-sqrt(mean((Tk-outp)^2))
return(list(outp, dummy_loss))
}
### END
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.