R/mg14.R

Defines functions sig2star donut entropy plotcvnet rotatedLabel myscale violinJitter violinJitterPlot ggPlot MoscowPlot lasso colTrans jitterBox mergeLevels na.zero na.mean plotSurv splitSurv splitIndex makeTimeDependent pbound human numericalize .numericalize asum nn.poisglm mindist poisl poisdl poisI ssnmf nmSolve roundProp scatterpie

Documented in colTrans donut entropy mergeLevels na.mean na.zero numericalize rotatedLabel sig2star violinJitter violinJitterPlot

# R convenience functions
# 
# Author: mg14
###############################################################################

#' Function to transform P-values into symbols of significance (***)
#' @param s A vector of P-values
#' @param breaks The breaks
#' @param labels The labels
#' @return A character vector.
#' 
#' @author mg14
#' @export
sig2star = function(s, breaks=c(-Inf, 0.001,0.01,0.05,0.1,1), labels=c("***","**","*",".","")) {
	r <- s
	r[] <- as.character(cut(s, breaks=breaks, labels)) 
	r[is.na(r)] <- ""
	r
}

#' Plot a donut
donut <- function(x, r0=0.5, r1=.75,...) {
	pie(x, border=NA, labels = paste(names(x), " (",round(100*x),"%)", sep=""), radius=r1, ...)
	polygon(cos(seq(0,2*pi,l=100))*r0, sin(seq(0,2*pi,l=100))*r0, col="white", border=NA)
}

#' Compute entropy (base e) from a vector of probabilities
#' 
#' Given p, the entropy is computed as -sum(log(p^p)).
#' @param p Vector of probabilities
#' @return numeric 
#' 
#' @author mg14
#' @export
entropy <- function(p) {
	stopifnot(abs(sum(p)-1) < length(p)*.Machine$double.eps)
	-sum(log(p^p))
}


require(RColorBrewer)
set1 <- brewer.pal(9,"Set1")

plotcvnet = function(cvnet,X, main="", simple.annot=NULL, col=set1, col0="grey", cex=sqrt(abs(colMeans(X, na.rm=TRUE))[n]+0.02)*5,xlim=c(0.5,ncol(X)),...){
	first=function(x) which(x)[1]
	lollyplot <- function(cvnet, beta, ..., X, main, cex) {
		r <- rank(cvnet$nzero[apply(beta!=0,1,first)], ties.method="first")
		R <- 1 - cvnet$cvm/cvnet$cvm[1]
		Rup <- 1 - cvnet$cvlo/cvnet$cvm[1]
		Rlo <- 1 - cvnet$cvup/cvnet$cvm[1]
		w <- apply(beta!=0,1,first)
		beta <- beta[,which.min(cvnet$cvm)]
		#n = names(beta)
		#ix <- which(c(diff(cvnet$nzero)>0, TRUE) & cvnet$nzero > 0)
		#xx <- c(0,cvnet$nzero[ix]) + 0.5
		
		ix <- na.omit(w)
		xx <- rank(	ix, ties.method="first") + 0.5
		o <- order(xx)
		ix <- ix[o]
		ix <- c(ix[1],ix)
		xx <- c(0.5,xx[o])
		#ix = c(ix[1],ix)
		plot(xx, R[ix], type="S", xaxt="n", xlab="", ylim=c(0,max(Rup)), ylab=expression(paste("Explained variance ",R^2)), main = main, xlim=xlim, col=set1[1],...)
		polygon(rep(c(xx,rev(xx)), each=2)[-2*length(ix) + c(0,-1)], rep(c(Rup[ix],rev(Rlo[ix])), each=2)[-c(1,4*length(ix))], border=NA, col = paste(set1[1],"22", sep=""))		
		lines(xx, cvnet$lambda[ix]/max(cvnet$lambda)*max(R), lty=1, type="S", col="black")
		u = par("usr")[3:4]
		#mtext(colnames(oncocyto), at=r, side=1, las=2, font=ifelse(grepl("[A-Z]", colnames(oncocyto)),3,1), cex=.7, line=0.5)
		n <- colnames(X)#names(ix)[-1]
		colLab <- ifelse(n %in% names(simple.annot), col[simple.annot[n]], "black")
		colLab[w > which.max(R) | is.na(w)] <- "grey"
		rotatedLabel(r, rep(0,length(r)), colnames(X), font=ifelse(grepl("^[A-Z]", n) &! grepl("PC", n),3,1), cex=0.66, col=colLab)
		scale <- diff(range(beta, na.rm=TRUE))*1.05
		if(scale ==0) scale=1
		shift <- mean(range(beta, na.rm=TRUE))#median(c, na.rm=TRUE)
		#lines(r,(beta - shift)/scale*diff(u)*.7 + mean(u), col="grey", type="h", lty=1)
		abline(h=(0 - shift)/scale*diff(u)*.7 + mean(u), col="grey", lty=2)
		points(r,(beta - shift)/scale*diff(u)*.7 + mean(u), col=colLab, cex=cex, pch=ifelse(beta==0,1,19))
		axis(4, labels=pretty(beta),at = (pretty(beta) - shift)/scale*diff(u)*.7 + mean(u), col="darkgrey", col.axis="darkgrey")
		mtext(side=4, line=3, "Coefficient", col="darkgrey", at=mean(u), las=0)
	}
	if(class(cvnet$glmnet.fit)[1] != "multnet"){
		#r <- rank(cvnet$nzero[apply(cvnet$glmnet.fit$beta!=0,1,first)], ties.method="first")
		#w = which.min(cvnet$cvm)
		#beta = cvnet$glmnet.fit$beta[,w]
		#c = c[c!=0]
		lollyplot(cvnet = cvnet, beta=cvnet$glmnet.fit$beta, ... = ..., X = X,   main=main, cex=cex)
	}else {j = 1; for(beta in cvnet$glmnet.fit$beta){
			#w = which.min(cvnet$cvm)
			#beta = beta[,w]
			#c = c[c!=0]
			#r <- rank(cvnet$nzero[apply(beta!=0,1,first)], ties.method="first")
			lollyplot(cvnet = cvnet, beta=beta, ... = ..., X = X,  main=paste(main, ": ", names(cvnet$glmnet.fit$beta)[j], sep=""), cex=cex)
			j = j+1
		}
	}
}

#' Plot a rotated axis label
#' @param x0 The x position
#' @param y0 The y position
#' @param labels The labels
#' @param pos The positioning relative to the coordinate. Default = 1 (below).
#' @param cex The character expansion factor. Default = 1.
#' @param srt The degree of rotation. Default = 45.
#' @param ... 
#' @return NULL
#' 
#' @author mg14
#' @export
rotatedLabel <- function(x0 = seq_along(labels), y0 = rep(par("usr")[3], length(labels)), labels, pos = 1, cex=1, srt=45, ...) {
	w <- strwidth(labels, units="user", cex=cex)
	h <- strheight(labels, units="user",cex=cex)
	u <- par('usr')
	p <- par('plt')
	f <- par("fin")
	xpd <- par("xpd")
	par(xpd=NA)
	text(x=x0 + ifelse(pos==1, -1,1) * w/2*cos(srt/360*2*base::pi), y = y0 + ifelse(pos==1, -1,1) * w/2 *sin(srt/360*2*base::pi) * (u[4]-u[3])/(u[2]-u[1]) / (p[4]-p[3]) * (p[2]-p[1])* f[1]/f[2] , labels, las=2, cex=cex, pos=pos, adj=1, srt=srt,...)
	par(xpd=xpd)
}


myscale <- function(data){
	apply(data,
			2,
			function(x){
				if(all(x %in% c(0,1,NA))) # Don't scale categorical variables, only re-center
					#x
					scale(x, scale=FALSE)
				else
					scale(x)
				#rank(x) / sum(!is.na(x)) - 0.5
			}
	)
}

#' Jitter by violins
#' @param x A vector of numbers
#' @param magnitude The overall magnitude of scatter
#' @return NULL
#' 
#' @author mg14
#' @export
violinJitter <- function(x, magnitude=1){
	d <- density(x)
	data.frame(x=x, y=runif(length(x),-magnitude/2, magnitude/2) * approxfun(d$x, d$y)(x))
}


#' Violins plot with jittered data points
#' @param y the values to be plotted
#' @param x the levels to be jittered
#' @param col the colors to be used for the outline
#' @param col.pty the colors used for the points
#' @param magnitude the magnitude of the violins
#' @param ... additional parameters passed to plot()
#' @return NULL
#' 
#' @author mg14
#' @examples 
#' f <- rep(1:5, each=50)
#' violinJitterPlot(rnorm(length(f), f), factor(f))
#' @export
violinJitterPlot <- function(y, x=factor(rep(1, length(y))), col=1:nlevels(x), col.pty = colTrans(col), magnitude=1,xlab="",ylab="", plot.violins=TRUE,...){
	yl <- split(y,x)
	o <- order(x)
	yj <- do.call("rbind",lapply(yl, violinJitter, magnitude = magnitude))
	dt <- lapply(yl, density)
	s <- max(sapply(dt, function(x) max(x$y)))
	plot(na.omit(as.numeric(x)[o]) + yj$y/s, yj$x, col=col.pty[na.omit(as.numeric(x)[o])], xlab=xlab, ylab=ylab, xaxt="n",...)
	axis(side=1, at=1:nlevels(x), labels=levels(x))
	qt <- lapply(yl, quantile)
	if(plot.violins)
		for(i in 1:length(dt)){
			lines(i+dt[[i]]$y*magnitude/2/s, dt[[i]]$x, col=col[i])
			lines(i-dt[[i]]$y*magnitude/2/s, dt[[i]]$x, col=col[i])
			for(q in 2:4){
				w <- which.min(abs(dt[[i]]$x-qt[[i]][q]))
				segments(i+dt[[i]]$y[w]*magnitude/2/s, dt[[i]]$x[w],i-dt[[i]]$y[w]*magnitude/2/s, dt[[i]]$x[w], lwd=ifelse(q==3,2,1))
			}
		}
}

#' Stars plot
#' 
#' Modified from graphics::stars()
#' @param x data.frame or matrix to plot
#' @return NULL
#' 
#' @author mg14
stars <- function (x, full = TRUE, scale = TRUE, radius = TRUE, labels = dimnames(x)[[1L]], 
		locations = NULL, nrow = NULL, ncol = NULL, len = 1, key.loc = NULL, 
		key.labels = dimnames(x)[[2L]], key.xpd = TRUE, xlim = NULL, 
		ylim = NULL, flip.labels = NULL, draw.segments = FALSE, col.segments = 1L:n.seg, 
		col.stars = NA, col.lines = NA, axes = FALSE, frame.plot = axes, 
		main = NULL, sub = NULL, xlab = "", ylab = "", cex = 0.8, 
		lwd = 0.25, lty = par("lty"), xpd = FALSE, mar = pmin(par("mar"), 
				1.1 + c(2 * axes + (xlab != ""), 2 * axes + (ylab != ""), 1, 0)), add = FALSE, plot = TRUE, density=NA, init.angle=0, ...) 
{
	if (is.data.frame(x)) 
		x <- data.matrix(x)
	else if (!is.matrix(x)) 
		stop("'x' must be a matrix or a data frame")
	if (!is.numeric(x)) 
		stop("data in 'x' must be numeric")
	n.loc <- nrow(x)
	n.seg <- ncol(x)
	if (is.null(locations)) {
		if (is.null(nrow)) 
			nrow <- ceiling(if (!is.numeric(ncol)) sqrt(n.loc) else n.loc/ncol)
		if (is.null(ncol)) 
			ncol <- ceiling(n.loc/nrow)
		if (nrow * ncol < n.loc) 
			stop("'nrow * ncol' is less than the number of observations")
		ff <- if (!is.null(labels)) 
					2.3
				else 2.1
		locations <- expand.grid(ff * 1L:ncol, ff * nrow:1)[1L:n.loc, 
		]
		if (!is.null(labels) && (missing(flip.labels) || !is.logical(flip.labels))) 
			flip.labels <- ncol * mean(nchar(labels, type = "c")) > 
					30
	}
	else {
		if (is.numeric(locations) && length(locations) == 2) {
			locations <- cbind(rep.int(locations[1L], n.loc), 
					rep.int(locations[2L], n.loc))
			if (!missing(labels) && n.loc > 1) 
				warning("labels do not make sense for a single location")
			else labels <- NULL
		}
		else {
			if (is.data.frame(locations)) 
				locations <- data.matrix(locations)
			if (!is.matrix(locations) || ncol(locations) != 2) 
				stop("'locations' must be a 2-column matrix.")
			if (n.loc != nrow(locations)) 
				stop("number of rows of 'locations' and 'x' must be equal.")
		}
		if (missing(flip.labels) || !is.logical(flip.labels)) 
			flip.labels <- FALSE
	}
	xloc <- locations[, 1]
	yloc <- locations[, 2]
	angles <- if (full) 
				seq.int(0, 2 * pi, length.out = n.seg + 1)[-(n.seg + 
									1)] + init.angle
			else if (draw.segments) 
				seq.int(0, pi, length.out = n.seg + 1)[-(n.seg + 1)] + init.angle
			else seq.int(0, pi, length.out = n.seg)
	if (length(angles) != n.seg) 
		stop("length of 'angles' must equal 'ncol(x)'")
	if (scale) {
		x <- apply(x, 2L, function(x) (x - min(x, na.rm = TRUE))/diff(range(x, 
									na.rm = TRUE)))
	}
	x[is.na(x)] <- 0
	mx <- max(x <- x * len)
	if (is.null(xlim)) 
		xlim <- range(xloc) + c(-mx, mx)
	if (is.null(ylim)) 
		ylim <- range(yloc) + c(-mx, mx)
	deg <- pi/180
	op <- par(mar = mar, xpd = xpd)
	on.exit(par(op))
	dev.hold()
	on.exit(dev.flush(), add = TRUE)
	if (plot && !add) 
		plot(0, type = "n", ..., xlim = xlim, ylim = ylim, main = main, 
				sub = sub, xlab = xlab, ylab = ylab, asp = 1, axes = axes)
	if (!plot) 
		return(locations)
	s.x <- xloc + x * rep.int(cos(angles), rep.int(n.loc, n.seg))
	s.y <- yloc + x * rep.int(sin(angles), rep.int(n.loc, n.seg))
	if (draw.segments) {
		aangl <- c(angles, if (full) 2 * pi + init.angle else pi+init.angle)
		for (i in 1L:n.loc) {
			px <- py <- numeric()
			for (j in 1L:n.seg) {
				k <- seq.int(from = aangl[j], to = aangl[j + 
										1], by = 1 * deg)
				px <- c(px, xloc[i], s.x[i, j], x[i, j] * cos(k) + 
								xloc[i], NA)
				py <- c(py, yloc[i], s.y[i, j], x[i, j] * sin(k) + 
								yloc[i], NA)
			}
			polygon(px, py, col = col.segments, lwd = lwd, lty = lty, border=col.lines, density=density)
		}
	}
	else {
		for (i in 1L:n.loc) {
			polygon(s.x[i, ], s.y[i, ], lwd = lwd, lty = lty, 
					border = col.lines[i], col = col.stars[i], density=density[i])
			if (radius) 
				segments(rep.int(xloc[i], n.seg), rep.int(yloc[i], 
								n.seg), s.x[i, ], s.y[i, ], lwd = lwd, lty = lty)
		}
	}
	if (!is.null(labels)) {
		y.off <- mx * (if (full) 
						1
					else 0.1)
		if (flip.labels) 
			y.off <- y.off + cex * par("cxy")[2L] * ((1L:n.loc)%%2 - 
						if (full) 
							0.4
						else 0)
		text(xloc, yloc - y.off, labels, cex = cex, adj = c(0.5, 
						1))
	}
	if (!is.null(key.loc)) {
		par(xpd = key.xpd)
		key.x <- len * cos(angles) + key.loc[1L]
		key.y <- len * sin(angles) + key.loc[2L]
		if (draw.segments) {
			px <- py <- numeric()
			for (j in 1L:n.seg) {
				k <- seq.int(from = aangl[j], to = aangl[j + 
										1], by = 1 * deg)
				px <- c(px, key.loc[1L], key.x[j], len * cos(k) + 
								key.loc[1L], NA)
				py <- c(py, key.loc[2L], key.y[j], len * sin(k) + 
								key.loc[2L], NA)
			}
			polygon(px, py, col = col.segments, lwd = lwd, lty = lty, border=col.lines, density=density)
		}
		else {
			polygon(key.x, key.y, lwd = lwd, lty = lty)
			if (radius) 
				segments(rep.int(key.loc[1L], n.seg), rep.int(key.loc[2L], 
								n.seg), key.x, key.y, lwd = lwd, lty = lty)
		}
		lab.angl <- angles + if (draw.segments) 
					(angles[2L] - angles[1L])/2
				else 0
		label.x <- 1.1 * len * cos(lab.angl) + key.loc[1L]
		label.y <- 1.1 * len * sin(lab.angl) + key.loc[2L]
		for (k in 1L:n.seg) {
			text.adj <- c(if (lab.angl[k] < 90 * deg || lab.angl[k] > 
									270 * deg) 0 else if (lab.angl[k] > 90 * deg && 
									lab.angl[k] < 270 * deg) 1 else 0.5, if (lab.angl[k] <= 
									90 * deg) (1 - lab.angl[k]/(90 * deg))/2 else if (lab.angl[k] <= 
									270 * deg) (lab.angl[k] - 90 * deg)/(180 * deg) else 1 - 
										(lab.angl[k] - 270 * deg)/(180 * deg))
			text(label.x[k], label.y[k], labels = key.labels[k], 
					cex = cex, adj = text.adj)
		}
	}
	if (frame.plot) 
		box(...)
	invisible(locations)
}


ggPlot <- function(x, g, width=.8, xlab="", ylab="",...){
	g <- factor(g)
	s <- split(x, g)
	o <- order(sapply(s[levels(g)], function(x) if(!all(is.na(x))) median(x, na.rm=TRUE) else NA), na.last=TRUE)
	g <- factor(g, levels=levels(g)[o])
	s <- split(x,g)
	plot(unlist(sapply(s[levels(g)], function(x) (rank(x)-1)/(length(x)-1) - .5)) * width + sort(as.numeric(g)), unlist(s), xaxt="n", xlab=xlab, ylab=ylab, ...)
	m <- sapply(s, median, na.rm=TRUE)
	segments(1:nlevels(g)-width/2, m, 1:nlevels(g)+width/2, m)
	abline(v=1:nlevels(g), lty=3)
	axis(side=1, at=1:nlevels(g), labels = levels(g))
}

MoscowPlot <- function(x, sd, col=1:length(x), at = seq_along(x), width = 0.9, ...){
	y <- seq(0,max(x + 3*sd), l=100)
	plot(NA,NA, xlim=c(0,length(x)+1), ylim=range(y), xaxt="n", ...)
	for(i in seq_along(x)){
		xx <- width/2 * pnorm(y, x[i], sd[i], lower.tail = FALSE)
		polygon(i +c(-xx, rev(xx)), c(y, rev(y)) , col=col[i], border=NA)
	}
}

lasso <- function(..., theta=1,  scale=TRUE) {
	x <- cbind(...)
	nvar <- ncol(x)
	xname <- as.character(parse(text=substitute(cbind(...))))[-1]
	vars <- apply(x, 2, function(z) var(z[!is.na(z)]))
	class(x) <- 'coxph.penalty'
	
	if (missing(theta))
		stop("Missing theta")
	
	if (scale) 
		pfun <- function(coef,theta, ndead, scale) {
			list(penalty= sum(abs(coef) *scale)*theta,
					first  = theta*sign(coef)*scale,
					second = 0,
					flag=FALSE)
		}
	else
		pfun <- function(coef,theta, ndead, scale) {
			list(penalty= sum(abs(coef))*theta,
					first  = theta*sign(coef),
					second = 0,
					flag=FALSE)
		}
	
	
	temp <- list(pfun=pfun,
			diag=TRUE,
			cfun=function(parms, iter, history) {
				list(theta=parms$theta, done=TRUE) }, 
			cparm=list(theta= theta),
			pparm= vars,
			varname=paste('lasso(', xname, ')', sep=''))
	
	attributes(x) <- c(attributes(x), temp)
	x
}

#' A pseudo transparency to a color
#' @param col A color 
#' @param f The amount of pseudotransparency 
#' @return color
#' 
#' @author mg14
#' @export
colTrans <- function(col, f=2){
	hsv <- apply(col2rgb(col), 2, rgb2hsv)
	tmp <- car::logit(hsv[2,]*.95 + 0.025) - f
	hsv[2,] <- pmin(1,pmax(0,(1/(exp(-tmp) +1) - 0.025)/.95))
	tmp <- car::logit(hsv[3,]*.95 + 0.025) + f
	hsv[3,] <- pmin(1,pmax(0,(1/(exp(-tmp) +1) - 0.025)/.95))
	apply(hsv,2, function(x) hsv(x[1],x[2],x[3]))
}

jitterBox <- function(x,y, xlim = c(1,nlevels(x)) + c(-.5,.5),...){
	#boxplot(y ~ x, ..., notch=TRUE)
	plot(jitter(as.numeric(x)),y, xlim = xlim, ..., xaxt="n", pch=16)
	s <- split(y,x)
	m <- sapply(s, mean, na.rm=TRUE)
	n <- 1:length(m)
	l <- sapply(s, length)
	c <- qnorm(0.975,0,sapply(s, sd, na.rm=TRUE)/sqrt(l))
	rect(n-.2,m-c,n+.2,m+c,border=NA, col="#88888844")
	segments(n-.2,m,n+.2,m, lwd=2)
	axis(side = 1, at=n, labels=levels(x))
	if(length(unique(x))>1){
		a <- anova(lm(y~x))
		mtext(side=3, line=0, text= paste("P =", format(a$`Pr(>F)`[1],digits=2)))
	}
}


survdiff <- function (formula, data, subset, na.action, rho = 0, continuity = FALSE) 
{
	call <- match.call()
	m <- match.call(expand.dots = FALSE)
	m$rho <- NULL
	m$continuity <- NULL
	if (!inherits(formula, "formula")) 
		stop("The 'formula' argument is not a formula")
	Terms <- if (missing(data)) 
				terms(formula, "strata")
			else terms(formula, "strata", data = data)
	m$formula <- Terms
	m[[1]] <- as.name("model.frame")
	if (is.R()) 
		m <- eval(m, parent.frame())
	else m <- eval(m, sys.parent())
	y <- model.extract(m, "response")
	if (!inherits(y, "Surv")) 
		stop("Response must be a survival object")
	if (attr(y, "type") != "right") 
		stop("Right censored data only")
	ny <- ncol(y)
	n <- nrow(y)
	offset <- attr(Terms, "offset")
	if (!is.null(offset)) {
		offset <- as.numeric(m[[offset]])
		if (length(attr(Terms, "factors")) > 0) 
			stop("Cannot have both an offset and groups")
		if (any(offset < 0 | offset > 1)) 
			stop("The offset must be a survival probability")
		expected <- sum(-log(offset))
		observed <- sum(y[, ny])
		if (rho != 0) {
			num <- sum(1/rho - ((1/rho + y[, ny]) * offset^rho))
			var <- sum(1 - offset^(2 * rho))/(2 * rho)
		}
		else {
			var <- sum(-log(offset))
			num <- var - observed + if(continuity) 1/2 else 0
		}
		chi <- num * num/var
		rval <- list(n = n, obs = observed, exp = expected, var = var, 
				chisq = chi)
	}
	else {
		strats <- attr(Terms, "specials")$strata
		if (length(strats)) {
			temp <- untangle.specials(Terms, "strata", 1)
			dropx <- temp$terms
			if (length(temp$vars) == 1) 
				strata.keep <- m[[temp$vars]]
			else strata.keep <- strata(m[, temp$vars], shortlabel = TRUE)
		}
		else strata.keep <- rep(1, nrow(m))
		if (length(strats)) 
			ll <- attr(Terms[-dropx], "term.labels")
		else ll <- attr(Terms, "term.labels")
		if (length(ll) == 0) 
			stop("No groups to test")
		else groups <- strata(m[ll])
		fit <- survdiff.fit(y, groups, strata.keep, rho)
		if (is.matrix(fit$observed)) {
			otmp <- apply(fit$observed, 1, sum)
			etmp <- apply(fit$expected, 1, sum)
		}
		else {
			otmp <- fit$observed
			etmp <- fit$expected
		}
		df <- (etmp > 0)
		if (sum(df) < 2) 
			chi <- 0
		else {
			temp2 <- ((otmp - etmp - if(continuity) 1/2 else 0)[df])[-1]
			vv <- (fit$var[df, df])[-1, -1, drop = FALSE]
			chi <- sum(solve(vv, temp2) * temp2)
		}
		rval <- list(n = table(groups), obs = fit$observed, exp = fit$expected, 
				var = fit$var, chisq = chi)
		if (length(strats)) 
			rval$strata <- table(strata.keep)
	}
	na.action <- attr(m, "na.action")
	if (length(na.action)) 
		rval$na.action <- na.action
	rval$call <- call
	if (is.R()) 
		class(rval) <- "survdiff"
	else oldClass(rval) <- "survdiff"
	rval
}

#' Merge levels of a factor
#' @param f A factor
#' @param mergeList A list of type list(newlevel1 = c(oldlevel1, oldlevel2), newlevel2=c(...),...)
#' @return A factor
#' 
#' @author mg14
#' @export
mergeLevels <- function(f, mergeList){
	oldLevels <- setdiff(levels(f), unlist(mergeList))
	newLevels <- as.list(oldLevels)
	names(newLevels) <- oldLevels
	newLevels <- c(newLevels, mergeList)
	newFactor <- f
	levels(newFactor) <- newLevels
	return(newFactor)
}

#' Replace NAs with zeros
#' @param X 
#' @return same as X 
#' 
#' @author mg14
#' @export
na.zero <- function(X){
	w <- is.na(X)
	X[w] <- 0
	attr(X, "na.action") <- which(w)
	return(X)
}

#' Replace NAs with mean
#' @param X 
#' @return same as X 
#' 
#' @author mg14
#' @export
na.mean <- function(X){
	w <- is.na(X)
	X[w] <- mean(X, na.rm=TRUE)
	attr(X, "na.action") <- which(w)
	return(X)
}


glmnet.formula <- function (formula, data, subset, ...) 
{

	cl <- match.call()
	mf <- match.call(expand.dots = FALSE)
	m <- match(c("formula", "data", "subset"), names(mf), 0L)
	mf <- mf[c(1L, m)]
	mf$drop.unused.levels <- TRUE
	mf[[1L]] <- quote(stats::model.frame)
	mf <- eval(mf, parent.frame())
	mt <- attr(mf, "terms")
	y <- model.response(mf, "any")
	if (is.empty.model(mt)) {
		stop()
	}
	x <- model.matrix(mt, mf, contrasts)
	g <- glmnet(x[,-1],y,...)
	g$y <- y
	g$x <- x
	return(g)
}

plotSurv <- function(f, col=1:(length(terms(f))-1)^2 , ...){
	s <- survfit(f, ...)
	c <- coxph(f, ...)
	summary(c)
	p <- 2*pnorm(abs(diff(c$coefficients)),sd=sqrt(sum(diag(c$var))), lower.tail=FALSE)
	plot(s, col=col, mark=NA)
	legend("topright", bty="", rownames(summary(s)$table), col=col, lty=1)
	title(main=paste("P =",signif(p,2)), font=1)
}

splitSurv <- function(surv, timeTpl){
	timeSurv <- surv[,1]
	w <- which(timeTpl < timeSurv)
	t1 <- c(rep(0,nrow(surv)), timeTpl[w])
	t2 <- c(pmin(timeSurv, timeTpl, na.rm=TRUE), timeSurv[w])
	event <- c(surv[,2], surv[w,2])
	event[w] <- 0
	Surv(t1,t2,event)
}

splitIndex <- function(surv, timeTpl){
	timeSurv <- surv[,1]
	w <- which(timeTpl < timeSurv)
	c(1:nrow(surv), w)
}

makeTimeDependent <- function(dataFrame, timeTpl, timeSurv = dataFrame$time, time0Surv = rep(0, nrow(dataFrame)), event=dataFrame$event){
	w <- which(timeTpl < timeSurv & timeTpl > time0Surv)
	index <- c(1:nrow(dataFrame), w) 
	d <- dataFrame[index,]
	d$index <- index
	d$time1 <- c(time0Surv, timeTpl[w])
	d$time2 <- c(pmin(timeSurv, timeTpl, na.rm=TRUE), timeSurv[w])
	d$transplant <- c(rep(0,nrow(dataFrame)), rep(1, length(w)))
	e <- c(event, event[w])
	e[w] <- 0
	d$event <- e
	return(d)
}

pbound <- function(x, lower, upper){
	pmin(pmax(x,lower),upper)
}
			
humanSvg <- '<?xml version="1.0" encoding="UTF-8"?>
		<!-- Created by grConvert v0.1-0 -->
		<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="35pt" height="84pt" viewBox="0 0 35 84" version="1.1">
		<g id="surface1">
		<path style="fill-rule:nonzero;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 34.099163 47.801325 C 34.099163 50.101674 33.001535 51.199302 30.899693 51.199302 C 28.801744 51.199302 27.700223 50.101674 27.700223 47.801325 L 27.700223 25.000237 L 26.100488 25.000237 L 26.100488 79.901116 C 26.100488 82.201465 24.80046 83.299093 22.200405 83.299093 C 19.600349 83.299093 18.300321 82.201465 18.300321 79.901116 L 18.300321 47.400419 L 16.299679 47.400419 L 16.299679 79.901116 C 16.299679 82.201465 14.999651 83.299093 12.399595 83.299093 C 9.79954 83.299093 8.499512 82.201465 8.499512 79.901116 L 8.499512 25.000237 L 6.899777 25.000237 L 6.899777 47.801325 C 6.899777 50.101674 5.798256 51.199302 3.700307 51.199302 C 1.598465 51.199302 0.500837 50.000474 0.500837 47.801325 L 0.500837 21.699568 C 0.500837 17.499777 3.4006 15.300628 9.200126 15.300628 L 25.399874 15.300628 C 31.1994 15.300628 34.099163 17.398577 34.099163 21.699568 Z M 24.099847 7.298061 C 24.099847 9.201395 23.399233 10.898438 22.099205 12.400865 C 20.701869 13.8994 19.098242 14.701214 17.1988 14.701214 C 15.299358 14.701214 13.699623 13.8994 12.298396 12.400865 C 10.90106 10.898438 10.301646 9.201395 10.301646 7.298061 C 10.301646 5.398619 10.998368 3.798884 12.298396 2.498856 C 13.598424 1.198828 15.299358 0.498214 17.1988 0.498214 C 19.098242 0.498214 20.701869 1.198828 22.099205 2.498856 C 23.500432 3.798884 24.099847 5.398619 24.099847 7.298061 Z M 24.099847 7.298061 " transform="matrix(1.003584,0,0,1.003584,0.137993,0)"/>
		</g>
		</svg>'

human <- function(){
t <- tempfile(); writeLines(humanSvg, t); pic <- readPicture(t)
}

#' Function to convert factors to numeric
#' @param x A vector or data.frame
#' @return A data.frame with numeric vectors, or a factors (if x is non-numeric), or a single vector if x was a vector
#' 
#' @author mg14
#' @export
numericalize <- function(x){
	if(class(x)=="data.frame")
		as.data.frame(sapply(x, .numericalize, simplify=FALSE))
	else
		.numericalize(x)
	} # Convert factors to numeric 

.numericalize <- function(x) if(class(x)!='factor') return(x) else if(all(is.na(as.numeric(levels(x))))) return(x) else return(as.numeric(as.character(x)))

asum <- function(x, dim, ...){
	apply(x, setdiff(seq_along(dim(x)), dim), sum, ...)
}

nn.poisglm <- function(D, P, maxIter = 1e4, tol=1e-6) {
	n <- nrow(D)
	m <- ncol(D)
	s <- ncol(P)
	tP <- t(P)
	rP <- rep(colSums(P), m)
	D <- as.matrix(D)
	E1 <- E2 <- matrix(runif(s * m, 1e-3, 1), ncol = m)
	err <- 2*tol
	
	KD <- function(D, X) sum(D * log(D/X) - D + X)
	k1 <- Inf
	k2 <- 0
	
	iter <- 1
	while ( (iter < maxIter & err > tol & k1 - k2 > tol) | iter <= 10) {
		E1 <- E2
		k1 <- k2
		E2 <- E1 * (tP %*% (D/(P %*% (E1 + .Machine$double.eps))))/rP
		k2 <- KD(D, P%*%E2)
		iter <- iter + 1
		err <- mean(abs(E2 - E1)/(E1+.Machine$double.eps), na.rm=TRUE)
		if(iter %% 100 == 0) cat(round(-log10(err)))
	}
	cat("\n")
	if(iter == maxIter) warning(paste("No convergence after",iter, "iterations."))
	E2
}


mindist <- function(x, mindist=0.03){
	y <- x
	while( any((diff(y))<mindist) ) {
		ydiff = (diff(y))
		runs = rle(ydiff<mindist)
		aux = which(ydiff<mindist)[1]
		l = runs$lengths[which(runs$values)[1]]
		cl = seq(aux,aux+l)
		# New suggested values for the cluster chosen
		centr = median(y[cl])
		y[cl] = seq(0, mindist*(length(cl)-1)+0.001, length.out=length(cl))
		y[cl] = y[cl] - median(y[cl]) + centr
	} 
	return(y)
}

poisl <- function(X, theta, Y){
	Yp <- X %*% theta
	Y %*% log(Yp) - sum(Yp) - sum(lfactorial(Y))
}

poisdl <- function(X, theta, Y){
	lambda <- as.numeric(X %*% theta)
	t(Y / lambda - 1) %*% X
}

poisI <- function(X, theta, Y){
	lambda <- as.numeric(X %*% theta)
	t(Y/lambda^2 * X) %*% X
}

ssnmf <- function(D, S, s=0, maxIter=100, minE = 0, whichS = 1:ncol(D)){
	n <- nrow(D)
	o <- ncol(S)
	m <- ncol(D)
	P <- cbind(S, if(s>0) matrix(runif(n*s,0,1), ncol=s) else NULL)
	P <- P/rep(colSums(P), each=nrow(P))
	colnames(P) <- c(colnames(S), if(s >0) paste0("N.",1:s) else NULL)
	E <- matrix(runif((s+o)*m, 0,1), ncol=m)
	E <- E * (t(P)%*% (D / (P %*% E))) / rep(colSums(P), m)
	
	iter <- 1
	while(iter < maxIter){
		P <- P * ((D / (P %*% E)) %*% t(E)) / rep(rowSums(E), each=n)
		P[,1:o] <- S
		P <- P/rep(colSums(P), each=nrow(P))
		E <- E * (t(P)%*% (D / (P %*% E))) / rep(colSums(P), m)
		E[E/rep(colSums(E), each=nrow(E)) < minE] <- 0
		E[-(1:o),setdiff(1:ncol(E), whichS)] <- 0
		E <- E * rep(colSums(D)/colSums(E), each=nrow(E))
		iter <- iter + 1
	}
	list(E=E,P=P)
}

nmSolve <- function(D, P, maxIter = 500, tol=1e-3) {
	n <- nrow(D)
	m <- ncol(D)
	s <- ncol(P)
	tP <- t(P)
	rP <- rep(colSums(P), m)
	D <- as.matrix(D)
	E1 <- E2 <- matrix(runif(s * m, 1e-3, 1), ncol = m)
	err <- 2*tol
	
	iter <- 1
	while (iter < maxIter & err > tol) {
		E1 <- E2
		E2 <- E1 * (tP %*% (D/(P %*% (E1 + .Machine$double.eps))))/rP
		iter <- iter + 1
		err <- mean(abs(E2 - E1)/(E1+.Machine$double.eps), na.rm=TRUE)
		if(iter %% 100 == 0) cat(round(-log10(err)))
	}
	cat("\n")
	if(iter == maxIter) warning(paste("No convergence after",iter, "iterations."))
	E2
}

roundProp <- function(x, p=100){
	y <- floor(x)
	d <- x-y
	o <- order(d, decreasing=TRUE)
	i <- 1
	while(sum(y) < p){
		y[o[i]] <- y[o[i]] + 1
		i <- i+1
	}
	return(y)
}

scatterpie <- function(x,y, p, r, xlab="",ylab="", circles=FALSE, lwd.circle=rep(1, length(x)), lty.circle=rep(1, length(x)), add=FALSE ,...){
	if(!add) plot(x,y, xlab=xlab, ylab=ylab, pch=NA)
	for(i in seq_along(x)){
		.pie(p[i,], x0=x[i], y0=y[i], radius=r[i], add=TRUE, ...)
		if(circles){
			u <- par("usr")
			pr <- (u[2]-u[1])/(u[4]-u[3])
			fr <- par("pin")[1]/par("pin")[2]
			polygon(x[i] + cos(seq(0,2*pi, l=100)) * r[i], y[i]+sin(seq(0,2*pi, l=100)) * r[i] / pr * fr, col=NA, lty=lty.circle[i], lwd=lwd.circle[i])
		}
	}
}

.pie <- function (x, x0=0, y0=0, labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE, 
		init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45, 
		col = NULL, border = NULL, lty = NULL, main = NULL, add=FALSE,...) 
{
	if (!is.numeric(x) || any(is.na(x) | x < 0)) 
		stop("'x' values must be positive.")
	if (is.null(labels)) 
		labels <- as.character(seq_along(x))
	else labels <- as.graphicsAnnot(labels)
	x <- c(0, cumsum(x)/sum(x))
	dx <- diff(x)
	nx <- length(dx)
	if(!add) plot.new()
	pin <- par("pin")
	xlim <- ylim <- c(-1, 1) + c(x0,y0)
	if (pin[1L] > pin[2L]) 
		xlim <- (pin[1L]/pin[2L]) * xlim
	else ylim <- (pin[2L]/pin[1L]) * ylim
	if(!add) dev.hold()
	on.exit(dev.flush())
	if(!add) plot.window(xlim, ylim, "", asp = 1)
	if (is.null(col)) 
		col <- if (is.null(density)) 
					c("white", "lightblue", "mistyrose", "lightcyan", 
							"lavender", "cornsilk")
				else par("fg")
	col <- rep(col, length.out = nx)
	border <- rep(border, length.out = nx)
	lty <- rep(lty, length.out = nx)
	angle <- rep(angle, length.out = nx)
	density <- rep(density, length.out = nx)
	twopi <- if (clockwise) 
				-2 * pi
			else 2 * pi
	u <- par("usr")
	pr <- (u[2]-u[1])/(u[4]-u[3])
	fr <- par("pin")[1]/par("pin")[2]
	t2xy <- function(t) {
		t2p <- twopi * t + init.angle * pi/180
		list(x = radius * cos(t2p), y = radius * sin(t2p)/pr*fr)
	}
	for (i in 1L:nx) {
		n <- max(2, floor(edges * dx[i]))
		P <- t2xy(seq.int(x[i], x[i + 1], length.out = n))
		P$x <- P$x + x0
		P$y <- P$y + y0
		polygon(c(P$x, x0), c(P$y, y0), density = density[i], angle = angle[i], 
				border = border[i], col = col[i], lty = lty[i])
		P <- t2xy(mean(x[i + 0:1]))
		lab <- as.character(labels[i])
		if (!is.na(lab) && nzchar(lab)) {
			lines(c(1, 1.05) * P$x + x0, c(1, 1.05) * P$y + y0)
			text(1.1 * P$x + x0, 1.1 * P$y + y0, labels[i], xpd = TRUE, 
					adj = ifelse(P$x < 0, 1, 0), ...)
		}
	}
	title(main = main, ...)
	invisible(NULL)
}
mg14/mg14 documentation built on Aug. 18, 2019, 3:30 a.m.