R/FunctionsInternal_Count_EM.R
In SparseFactorAnalysis: Scaling Count and Binary Data with Sparse Factor Analysis

################################
################################
##Three functions in here:
#### 1) test.gamma for finding the ML estimate for the grand lambda
#### 2) make.Z for converting a matrix to the Z-scale
#### 3) update.UDV for updating the ideal point estimates
#### *) Small additonal ones at the end 
################################
################################

################################
################################
##1) Finding gamma
#test.gamma.pois_EM<-function(gamma.try,Theta.last.0=Theta.last[row.type=="count",],votes.mat.0=votes.mat[row.type=="count",],emp.cdf.0,cutoff.seq=NULL){
test.gamma.pois_EM<-function(gamma.try,Theta.last.0,votes.mat.0,emp.cdf.0,cutoff.seq=NULL){	
	gamma.try<-exp(gamma.try)
	votes.mat<-votes.mat.0
	taus.try<-NULL
	count.seq<-cutoff.seq
	if(length(cutoff.seq)==0) count.seq<-seq(-1,max(votes.mat)+2,1)
	taus.try<-count.seq*0

	analytic.cdf<-count.seq
	a.int<-qnorm(mean(votes.mat==0))
	taus.try[count.seq>=0]<-(a.int+gamma.try[1]*count.seq[count.seq>=0]^(gamma.try[2]))
	taus.try[count.seq<0]<- -Inf
	taus.try<-sort(taus.try)
	taus.try[1]<--Inf	
	find.pnorm<-function(x){
		a<-x[1]
		b<-x[2]
		coefs<-c( -1.82517672, 0.51283415, -0.81377290, -0.02699400, -0.49642787, -0.33379312, -0.24176661, 0.03776971)
		x<-c(1, a, b, a^2,b^2, log(abs(a-b)),log(abs(a-b))^2,a*b)
		(sum(x*coefs))
	}
	lik<-((pnorm(taus.try[votes.mat+2 ]-Theta.last.0)-pnorm(taus.try[votes.mat+1 ]-Theta.last.0)) )
	log.lik<-log(lik)
	
	which.zero<-which(lik==0)
	a0<-taus.try[votes.mat[which.zero]+2]-Theta.last.0[which.zero]
	b0<-taus.try[votes.mat[which.zero]+1]-Theta.last.0[which.zero]
	log.lik[which.zero]<-apply(cbind(a0,b0),1,find.pnorm)
	log.lik[is.infinite(lik)&lik>0]<-max(log.lik[is.finite(lik)])
	log.lik[is.infinite(lik)&lik<0]<-min(log.lik[is.finite(lik)])

	thresh<-min(-1e30, min(log.lik[is.finite(log.lik)],na.rm=TRUE))
	log.lik[log.lik<thresh]<-thresh
	dev.out<--2*sum(log.lik,na.rm=TRUE)+sum(log(gamma.try)^2)
	
	
	return(list("deviance"=dev.out,"tau"=taus.try))

}	



################################
################################
##2) Converting a matrix to the Z scale
	make.Z_EM<-function(
			Theta.last.0=Theta.last, 
			votes.mat.0=votes.mat,row.type.0=row.type, 
			n0=n, k0=k, params=NULL, iter.curr=0,empir=NULL,cutoff.seq.0=NULL,missing.mat.0=NULL,lambda.lasso,proposal.sd,scale.sd, max.optim,step.size,maxdim.0,tau.ord.0
			){
		
		tau.ord<-tau.ord.0
		Theta.last<-Theta.last.0;votes.mat<-votes.mat.0;
		row.type<-row.type.0; n<-n0; k<-k0
		cutoff.seq<-cutoff.seq.0
		sigma<-1
		row.type<-row.type.0; votes.mat<-votes.mat.0
		Z.next<-matrix(NA,nrow=n,ncol=k)
		missing.mat<-missing.mat.0
		i.gibbs<-iter.curr
		maxdim<-maxdim.0
		#print(i.gibbs)
		#print(iter.curr)
	
	estep.bin<-function(means,a,b){
		means+(dnorm(a-means)-dnorm(b-means))/(pnorm(b-means)-pnorm(a-	means))
		}
		
		
	
			if(sum(row.type=="bin")>0){
		toplimit.mat<-bottomlimit.mat<-votes.mat[row.type=="bin",]*0
		toplimit.mat[votes.mat[row.type=="bin",]==1]<-Inf
		toplimit.mat[votes.mat[row.type=="bin",]==0]<-0
		toplimit.mat[votes.mat[row.type=="bin",]==0.5]<-Inf
		bottomlimit.mat[votes.mat[row.type=="bin",]==1]<-0
		bottomlimit.mat[votes.mat[row.type=="bin",]==0]<--Inf
		bottomlimit.mat[votes.mat[row.type=="bin",]==0.5]<- -Inf


		Z.next[row.type=="bin",]<-
estep.bin(means=Theta.last[row.type=="bin",],a=bottomlimit.mat,b=toplimit.mat)
		Z.next[row.type=="bin",][is.na(Z.next[row.type=="bin",])]<-0
		Z.next[row.type=="bin",][is.infinite(Z.next[row.type=="bin",])]<-0

		
		pars.max<-1
		accept.out<-prob.accept<-1
		}
		
		
			if(sum(row.type=="ord")>0){
				
			#Z.next[row.type=="ord",][votes.mat[row.type=="ord",]==-2]<-rtnorm(sum(votes.mat[row.type=="ord",]==-2), mean=sigma^.5*Theta.last[row.type=="ord",][votes.mat[row.type=="ord",]==-2], lower=tau.ord, sd=sigma^.5)
			#Z.next[row.type=="ord",][votes.mat[row.type=="ord",]==-1]<-rtnorm(sum(votes.mat[row.type=="ord",]==-1), mean=sigma^.5*Theta.last[row.type=="ord",][votes.mat[row.type=="ord",]==-1], lower=0,upper=tau.ord, sd=sigma^.5)
		#Z.next[row.type=="ord",][votes.mat[row.type=="ord",]==0]<-rtnorm(sum(votes.mat[row.type=="ord",]==0), mean=sigma^.5*Theta.last[row.type=="ord",][votes.mat[row.type=="ord",]==0], upper=0 , sd=sigma^.5)
		#Z.next[row.type=="ord",][missing.mat[row.type=="ord",]==1]<-rnorm(sum(missing.mat[row.type=="ord",]==1), mean=sigma^.5*Theta.last[row.type=="ord",][missing.mat[row.type=="ord",]==1], sd=sigma^.5)
		
		
		lower.mat<-upper.mat<-matrix(0,nrow=nrow(votes.mat[row.type=="ord",]), ncol=ncol(votes.mat))
		##Top category
		lower.mat[votes.mat[row.type=="ord",]==-2]<- tau.ord
		upper.mat[votes.mat[row.type=="ord",]==-2]<- Inf
		##Middle category
		lower.mat[votes.mat[row.type=="ord",]==-1]<- 0
		upper.mat[votes.mat[row.type=="ord",]==-1]<- tau.ord
		##Lower category
		lower.mat[votes.mat[row.type=="ord",]==0]<- -Inf
		upper.mat[votes.mat[row.type=="ord",]==0]<- 0
		#Missing data
		lower.mat[missing.mat[row.type=="ord",]==1]<- -Inf
		upper.mat[missing.mat[row.type=="ord",]==1]<- Inf

	
		Z.next.temp<-estep.bin(means=Theta.last[row.type=="ord",],a=lower.mat,b=upper.mat)
		which.change<-!is.finite(Z.next.temp)	
		Z.next.temp[which.change]<-Theta.last[row.type=="ord",][which.change]
		Z.next[row.type=="ord",]<-Z.next.temp



		tau.min<-max(Z.next[row.type=="ord",][votes.mat[row.type=="ord",]==-1&missing.mat[row.type=="ord"]==0])
		tau.max<-min(Z.next[row.type=="ord",][votes.mat[row.type=="ord",]==-2&missing.mat[row.type=="ord"]==0])
		tau.min<-max(tau.min,0.001)		
		tau.samp<-sort(c(tau.ord,runif(100,min(tau.min,tau.max),max(tau.min,tau.max))))
		#which.tau<-which(tau.samp==tau.ord)
		#tau.ord<-tau.samp[102-which.tau]
		#print(c(tau.min,tau.max))
		tau.ord<-runif(1,min(tau.min,tau.max),max(tau.min,tau.max))
		pars.max<-1
		accept.out<-prob.accept<-1

				}

		if(sum(row.type=="count")>0){
		#begin type = "pois"


	num.sample.count<-sum(row.type=="count")*k
	accept.out<-prob.accept<-NA

	dev.est<-function(x) test.gamma.pois_EM(x,votes.mat.0=votes.mat[row.type=="count",], Theta.last.0=Theta.last[row.type=="count",],cutoff.seq=cutoff.seq)$de
	dev.est1<-function(x) test.gamma.pois_EM(c(x,params[2]),votes.mat.0=votes.mat[row.type=="count",], Theta.last.0=Theta.last[row.type=="count",],cutoff.seq=cutoff.seq)$de
	dev.est2<-function(x) test.gamma.pois_EM(c(params[1],x),votes.mat.0=votes.mat[row.type=="count",], Theta.last.0=Theta.last[row.type=="count",],cutoff.seq=cutoff.seq)$de
	
	
	#range.opt<-c(params-1,params+1)
	range.opt<-rbind(params-1,params+1)
	if(iter.curr>10) range.opt<-rbind(params-.25,params+.25)


	if(iter.curr%%1==0|iter.curr<3){
	gamma.opt.1<-optimize(dev.est1,
			lower=range.opt[1,1],upper=range.opt[2,1],tol=0.01)
	params[1]<-gamma.opt.1$minimum
	gamma.opt.2<-optimize(dev.est2,
		lower=range.opt[1],upper=range.opt[2],tol=0.01)
	params[2]<-gamma.opt.2$minimum
	gamma.opt<-list(gamma.opt.1,gamma.opt.2)
	"M Step"
	#print(gamma.opt.2)

	}
	gamma.next<-pars.max<-params

		taus<-test.gamma.pois_EM(gamma.next,votes.mat.0=votes.mat[row.type=="count",], Theta.last.0=Theta.last[row.type=="count",],cutoff.seq=cutoff.seq)$tau

		taus<-sort(taus)
		taus[1]<--Inf
		#print("Range of gamma")
		#print(range.opt)
		#print(gamma.next)
		#print(range(taus))

#function(means,a,b){
#	means+(dnorm(a-means)-dnorm(b-means))/(pnorm(b-means)-pnorm(a-means))
	#}		
		
		lower.mat<-matrix(taus[votes.mat[row.type=="count",]+1 ],nrow=nrow(votes.mat[row.type=="count",]))
		upper.mat<-matrix(taus[votes.mat[row.type=="count",]+2 ],nrow=nrow(votes.mat[row.type=="count",]))

	

		Z.next.temp<-estep.bin(means=Theta.last[row.type=="count"],a=lower.mat,b=upper.mat)

		which.change<-is.na(Z.next.temp)|is.infinite(Z.next.temp)

		Z.next.temp[which.change]<-
		(Theta.last[row.type=="count",][which.change] > upper.mat[which.change])*
		upper.mat[which.change]+
		(Theta.last[row.type=="count",][which.change] < lower.mat[which.change])*lower.mat[which.change]
		
		which.change<-is.na(Z.next.temp)|is.infinite(Z.next.temp)
		Z.next.temp[which.change]<-Theta.last[which.change]
		
				
		Z.next[row.type=="count",]<-Z.next.temp
		}
		
	return(list("Z.next"=Z.next,"params"=(pars.max),"accept"=accept.out,"prob"=prob.accept,"proposal.sd"=proposal.sd,"step.size"=step.size,"tau.ord"=tau.ord))
	}##Closes out make.Z function


################################
################################
##2) Converting a matrix to the Z scale
	update_UDV_EM<-function(
	Z.next.0=Z.next,
	k0=k, n0=n, lambda.lasso.0=lambda.lasso,lambda.shrink.0=lambda.shrink,
	Dtau.0=Dtau,
	votes.mat.0=votes.mat, iter.curr=0,row.type.0,missing.mat.0=missing.mat,maxdim.0,
	V.last
	){

	missing.mat<-missing.mat.0
	Dtau<-Dtau.0;
	Z.next<-Z.next.0;
	votes.mat<-votes.mat.0;
	n<-n0; k<-k0; 
	lambda.lasso<-lambda.lasso.0
	row.type <- row.type.0
	maxdim<-maxdim.0

	#Declare some vectors
	sigma<-1
	ones.r<-rep(1,k0)
	ones.c<-rep(1,n0)

	#Update intercepts
	mu.r<-rowMeans(Z.next)#-ones.c%*%t(mu.c)-Theta.last.0+mu.grand)
	mu.r<-mu.r*n/(n+1)#+rnorm(length(mu.r),sd=1/k)

	mu.c<-colMeans(Z.next)#-mu.r%*%t(ones.r)-Theta.last.0+mu.grand)
	#mu.c<-mu.c*k/(k+1)#+rnorm(length(mu.c),sd=1/n)
	mu.grand<-mean(Z.next)
	mu.grand<-mu.grand*(n*k)/(n*k+1)#+rnorm(1,sd=1/(n*k))
	
	mean.mat<-ones.c%*%t(mu.c)+mu.r%*%t(ones.r)-mu.grand
	
	if(length(unique(row.type))>1){
		is.bin<-sum(row.type=="bin")>0
		is.count<-sum(row.type=="count")>0
		is.ord<-sum(row.type=="ord")>0
		
		#print("Two Means Being Used")
		mean.c.mat<-matrix(NA,nrow=n,ncol=k)
		if(is.bin) mu.c.bin<-colMeans(Z.next[row.type=="bin",])
		if(is.count) mu.c.count<-colMeans(Z.next[row.type=="count",])
		if(is.ord) mu.c.ord<-colMeans(Z.next[row.type=="ord",])

		if(is.bin) mu.c.bin<-(mu.c.bin*k)/(k+1)#+rnorm(length(mu.c.bin),sd=1/sum(row.type=="bin"))
		if(is.count) mu.c.count<-mu.c.count*k/(k+1)#+rnorm(length(mu.c.count),sd=1/sum(row.type=="count"))
		if(is.ord) mu.c.ord<-mu.c.ord*k/(k+1)#+rnorm(length(mu.c.count),sd=1/sum(row.type=="count"))

		if(is.bin) mean.c.mat[row.type=="bin",]<-ones.c[row.type=="bin"]%*%t(mu.c.bin)
		if(is.count) mean.c.mat[row.type=="count",]<-ones.c[row.type=="count"]%*%t(mu.c.count)
		if(is.ord) mean.c.mat[row.type=="ord",]<-ones.c[row.type=="ord"]%*%t(mu.c.ord)

	
		mean.grand.mat<-matrix(NA,nrow=n,ncol=k)
		if(is.bin) mean.grand.mat[row.type=="bin",]<-mean(Z.next[row.type=="bin",])* (sum(row.type=="bin")*k)/(sum(row.type=="bin")*k+1)#+rnorm(1,sd=1/(sum(row.type=="bin")*k))
		if(is.count) mean.grand.mat[row.type=="count",]<-mean(Z.next[row.type=="count",])* (sum(row.type=="count")*k)/(sum(row.type=="count")*k+1)#+rnorm(1,sd=1/(sum(row.type=="bin")*k))#+rnorm(1,sd=1/(sum(row.type=="count")*k))
		if(is.ord) mean.grand.mat[row.type=="ord",]<-mean(Z.next[row.type=="ord",])* (sum(row.type=="count")*k)/(sum(row.type=="ord")*k+1)#+rnorm(1,sd=1/(sum(row.type=="bin")*k))#+rnorm(1,sd=1/(sum(row.type=="count")*k))


	mean.mat<-mean.c.mat+mu.r%*%t(ones.r)-mean.grand.mat
	
	}

	Z.starstar<-svd.mat<- Z.next- mean.mat




	

	#Take svd, give each column an sd of 1 (rather than norm of 1)
	num.zeroes<-1#colMeans(votes.mat!=0)
	svd.mat.0<-svd.mat

	
	#save(svd.mat,file="svd.mat")
	
	svd.mat[is.na(svd.mat)]<-0
	
	wts.dum<-rep(1,nrow(svd.mat))
	wts.dum[row.type=="bin"]<-1/sum(1-missing.mat[row.type=="bin",])^.5
	wts.dum[row.type=="count"]<-1/sum(1-missing.mat[row.type=="count",])^.5
	wts.dum[row.type=="ord"]<-1/sum(1-missing.mat[row.type=="ord",])^.5


	wts.dum<-wts.dum/mean(wts.dum)

	#svd.dum<-irlba(svd.mat*wts.dum,nu=maxdim,nv=maxdim,V=V.last)
	svd.dum<-svd(svd.mat*wts.dum,nu=maxdim,nv=maxdim)
	svd.dum$u[is.na(svd.dum$u)|is.infinite(svd.dum$u)]<-0
	svd.dum$d[is.na(svd.dum$d)|is.infinite(svd.dum$d)]<-0
	svd.dum$v[is.na(svd.dum$v)|is.infinite(svd.dum$v)]<-0


	svd.dum$d<-(t(svd.dum$u)%*%svd.mat%*%svd.dum$v)
	which.rows<-which(rowMeans(svd.dum$d^2)^.5<1e-4)
	which.cols<-which(colMeans(svd.dum$d^2)^.5<1e-4)
	svd.dum$d[which.rows,]<-rnorm(length(svd.dum$d[which.rows,]),sd=.001)
	svd.dum$d[which.cols,]<-rnorm(length(svd.dum$d[which.cols,]),sd=.001)
	svd2<-svd(svd.dum$d,nu=maxdim,nv=maxdim)
	#print(svd.dum$d)
	#print(maxdim)
	#print(maxdim-2)
	#svd2<-irlba(svd.dum$d,nu=maxdim-5,nv=maxdim-5)
	svd2$u[is.na(svd2$u)|is.infinite(svd2$u)]<-0
	svd2$d[is.na(svd2$d)|is.infinite(svd2$d)]<-0
	svd2$v[is.na(svd2$v)|is.infinite(svd2$v)]<-0
	
	svd.dum$u<-svd.dum$u%*%(svd2$u)
	svd.dum$v<-svd.dum$v%*%(svd2$v)

	svd.dum$d<-svd2$d
	
	svd0<-svd.dum
	
	svd0$v<-t(t(svd0$u)%*%svd.mat.0)
	svd0$v<-apply(svd0$v,2,FUN=function(x) x/sum(x^2)^.5)
	svd0$d<-diag(t(svd0$u)%*%svd.mat.0%*%svd0$v)
	sort.ord<-sort(svd0$d,ind=T,decreasing=T)$ix
	svd0$u<-svd0$u[,sort.ord]
	svd0$v<-svd0$v[,sort.ord]
	svd0$d<-svd0$d[sort.ord]
	svd0$u<-svd0$u*(n-1)^.5
	svd0$v<-svd0$v*(k-1)^.5
	svd0$d<-svd0$d*((n-1)*(k-1))^-.5
	Theta.last.0<-svd.mat
	Theta.last<-Theta.last.0+(ones.c%*%t(mu.c)+mu.r%*%t(ones.r)-mu.grand)

	#Update d; follows from Blasso and DvD
	Y.tilde<-as.vector(svd.mat)
	if(n>length(Dtau)) Dtau[(length(Dtau)+1):n]<-1
	A<- (n*k)*diag(n)+diag(as.vector(Dtau^(-1)))
	gA<-A*0+NA
	gA[1:maxdim,1:maxdim]<-ginv(A[1:maxdim,1:maxdim])
	gA[is.na(gA)]<-0
	XprimeY<-sapply(1:maxdim, FUN=function(i, svd2=svd0, Z.use=svd.mat) sum(Z.use*(svd2$u[,i]%*%t(svd2$v[,i]))))
	if(length(XprimeY)<dim(gA)[1]) XprimeY[(length(XprimeY)+1) :dim(gA)[1]]<-0
	D.post.mean<- as.vector(gA%*%XprimeY)
	D.post.var.2<-gA

##Sample D and reconstruct theta, putting intercepts back in
	D.post<-rep(NA,length(D.post.mean))
	D.post[1:maxdim]<-D.post.mean[1:maxdim] + diag(D.post.var.2[1:maxdim,1:maxdim]) ^.5 #as.vector(mvrnorm(1, mu=D.post.mean[1:maxdim], D.post.var.2[1:maxdim,1:maxdim] ) )/sigma^.5
	D.post[is.na(D.post)]<-0
	abs.D.post<-abs(D.post)
	#abs.D.post.loop<-abs(mvrnorm(50, mu=D.post.mean[1:maxdim], D.post.var.2[1:maxdim,1:maxdim] ) )
	#print(abs.D.post.loop)

##Calculate MAP and mean estimate
	D.trunc<-pmax(svd0$d-lambda.lasso.0,0)
	
	U.last<-svd0$u
	V.last<-svd0$v


	U.mean.next<-0*U.last
	V.mean.next<-0*V.last
##Construct U and V
#prior var of 2

	#D.adj<-abs(D.post[1:maxdim])/svd0$d
	#U.last<-t(t(U.last)*(D.post[1:maxdim]^2/(D.post[1:maxdim]^2+1/(4*k))))
	#V.last<-t(t(V.last)*D.post[1:maxdim]^2/(D.post[1:maxdim]^2+1/(4*n)))
	U.last[!is.finite(U.last)]<-0
	V.last[!is.finite(V.last)]<-0

	U.next<-U.last
	V.next<-V.last

	Theta.last.0<-U.next%*%diag(D.post[1:maxdim])%*%t(V.next)
	Theta.last<-Theta.last.0+mean.mat
	Theta.mode<- U.next%*%diag(D.trunc[1:maxdim])%*%t(V.next)+mean.mat
	##Update muprime, invTau2, lambda.lasso
	muprime<-(abs(lambda.lasso*sqrt(sigma)))/abs.D.post#*colMeans(1/abs.D.post.loop)
	invTau2<-muprime#sapply(1:maxdim, FUN=function(i) rinv.gaussian(1, muprime[i], (lambda.lasso^2 ) ) )
	#invTau2<-matrix(NA,nrow=500,ncol=maxdim)
	invTau2<-sapply(1:maxdim, FUN=function(i) rinv.gaussian(500, muprime[i], (lambda.lasso^2 ) ) )
	Dtau<-colMeans(1/abs(invTau2))
	#lambda.lasso<-((maxdim)/(sum(Dtau[1:maxdim])/2))^.5
	#lambda.lasso<-(2/mean(Dtau))^.5
	#ran.gamma<-rgamma(1000, shape=maxdim+1  , rate=sum(Dtau[1:maxdim])/2+1.78  )
	#d1<-density(ran.gamma^.5,cut=0)
	#lambda.shrink<-lambda.lasso<-d1$x[d1$y==max(d1$y)]
	lambda.shrink<-lambda.lasso<-((maxdim)/(sum(Dtau[1:maxdim])/2+1.78))^.5
	#lambda.shrink<-lambda.lasso<-rgamma(1, shape=maxdim+1  , rate=sum(Dtau[1:maxdim])/2+1.78  )^.5
	
	return(list(
	"Theta.last"=Theta.last,
	"U.next"=U.next,
	"V.next"=V.next,
	"lambda.lasso"=lambda.lasso,
	"lambda.shrink"=lambda.shrink,
	"D.trunc"=D.trunc,
	"D.post"=D.post,
	"Theta.mode"=Theta.mode,
	"svd0"=svd0, 
	"Dtau"=Dtau,
	"D.ols"=svd0$d
	))
}



	expit<-function(x) exp(x)/(1+exp(x))
Any scripts or data that you put into this service are public.
SparseFactorAnalysis documentation built on May 2, 2019, 6 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
SparseFactorAnalysis
Scaling Count and Binary Data with Sparse Factor Analysis

R/FunctionsInternal_Count_EM.R
In SparseFactorAnalysis: Scaling Count and Binary Data with Sparse Factor Analysis

Try the SparseFactorAnalysis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SparseFactorAnalysis Scaling Count and Binary Data with Sparse Factor Analysis

R/FunctionsInternal_Count_EM.R In SparseFactorAnalysis: Scaling Count and Binary Data with Sparse Factor Analysis

Try the SparseFactorAnalysis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SparseFactorAnalysis
Scaling Count and Binary Data with Sparse Factor Analysis

R/FunctionsInternal_Count_EM.R
In SparseFactorAnalysis: Scaling Count and Binary Data with Sparse Factor Analysis