########################################################################################################
########################################################################################################
########################### ALL TESTED BIPARTITE CODE FOR DIFFERENT CASES ##############################
########################################################################################################
########################################################################################################
## All models below consider static undirected density networks for Bipartite with proportional odds model for discrete weighted rating networks
library(Rcpp)
file_path<-"/Users/Amal/Box Sync/PSU/Fall 2018/Main_Research/Network Models/Project 5 (Bipartite)/code/bipartite_All/"
########################################################################################################
########################################################################################################
## Model 7 features (works):
## 1) Block structure over intercept
## 2) Proportional odds model for ratings based on my own derivations
wrapper_bipartite_model7<-function(net_adjacency,net_rating,nclust1,nclust2,thres=10^(-6),n_iter_min=50,n_iter_max=500,theta_init,delta_0_init,sim_indicator,theta_true=NA, delta_0_true=NA,K_true_U=NA,K_true_P=NA,cluster_ids_true_U=NA,cluster_ids_true_P=NA,R=NA){
## This is the combined final updated and working code for EM undirected case for all K
########################################################################################################
########################################################################################################
## Loading the required packages
require(lda)
library(quadprog)
library(combinat)
library(MASS)
library(Rcpp)
library(Matrix)
library(gtools)
#sourceCpp(file = paste0(file_path,"model_7.cpp"))
library(biSBM)
#################################################
## Defining a function to update variational parameters gamma using quadratic program solver. Input:
## gamma.curr is a N*K matrix for current gamma estimates, pi.curr is a K*1 vector for current pi
## estimates, theta.curr is a T_grid*K array for current theta estimates. Given Network is assumed to be an N*N*T_data array
solve_QP_wrapper<-function(quad_lin_coeff,constraint_matrix,constraint_vector,node_ID,gamma.curr){
try_QP<-try(gamma_next_vec<-solve.QP((-2*diag(as.vector(quad_lin_coeff[node_ID,,1]))),as.vector(quad_lin_coeff[node_ID,,2]),t(constraint_matrix),constraint_vector)$solution,silent = TRUE)
if('numeric' %in% class(try_QP)){
gamma_next_vec<-try_QP
}
else{
gamma_next_vec<-gamma.curr[node_ID,]
print("red flag")
print(paste0("Node ID for red flag is ", node_ID))
}
return(gamma_next_vec)
}
gamma_U.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_U.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, N1,N2,K1,K2,R){
gamma_U.next<-matrix(NA_real_,N1,K1)
constraint_matrix<-matrix(NA_real_,2+K1,K1)
constraint_matrix[1,]<-rep(1,K1)
constraint_matrix[2,]<-rep(-1,K1)
constraint_matrix[3:(K1+2),]<-diag(K1)
constraint_vector<-c(1,-1,rep(0,K1))
quad_lin_coeff<-gamma_U_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_U=pi_U.curr, theta=theta.curr,logitInv_delta_0=logitInv_delta_0.curr,net_adjacency=net_adjacency, net_rating = net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print(quad_lin_coeff)
for (i in 1:N1){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K1){
gamma_U.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K1){
gamma_U.next[i,]<-gamma_U.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K1){
gamma_U.next[i,]<-gamma_U.curr[i,]
}
#print(i)
}
#print('gammaU')
#print(quad_lin_coeff)
# for (i in 1:N1){
# gamma_U.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# print(i)
# }
## normalizing gamma_i. deal with case outside (0,1) later
gamma_U.next<-t(apply(X = gamma_U.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma_U.next)
}
gamma_P.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_P.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, N1,N2,K1,K2,R){
gamma_P.next<-matrix(NA_real_,N2,K2)
constraint_matrix<-matrix(NA_real_,2+K2,K2)
constraint_matrix[1,]<-rep(1,K2)
constraint_matrix[2,]<-rep(-1,K2)
constraint_matrix[3:(K2+2),]<-diag(K2)
constraint_vector<-c(1,-1,rep(0,K2))
quad_lin_coeff<-gamma_P_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_P=pi_P.curr, theta=theta.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating = net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print('gammaP')
#print(quad_lin_coeff)
#print(quad_lin_coeff)
for (i in 1:N2){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K2){
gamma_U.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K2){
gamma_P.next[i,]<-gamma_P.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K2){
gamma_P.next[i,]<-gamma_P.curr[i,]
}
#print(i)
}
# for (i in 1:N2){
# gamma_P.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# print(i)
# }
## normalizing gamma_i. deal with case outside (0,1) later
gamma_P.next<-t(apply(X = gamma_P.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma_P.next)
}
#################################################
## Defining a function to update K*1 vector of pi
pi.update<-function(gamma.curr){
pi.next<-as.vector(apply(X = gamma.curr,MARGIN = 2,FUN = mean))
## normalization of pi
pi.next<-pi.next/sum(pi.next)
return(pi.next)
}
#################################################
## Defining the update function of full matrix theta with dimensions K1,K2
theta.update<-function(theta.curr,gamma_U,gamma_P,net_adjacency,N1,N2,K1,K2){
theta.next<-matrix(NA_real_,K1,K2)
gradient<-grad_bipartite_stat_undir_theta(theta=theta.curr, gamma_U=gamma_U, gamma_P=gamma_P, net_adjacency=net_adjacency, N1=N1, N2=N2, K1=K1, K2=K2)
hess<-hess_bipartite_stat_undir_theta(theta=theta.curr, gamma_U=gamma_U, gamma_P=gamma_P, N1=N1, N2=N2, K1=K1, K2=K2)
for(k in 1:K1){
for(l in 1:K2){
theta.next[k,l]<-theta.curr[k,l]-gradient[k,l]/hess[k,l]
}
}
return(theta.next)
}
#################################################
## Defining the update function of full proportional odds nuisance parameters delta_0
delta_0.update<-function(gamma_U,gamma_P,delta_0,logitInv_delta_0,logitInv_phi,net_adjacency,net_rating_statistic,N1,N2,K1,K2,R){
delta_0.next<-array(NA_real_,dim=c(K1,K2,(R-1)))
## Getting the estimated cluster memberships of all nodes in two sets from variational E step
#clust_est_U<-as.vector(apply(X = gamma_U,MARGIN = 1,FUN = which.max))
#clust_est_P<-as.vector(apply(X = gamma_P,MARGIN = 1,FUN = which.max))
#ratings_list<-tie_clust_partition(clust_est_U=clust_est_U, clust_est_P=clust_est_P, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, K1=K1, K2=K2)
gradient_delta_0<-grad_bipartite_stat_undir_delta_0(gamma_U=gamma_U, gamma_P=gamma_P, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
for (k in 1:K1){
for (l in 1:K2){
hess_mat<-hess_bipartite_stat_undir_delta_0(k=k-1, l=l-1, gamma_U=gamma_U, gamma_P=gamma_P, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, R=R)
delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat+(10^(-6)*diag(R-1)))%*%gradient_delta_0[k,l,])))
#delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat)%*%gradient_delta_0[k,l,])))
}
}
#print(delta_0.next)
return(delta_0.next)
}
#################################################
## Defining a function that takes inputs as: 1) initial values list of parameters, 2) network, 3) Number of clusters K and 4) number of iterations and outputs the whole list of all parameter iterates
iterator<-function(start,net_adjacency,net_rating,K1,K2,n_iter,thres,n_iter_min,n_iter_max,R){
## Assuming a network adjacency matrix of dimensions N1*N2 with rows corresponding to users and columns corresponding to products and each entry is an indicator whether the user rated the product. This framework can be generalized.
N1<-dim(net_adjacency)[1]
N2<-dim(net_adjacency)[2]
## initializing the arrays for parameters
## Initializing the cluster memberships gamma
gamma_U<-array(NA_real_,dim=c(N1,K1,n_iter)) ## assuming there are K1 clusters in user set
gamma_P<-array(NA_real_,dim=c(N2,K2,n_iter)) ## assuming there are K2 clusters in product set
## Initializing the mixture proportions pi
pi_U<-matrix(NA_real_,K1,n_iter)
pi_P<-matrix(NA_real_,K2,n_iter)
## Initializing the network parameters theta
theta<-array(NA_real_,dim=c(K1,K2,n_iter))
delta_0<-array(NA_real_,dim=c(K1,K2,R-1,n_iter))
gamma_U[,,1]<-start[[1]]
gamma_P[,,1]<-start[[2]]
pi_U[,1]<-start[[3]]
pi_P[,1]<-start[[4]]
theta[,,1]<-start[[5]]
delta_0[,,,1]<-start[[6]]
#print(theta[,,1])
#print(omega[,1])
#print(mu[,,,1])
## iterations
## Defining the iteration index
iter_index<-2
## Initializing the error
error<-Inf
## Initializing the current ELBO values over the whole grid
ELBO_curr<-10^10
time_per_iter<-c()
while(((error>thres)&(iter_index<n_iter_max))|(iter_index<n_iter_min)){
#while((error>thres|iter_index<200)&(iter_index<250)&(iter_index==2|error<10)){
#while(iter_index<21){
## Starting the stopwatch to calculate the per iteration time
ptm<-proc.time()
## Initializing the G arrays
#Gvals_cube_0<-array(NA_real_,dim=c(K1,K2,R))
#Gvals_cube_1<-array(NA_real_,dim=c(K1,K2,R))
#Gvals_cube_2<-array(NA_real_,dim=c(K1,K2,R))
## Updating the G arrays
#print(mu[,,,iter_index-1])
#if (R==1){mu[,,,iter_index-1]=array(mu[,,,iter_index-1],dim=c(2,2,1))}
#Gvals_cube_0<-get_Gval_0(omega=omega[,,iter_index-1], mu=mu[,,,iter_index-1],K1=K1,K2=K2,R=R)
#Gvals_cube_1<-get_Gval_1(omega=omega[,,iter_index-1], mu=mu[,,,iter_index-1],K1=K1,K2=K2,R=R)
#Gvals_cube_2<-get_Gval_2(omega=omega[,,iter_index-1], mu=mu[,,,iter_index-1],K1=K1,K2=K2,R=R)
#print('Gvals_cube_0')
#print(Gvals_cube_0)
#Gvals_mat_0=apply(X = Gvals_cube_0,MARGIN = c(1,2),sum)
#Gvals_mat_1=apply(X = Gvals_cube_1,MARGIN = c(1,2),sum)
#Gvals_mat_2=apply(X = Gvals_cube_2,MARGIN = c(1,2),sum)
#log_Gval_0 <- matrix(NA_real_,K1,K2)
#log_Gval_0 <- get_log_Gval_0(omega=omega[,,iter_index-1], mu=mu[,,,iter_index-1],K1=K1,K2=K2,R=R)
## Calculating the logit inverse of delta_0
logitInv_delta_0.curr<-logitInv_delta_0_cal(delta_0=delta_0[,,,iter_index-1], K1=K1, K2=K2, R=R)
## Transforming logit inverse of delta_0 to phi
#phi.curr<-phi_cal(logitInv_delta_0=logitInv_delta_0.curr, K1=K1, K2=K2, R=R)
## Calculating the logit inverse of phi
#logitInv_phi.curr<-logitInv_phi_cal(phi=phi.curr, K1=K1, K2=K2, R=R)
## Updating gamma_U, the mixed memberships for users
gamma_U[,,iter_index]<-gamma_U.update.wrapper(gamma_U.curr=gamma_U[,,iter_index-1], gamma_P.curr=gamma_P[,,iter_index-1], pi_U.curr=pi_U[,iter_index-1], theta.curr=theta[,,iter_index-1], logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1,N2=N2,K1=K1,K2=K2,R=R)
#print('gamma_U[,,iter_index]')
# print(gamma_U[,,iter_index])
## Updating the mixture proportions pi_U
pi_U[,iter_index]<-pi.update(gamma.curr=gamma_U[,,iter_index])
#print('pi_U[,,iter_index]')
#print(pi_U[,,iter_index])
## Updating gamma_P, the mixed memberships for products
gamma_P[,,iter_index]<-gamma_P.update.wrapper(gamma_U.curr=gamma_U[,,iter_index],gamma_P.curr=gamma_P[,,iter_index-1],pi_P.curr=pi_P[,iter_index-1], theta.curr=theta[,,iter_index-1], logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1,N2=N2,K1=K1,K2=K2,R=R)
# print('gamma_P[,,iter_index]')
#print(gamma_P[,,iter_index])
## Updating the mixture proportions pi_P
pi_P[,iter_index]<-pi.update(gamma.curr=gamma_P[,,iter_index])
#print('pi_P[,,iter_index]')
#print(pi_P[,,iter_index])
## Updating the network parameters theta
theta[,,iter_index]<-theta.update(theta.curr=theta[,,iter_index-1],gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], net_adjacency=net_adjacency, N1=N1, N2=N2, K1=K1, K2=K2)
#print('theta')
#print(theta[,,iter_index])
## Updating the omega matrix
delta_0[,,,iter_index]<-delta_0.update(gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], delta_0=delta_0[,,,iter_index-1],logitInv_delta_0 = logitInv_delta_0.curr,logitInv_phi = logitInv_phi.curr, net_adjacency=net_adjacency, net_rating_statistic = net_rating_statistic, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print('delta_0')
#print(delta_0[,,,iter_index])
#Sys.sleep(1.5)
ELBO_prev<-ELBO_curr
ELBO_curr<-ELBO_conv_bipartite_stat_undir(gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], pi_U=pi_U[,iter_index], pi_P=pi_P[,iter_index], theta = theta[,,iter_index], logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, K1=K1, K2=K2)
error<-abs(((ELBO_prev-ELBO_curr)/ELBO_prev))
#print('error')
#print(error)
#print("probs")
#print(Gvals_cube_0)
# print(apply(X = Gvals_cube_0,MARGIN = c(1,2),FUN = function(x){
#
# return(x/sum(x))
# }))
if((iter_index%%1)==0){
#print(theta[,,iter_index])
print('iter_index')
print(iter_index)
print('error')
print(error)
print(proc.time()-ptm)
}
ptm_diff<-proc.time()-ptm
time_per_iter<-c(time_per_iter,ptm_diff[3])
#print(proc.time()-ptm)
iter_index<-iter_index+1
}
names(time_per_iter)<-NULL
total_iterations<-iter_index-1
return(list(gamma_U,gamma_P,pi_U,pi_P,theta,delta_0,time_per_iter,total_iterations))
}
########################################################################################################
########################################################################################################
## Defining Model Selection functions based on converged paramters
#################################################
## Defining a function to calculate the estimate of complete log likelihood
comp_loglik_cal<-function(cluster_ids_est_U = NA, cluster_ids_est_P = NA, pi_U = NA, pi_P = NA, theta, Gvals_cube_0, Gvals_mat_0, net_adjacency, net_rating, N1, N2, K1, K2, R=R){
if((K1!=1)&(K2!=1)){
## 1st term
t1<-0
for(i in 1:N1){
for(j in 1:N2){
cluster_id_i<-cluster_ids_est_U[i]
cluster_id_j<-cluster_ids_est_P[j]
exp_val<-exp(theta[cluster_id_i,cluster_id_j])
if(net_adjacency[i,j]==0){
t1<-t1-(log(1+exp_val))
}else{
t1<-t1+(theta[cluster_id_i,cluster_id_j]-log(1+exp_val))+log(Gvals_cube_0[cluster_id_i,cluster_id_j,net_rating[i,j]])-log(Gvals_mat_0[cluster_id_i,cluster_id_j])
}
}
}
## 2nd term
t2<-0
for (i in 1:N1){
t2<-t2+log(pi_U[cluster_ids_est_U[i]])
}
t3<-0
for (j in 1:N2){
t3<-t3+log(pi_P[cluster_ids_est_P[j]])
}
comp_val<-t1+t2+t3
}
return(comp_val)
}
#################################################
## Defining a function to calculate the integrated classification likelihood
ICL_cal<-function(cluster_ids_est_U, cluster_ids_est_P, pi_U, pi_P, theta, Gvals_cube_0, Gvals_mat_0, net_adjacency, net_rating, N1, N2, K1, K2, R){
t1<-comp_loglik_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U = pi_U, pi_P = pi_P, theta = theta, Gvals_cube_0 = Gvals_cube_0, Gvals_mat_0 = Gvals_mat_0, net_adjacency = net_adjacency, net_rating = net_rating, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R=R)
## Penalization over pi
t2<-(K1-1)*log(N1)+(K2-1)*log(N2)
## Penalization over theta
t3<-(K1*K2)*log(N1*N2)
## Calculating the number of edges
N0_val<-N0_cal(net_adjacency=net_adjacency, N1=N1, N2=N2)
## Penalization over omega, mu
t4<-(K1*K2*(R-1))*log(N0_val)
ICL_val<-t1-t2-t3-t4
return(ICL_val)
}
########################################################################################################
## Defining the Rand Index and RASE functions for evaluating the performance of clustering and estimation respectively
## Rand Index function
RI_cal<-function(cluster_ids_est, cluster_ids_true){
n=length(cluster_ids_est)
RI_val=0
for(i in 1:(n-1)){
for(j in (i+1):n){
RI_val=RI_val+as.numeric((cluster_ids_est[i]==cluster_ids_est[j])==(cluster_ids_true[i]==cluster_ids_true[j]))
}
}
RI_mean=RI_val/(n*(n-1)/2)
return(RI_mean)
}
#################################################
## RASE functions
RASE_cal<-function(param_est, param_true){
if(is.vector(param_est)!=1){
RASE_val<-sqrt(sum((param_est-param_true)^2)/prod(dim(param_est)))
}else{RASE_val<-sqrt(sum((param_est-param_true)^2)/length(param_est))}
return(RASE_val)
}
########################################################################################################
## Defining the functions to evaluate clustering performance using normalized mutual information.
probab_clust_cal<-function(cluster_ids,N,K){
probab_vec<-rep(NA_real_,K)
for(k in 1:K){
probab_vec[k]<-sum(cluster_ids==k)/N
}
return(probab_vec)
}
NMI_cal<-function(cluster_ids_true_U,cluster_ids_true_P,K_true_U,K_true_P,cluster_ids_est_U,cluster_ids_est_P,K1,K2,N1,N2){
probab_vec_true_U<-probab_clust_cal(cluster_ids = cluster_ids_true_U,N = N1+N2,K = K_true_U)
probab_vec_true_P<-probab_clust_cal(cluster_ids = cluster_ids_true_P,N = N1+N2,K = K_true_P)
probab_vec_true<-c(probab_vec_true_U,probab_vec_true_P)
total_entropy_true<-0
for(k in 1:length(probab_vec_true)){
if(probab_vec_true[k]!=0){
total_entropy_true<-total_entropy_true-(probab_vec_true[k]*log(probab_vec_true[k],base = 2))
}
}
probab_vec_est_U<-probab_clust_cal(cluster_ids = cluster_ids_est_U,N = N1+N2,K = K1)
probab_vec_est_P<-probab_clust_cal(cluster_ids = cluster_ids_est_P,N = N1+N2,K = K2)
probab_vec_est<-c(probab_vec_est_U,probab_vec_est_P)
total_entropy_est<-0
for(k in 1:length(probab_vec_est)){
if(probab_vec_est[k]!=0){
total_entropy_est<-total_entropy_est-(probab_vec_est[k]*log(probab_vec_est[k],base = 2))
}
}
clust_ids_true_total<-c(cluster_ids_true_U,(cluster_ids_true_P+K_true_U))
clust_ids_est_total<-c(cluster_ids_est_U,(cluster_ids_est_P+K1))
conditional_entropy<-rep(NA_real_,(K1+K2))
for(k in 1:(K1+K2)){
probab_conditional_vec<-rep(0,(K_true_U+K_true_P))
for(m in 1:(K_true_U+K_true_P)){
if(sum(clust_ids_est_total==k)!=0){
probab_conditional_vec[m]<-sum((clust_ids_true_total==m)&(clust_ids_est_total==k))/sum(clust_ids_est_total==k)
}
}
plogp_conditional_vec<-rep(0,length(probab_conditional_vec))
for(m in 1:length(probab_conditional_vec)){
if(probab_conditional_vec[m]!=0){
plogp_conditional_vec[m]<-probab_conditional_vec[m]*log(x = probab_conditional_vec[m],base = 2)
}
}
conditional_entropy[k]<--probab_vec_est[k]*sum(plogp_conditional_vec)
}
mutual_info<-total_entropy_true-sum(conditional_entropy)
NMI_val<-(2*mutual_info)/(total_entropy_true+total_entropy_est)
return(NMI_val)
}
########################################################################################################
## Calculating the recovery rate
recovery_rate_cal <- function(cluster_ids_est, cluster_ids_true){
all_perm<-permutations(n=4,r=2,repeats.allowed = T)
clust_perm_mat<-matrix(NA_integer_,4,4)
for(i in 1:nrow(all_perm)){
clust_perm_mat[all_perm[i,1],all_perm[i,2]] <- sum((cluster_ids_true==all_perm[i,1])&(cluster_ids_est==all_perm[i,2]))
}
recovery_rate_val<-sum(apply(X = clust_perm_mat, MARGIN = 1, FUN = max))/length(cluster_ids_est)
return(recovery_rate_val)
}
#########################################################################################################
## Defining the parameters
N1<-dim(net_adjacency)[1] ## Number of nodes from the network
N2<-dim(net_adjacency)[2]
## Total time points in the network for which we have data in the form of adjacency matrix
K1<-nclust1 ## Defining the number of clusters
K2<-nclust2
K <-K1*K2
#################################################
## Defining the starting values of the iterator and running the main algorithm
start<-list()
start[[1]]<-matrix(rep(1/K1,N1*K1),N1,K1)
start[[2]]<-matrix(rep(1/K2,N2*K2),N2,K2)
start[[3]]<-rep(1/K1,K1)
start[[4]]<-rep(1/K2,K2)
start[[5]]<-theta_init
start[[6]]<-delta_0_init
if(K1!=1&K2!=1){ param<-iterator(start=start, net_adjacency = net_adjacency, net_rating=net_rating, K1=K1, K2=K2, n_iter=1000, thres=thres, n_iter_min=n_iter_min, n_iter_max=n_iter_max, R=R)}
#################################################
## extracting the coverged parameter values and calculating ICL
n_iter=1
indicator_last<-0
while(indicator_last==0){
temp<-is.na(param[[1]][1,1,n_iter])
if(temp==TRUE){
n_last<-n_iter-1
indicator_last<-1
}
n_iter<-n_iter+1
}
param_converge<-list()
#print(param)
if(K1!=1&K2!=1){
param_converge[[1]]<-param[[1]][,,n_last]
param_converge[[2]]<-param[[2]][,,n_last]
param_converge[[3]]<-param[[3]][,n_last]
param_converge[[4]]<-param[[4]][,n_last]
param_converge[[5]]<-param[[5]][,,n_last]
param_converge[[6]]<-param[[6]][,,,n_last]
output_list<-param_converge
cluster_ids_est_U<-as.vector(apply(X = matrix(1:N1),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[1]][x,])
return(cluster_id)
}))
cluster_ids_est_P<-as.vector(apply(X = matrix(1:N2),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[2]][x,])
return(cluster_id)
}))
ICL_val<-NA
#ICL_val<-ICL_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U=param_converge[[3]], pi_P=param_converge[[4]] ,theta = param_converge[[5]], Gvals_cube_0 = Gvals_cube_0, Gvals_mat_0 = Gvals_mat_0, net_adjacency=net_adjacency, net_rating=net_rating, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R = R)
if(sim_indicator==1){
RI_val_U<-RI_cal(cluster_ids_est = cluster_ids_est_U,cluster_ids_true = cluster_ids_true_U)
RI_val_P<-RI_cal(cluster_ids_est = cluster_ids_est_P,cluster_ids_true = cluster_ids_true_P)
NMI_val<-NMI_cal(cluster_ids_true_U = cluster_ids_true_U,cluster_ids_true_P = cluster_ids_true_P,K_true_U = K_true_U,K_true_P = K_true_P,cluster_ids_est_U = cluster_ids_est_U,cluster_ids_est_P = cluster_ids_est_P, K1 = K1, K2 = K2, N1 = N1, N2 = N2)
Recovery_rate_val_U<-recovery_rate_cal(cluster_ids_est = cluster_ids_est_U,cluster_ids_true = cluster_ids_true_U)
Recovery_rate_val_P<-recovery_rate_cal(cluster_ids_est = cluster_ids_est_P,cluster_ids_true = cluster_ids_true_P)
if((K1==K_true_U)&(K2==K_true_P)){
K1_permute_mat<-do.call(rbind,permn(1:K1))
K2_permute_mat<-do.call(rbind,permn(1:K2))
RASE_theta_mat<-matrix(NA_real_,nrow(K1_permute_mat),nrow(K2_permute_mat))
for (k in 1:nrow(K1_permute_mat)){
for (l in 1:nrow(K2_permute_mat)){
theta_true_rotated<-as(as.integer(K1_permute_mat[k,]), "pMatrix")%*%theta_true%*%t(as(as.integer(K2_permute_mat[l,]), "pMatrix"))
RASE_theta_mat[k,l]<-RASE_cal(param_est = param_converge[[5]], param_true = theta_true_rotated)
}
}
RASE_theta_val<-min(RASE_theta_mat)
rotate_indices<-which(RASE_theta_mat == min(RASE_theta_mat), arr.ind = TRUE)
delta_0_true_rotated<-array(NA_real_,dim=c(K1,K2,(R-1)))
for (r in 1:(R-1)){
delta_0_true_rotated[,,r]<-as(as.integer(K1_permute_mat[rotate_indices[1],]), "pMatrix")%*%delta_0_true[,,r]%*%t(as(as.integer(K2_permute_mat[rotate_indices[2],]), "pMatrix"))
}
RASE_delta_0_val<-RASE_cal(param_est = param_converge[[6]], param_true = delta_0_true_rotated)
output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_val,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P,"Normalized_Mutual_Info"=NMI_val,"Set1_Recovery_Rate"=Recovery_rate_val_U, "Set2_Recovery_Rate"=Recovery_rate_val_P,"RASE_theta"=RASE_theta_val,"RASE_delta_0"=RASE_delta_0_val,"time_per_iter_vec"=param[[7]], "total_iterations"=param[[8]])
}else{
output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_val,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P,"Normalized_Mutual_Info"=NMI_val,"Set1_Recovery_Rate"=Recovery_rate_val_U, "Set2_Recovery_Rate"=Recovery_rate_val_P,"time_per_iter_vec"=param[[7]], "total_iterations"=param[[8]])
}
}else{output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_val,"time_per_iter_vec"=param[[7]], "total_iterations"=param[[8]])}
#}
}
return(output_list)
}
simulate_network_bipartite_model7<-function(N1,N2,pi_U,pi_P,theta,delta_0,K1,K2,R){
sourceCpp(file = paste0(file_path,"model_7.cpp"))
#library(Matrix)
net_adjacency<-matrix(0,N1,N2)
net_rating<-matrix(0,N1,N2)
#net_adjacency<-Matrix(0,N1,N2,sparse = T)
#net_rating<-Matrix(0,N1,N2,sparse = T)
if((K1==1)&(K2==1)){
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta)
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==0){
network[i,j]<-0
}else if(edge_sample==1){
G_vec<-rep(NA_real_,R)
for (r in 1:R){
G_vec[r]<-exp(mu[r]*omega-(mu[r])^2/2)
}
probab_discrete<-G_vec/sum(G_vec)
network[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_discrete))==1)
}
}
}
}
else{
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
logitInv_delta_0<-logitInv_delta_0_cal(delta_0=delta_0, K1=K1, K2=K2, R=R)
probab_block_ratings<-array(NA_real_,dim=c(K1,K2,R))
for(k in 1:K1){
for(l in 1:K2){
for(r in 1:R){
if(r==1){
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]
}else if(r==R){
probab_block_ratings[k,l,r]<-1-logitInv_delta_0[k,l,(R-1)]
}else{
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]-logitInv_delta_0[k,l,(r-1)]
}
}
}
}
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta[clust_ids_U[i],clust_ids_P[j]])
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==1){
net_adjacency[i,j]<-1
net_rating[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_block_ratings[clust_ids_U[i],clust_ids_P[j],]))==1)
}
}
}
}
return(list(net_adjacency,net_rating,clust_ids_U,clust_ids_P))
}
########################################################################################################
########################################################################################################
## Model 8 features (works):
## 1) Block structure over intercept
## 2) Proportional odds model for ratings based on my own derivations
## 3) Segmentation level (K*1) Covariate parameters in outer edge part
wrapper_bipartite_model8<-function(net_adjacency,net_rating,nclust1,nclust2,thres=10^(-6),n_iter_min=50,n_iter_max=500,theta_init,beta_u_init,beta_p_init,delta_0_init,cov_u_outer,cov_p_outer,sim_indicator,theta_true=NA,beta_u_true=NA,beta_p_true=NA, delta_0_true=NA,K_true_U=NA,K_true_P=NA,cluster_ids_true_U=NA,cluster_ids_true_P=NA,R=NA){
## This is the combined final updated and working code for EM undirected case for all K
########################################################################################################
########################################################################################################
## Loading the required packages
require(lda)
library(quadprog)
library(combinat)
library(MASS)
library(Rcpp)
library(Matrix)
library(gtools)
#sourceCpp(file = paste0(file_path,"model_8.cpp"))
library(biSBMcovNoSEM)
#################################################
## Defining a function to update variational parameters gamma using quadratic program solver. Input:
## gamma.curr is a N*K matrix for current gamma estimates, pi.curr is a K*1 vector for current pi
## estimates, theta.curr is a T_grid*K array for current theta estimates. Given Network is assumed to be an N*N*T_data array
solve_QP_wrapper<-function(quad_lin_coeff,constraint_matrix,constraint_vector,node_ID,gamma.curr){
try_QP<-try(gamma_next_vec<-solve.QP((-2*diag(as.vector(quad_lin_coeff[node_ID,,1]))),as.vector(quad_lin_coeff[node_ID,,2]),t(constraint_matrix),constraint_vector)$solution,silent = TRUE)
if('numeric' %in% class(try_QP)){
gamma_next_vec<-try_QP
}
else{
gamma_next_vec<-gamma.curr[node_ID,]
print("red flag")
print(paste0("Node ID for red flag is ", node_ID))
}
return(gamma_next_vec)
}
gamma_U.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_U.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, cov_beta_u, cov_beta_p, N1,N2,K1,K2,R){
gamma_U.next<-matrix(NA_real_,N1,K1)
constraint_matrix<-matrix(NA_real_,2+K1,K1)
constraint_matrix[1,]<-rep(1,K1)
constraint_matrix[2,]<-rep(-1,K1)
constraint_matrix[3:(K1+2),]<-diag(K1)
constraint_vector<-c(1,-1,rep(0,K1))
quad_lin_coeff<-gamma_U_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_U=pi_U.curr, theta=theta.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print(quad_lin_coeff)
for (i in 1:N1){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K1){
gamma_U.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K1){
gamma_U.next[i,]<-gamma_U.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K1){
gamma_U.next[i,]<-gamma_U.curr[i,]
}
#print(i)
}
#print('gammaU')
#print(quad_lin_coeff)
# for (i in 1:N1){
# gamma_U.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# print(i)
# }
## normalizing gamma_i. deal with case outside (0,1) later
gamma_U.next<-t(apply(X = gamma_U.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma_U.next)
}
gamma_P.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_P.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, cov_beta_u, cov_beta_p, N1,N2,K1,K2,R){
gamma_P.next<-matrix(NA_real_,N2,K2)
constraint_matrix<-matrix(NA_real_,2+K2,K2)
constraint_matrix[1,]<-rep(1,K2)
constraint_matrix[2,]<-rep(-1,K2)
constraint_matrix[3:(K2+2),]<-diag(K2)
constraint_vector<-c(1,-1,rep(0,K2))
quad_lin_coeff<-gamma_P_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_P=pi_P.curr, theta=theta.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print('gammaP')
#print(quad_lin_coeff)
#print(quad_lin_coeff)
for (i in 1:N2){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K2){
gamma_U.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K2){
gamma_P.next[i,]<-gamma_P.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K2){
gamma_P.next[i,]<-gamma_P.curr[i,]
}
#print(i)
}
# for (i in 1:N2){
# gamma_P.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# print(i)
# }
## normalizing gamma_i. deal with case outside (0,1) later
gamma_P.next<-t(apply(X = gamma_P.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma_P.next)
}
#################################################
## Defining a function to update variational parameters gamma using quadratic program solver. Input:
## gamma.curr is a N*K matrix for current gamma estimates, pi.curr is a K*1 vector for current pi
## estimates, theta.curr is a T_grid*K array for current theta estimates. Given Network is assumed to be an N*N*T_data array
# gamma_U.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_U.curr,theta.curr,logitInv_delta_0.curr,net_adjacency,net_rating,N1,N2,K1,K2,R){
# gamma_U.next<-matrix(NA_real_,N1,K1)
# constraint_matrix<-matrix(NA_real_,2+K1,K1)
# constraint_matrix[1,]<-rep(1,K1)
# constraint_matrix[2,]<-rep(-1,K1)
# constraint_matrix[3:(K1+2),]<-diag(K1)
# constraint_vector<-c(1,-1,rep(0,K1))
#
# quad_lin_coeff<-gamma_U_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_U=pi_U.curr, theta=theta.curr, beta_u=beta_u.curr, beta_p=beta_p.curr, logitInv_delta_0=logitInv_delta_0.curr,net_adjacency=net_adjacency, net_rating = net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
# #print('gammaU')
# #print(quad_lin_coeff)
# for (i in 1:N1){
# gamma_U.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# }
#
# ## normalizing gamma_i. deal with case outside (0,1) later
# gamma_U.next<-t(apply(X = gamma_U.next,MARGIN = 1,FUN = function(x){
# x_norm<-x/sum(x)
# return(x_norm)
# }))
# return(gamma_U.next)
# }
#
# gamma_P.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_P.curr,theta.curr,beta_u.curr, beta_p.curr,logitInv_delta_0.curr,net_adjacency,net_rating,N1,N2,K1,K2,R){
# gamma_P.next<-matrix(NA_real_,N2,K2)
# constraint_matrix<-matrix(NA_real_,2+K2,K2)
# constraint_matrix[1,]<-rep(1,K2)
# constraint_matrix[2,]<-rep(-1,K2)
# constraint_matrix[3:(K2+2),]<-diag(K2)
# constraint_vector<-c(1,-1,rep(0,K2))
#
# quad_lin_coeff<-gamma_P_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_P=pi_P.curr, theta=theta.curr, beta_u=beta_u.curr, beta_p=beta_p.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating = net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
# #print('gammaP')
# #print(quad_lin_coeff)
# for (i in 1:N2){
# gamma_P.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# }
#
# ## normalizing gamma_i. deal with case outside (0,1) later
# gamma_P.next<-t(apply(X = gamma_P.next,MARGIN = 1,FUN = function(x){
# x_norm<-x/sum(x)
# return(x_norm)
# }))
# return(gamma_P.next)
# }
#################################################
## Defining a function to update K*1 vector of pi
pi.update<-function(gamma.curr){
pi.next<-as.vector(apply(X = gamma.curr,MARGIN = 2,FUN = mean))
## normalization of pi
pi.next<-pi.next/sum(pi.next)
return(pi.next)
}
#################################################
## Defining the update function of full matrix theta with dimensions K1,K2
theta.update<-function(theta.curr,gamma_U,gamma_P,net_adjacency, cov_beta_u, cov_beta_p, N1,N2,K1,K2){
theta.next<-matrix(NA_real_,K1,K2)
gradient<-grad_bipartite_stat_undir_theta(theta=theta.curr, gamma_U=gamma_U, gamma_P=gamma_P, net_adjacency=net_adjacency, cov_beta_u = cov_beta_u, cov_beta_p = cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
hess<-hess_bipartite_stat_undir_theta(theta=theta.curr, gamma_U=gamma_U, gamma_P=gamma_P, cov_beta_u = cov_beta_u, cov_beta_p = cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
for(k in 1:K1){
for(l in 1:K2){
theta.next[k,l]<-theta.curr[k,l]-gradient[k,l]/hess[k,l]
}
}
return(theta.next)
}
#################################################
## Defining the update function of full vector beta_u with dimensions K1*C1_outer
beta_u.update<-function(beta_u.curr,gamma_U,gamma_P,theta,net_adjacency, cov_beta_u, cov_beta_p, cov_u_outer, N1,N2,K1,K2,C1_outer){
beta_u.next<-array(NA_real_,dim=c(K1,C1_outer))
gradient<-grad_bipartite_stat_undir_beta_u(gamma_U=gamma_U, gamma_P=gamma_P, theta=theta, net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_u_outer = cov_u_outer, N1=N1, N2=N2, K1=K1, K2=K2, C1_outer=C1_outer)
hess<-hess_bipartite_stat_undir_beta_u(gamma_U=gamma_U, gamma_P=gamma_P, theta=theta, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_u_outer = cov_u_outer, N1=N1, N2=N2, K1=K1, K2=K2, C1_outer=C1_outer)
for(c_u in 1:C1_outer){
for(k in 1:K1){
beta_u.next[k,c_u]<-beta_u.curr[k,c_u]-(gradient[k,c_u]/hess[k,c_u])
}
}
return(beta_u.next)
}
#################################################
## Defining the update function of full vector beta_p with dimensions K2*C2_outer
beta_p.update<-function(beta_p.curr,gamma_U,gamma_P,theta,net_adjacency, cov_beta_u, cov_beta_p, cov_p_outer, N1,N2,K1,K2,C2_outer){
beta_p.next<-array(NA_real_,dim=c(K2,C2_outer))
gradient<-grad_bipartite_stat_undir_beta_p(gamma_U=gamma_U, gamma_P=gamma_P, theta=theta, net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_p_outer = cov_p_outer, N1=N1, N2=N2, K1=K1, K2=K2, C2_outer=C2_outer)
hess<-hess_bipartite_stat_undir_beta_p(gamma_U=gamma_U, gamma_P=gamma_P, theta=theta, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_p_outer = cov_p_outer, N1=N1, N2=N2, K1=K1, K2=K2, C2_outer=C2_outer)
for(c_p in 1:C2_outer){
for(l in 1:K2){
beta_p.next[l,c_p]<-beta_p.curr[l,c_p]-(gradient[l,c_p]/hess[l,c_p])
}
}
return(beta_p.next)
}
#################################################
## Defining the update function of full proportional odds nuisance parameters delta_0
delta_0.update<-function(gamma_U,gamma_P,delta_0,logitInv_delta_0,logitInv_phi,net_adjacency,net_rating_statistic,N1,N2,K1,K2,R){
delta_0.next<-array(NA_real_,dim=c(K1,K2,(R-1)))
## Getting the estimated cluster memberships of all nodes in two sets from variational E step
#clust_est_U<-as.vector(apply(X = gamma_U,MARGIN = 1,FUN = which.max))
#clust_est_P<-as.vector(apply(X = gamma_P,MARGIN = 1,FUN = which.max))
#ratings_list<-tie_clust_partition(clust_est_U=clust_est_U, clust_est_P=clust_est_P, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, K1=K1, K2=K2)
gradient_delta_0<-grad_bipartite_stat_undir_delta_0(gamma_U=gamma_U, gamma_P=gamma_P, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
for (k in 1:K1){
for (l in 1:K2){
hess_mat<-hess_bipartite_stat_undir_delta_0(k=k-1, l=l-1, gamma_U=gamma_U, gamma_P=gamma_P, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, R=R)
delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat+(10^(-6)*diag(R-1)))%*%gradient_delta_0[k,l,])))
#delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat)%*%gradient_delta_0[k,l,])))
}
}
#print(delta_0.next)
return(delta_0.next)
}
#################################################
## Defining a function that takes inputs as: 1) initial values list of parameters, 2) network, 3) Number of clusters K and 4) number of iterations and outputs the whole list of all parameter iterates
iterator<-function(start,net_adjacency,net_rating,K1,K2,n_iter,thres,n_iter_min,n_iter_max,R,cov_u_outer,cov_p_outer){
## Assuming a network adjacency matrix of dimensions N1*N2 with rows corresponding to users and columns corresponding to products and each entry is an indicator whether the user rated the product. This framework can be generalized.
N1<-dim(net_adjacency)[1]
N2<-dim(net_adjacency)[2]
C1_outer<-dim(cov_u_outer)[2]
C2_outer<-dim(cov_p_outer)[2]
## initializing the arrays for parameters
## Initializing the cluster memberships gamma
gamma_U<-array(NA_real_,dim=c(N1,K1,n_iter)) ## assuming there are K1 clusters in user set
gamma_P<-array(NA_real_,dim=c(N2,K2,n_iter)) ## assuming there are K2 clusters in product set
## Initializing the mixture proportions pi
pi_U<-matrix(NA_real_,K1,n_iter)
pi_P<-matrix(NA_real_,K2,n_iter)
## Initializing the network parameters theta
theta<-array(NA_real_,dim=c(K1,K2,n_iter))
## Initializing the covariate parameters beta_u and beta_p
beta_u<-array(NA_real_,dim=c(K1,C1_outer,n_iter))
beta_p<-array(NA_real_,dim=c(K2,C2_outer,n_iter))
## Initializing the proportional odds model intercept parameters
delta_0<-array(NA_real_,dim=c(K1,K2,R-1,n_iter))
gamma_U[,,1]<-start[[1]]
gamma_P[,,1]<-start[[2]]
pi_U[,1]<-start[[3]]
pi_P[,1]<-start[[4]]
theta[,,1]<-start[[5]]
beta_u[,,1]<-start[[6]]
beta_p[,,1]<-start[[7]]
delta_0[,,,1]<-start[[8]]
#print(theta[,,1])
#print(omega[,1])
#print(mu[,,,1])
## iterations
## Defining the iteration index
iter_index<-2
## Initializing the error
error<-Inf
## Initializing the current ELBO values over the whole grid
ELBO_curr<-10^10
time_per_iter<-c()
while(((error>thres)&(iter_index<n_iter_max))|(iter_index<n_iter_min)){
#while((error>thres|iter_index<200)&(iter_index<250)&(iter_index==2|error<10)){
#while(iter_index<21){
## Starting the stopwatch to calculate the per iteration time
ptm<-proc.time()
## Calculating cov_beta_u and cov_beta_p
cov_beta_u<-cov_beta_mul(cov=cov_u_outer,beta=t(beta_u[,,iter_index-1]))
cov_beta_p<-cov_beta_mul(cov=cov_p_outer,beta=t(beta_p[,,iter_index-1]))
## Calculating the logit inverse of delta_0
logitInv_delta_0.curr<-logitInv_delta_0_cal(delta_0=delta_0[,,,iter_index-1], K1=K1, K2=K2, R=R)
## Transforming logit inverse of delta_0 to phi
#phi.curr<-phi_cal(logitInv_delta_0=logitInv_delta_0.curr, K1=K1, K2=K2, R=R)
## Calculating the logit inverse of phi
#logitInv_phi.curr<-logitInv_phi_cal(phi=phi.curr, K1=K1, K2=K2, R=R)
## Updating gamma_U, the mixed memberships for users
gamma_U[,,iter_index]<-gamma_U.update.wrapper(gamma_U.curr=gamma_U[,,iter_index-1], gamma_P.curr=gamma_P[,,iter_index-1], pi_U.curr=pi_U[,iter_index-1], theta.curr=theta[,,iter_index-1], cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1,N2=N2,K1=K1,K2=K2,R=R)
## Updating the mixture proportions pi_U
pi_U[,iter_index]<-pi.update(gamma.curr=gamma_U[,,iter_index])
## Updating gamma_P, the mixed memberships for products
gamma_P[,,iter_index]<-gamma_P.update.wrapper(gamma_U.curr=gamma_U[,,iter_index],gamma_P.curr=gamma_P[,,iter_index-1],pi_P.curr=pi_P[,iter_index-1], theta.curr=theta[,,iter_index-1], cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1,N2=N2,K1=K1,K2=K2,R=R)
## Updating the mixture proportions pi_P
pi_P[,iter_index]<-pi.update(gamma.curr=gamma_P[,,iter_index])
## Updating the network parameters theta
theta[,,iter_index]<-theta.update(theta.curr=theta[,,iter_index-1],gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], net_adjacency=net_adjacency, cov_beta_u = cov_beta_u, cov_beta_p = cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
#print('theta')
#print(theta[,,iter_index])
## Updating the covariate parameters beta_u
beta_u[,,iter_index]<-beta_u.update(beta_u.curr = matrix(beta_u[,,iter_index-1],K1,C1_outer),gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], theta=theta[,,iter_index], net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_u_outer = cov_u_outer, N1=N1, N2=N2, K1=K1, K2=K2, C1_outer= C1_outer)
#print('beta_u and beta_p')
#print(beta_u[,,iter_index])
## Updating the covariate parameters beta_p
beta_p[,,iter_index]<-beta_p.update(beta_p.curr = matrix(beta_p[,,iter_index-1],K2,C2_outer),gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], theta=theta[,,iter_index], net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_p_outer = cov_p_outer, N1=N1, N2=N2, K1=K1, K2=K2, C2_outer=C2_outer)
#print(beta_p[,,iter_index])
## Updating the omega matrix
delta_0[,,,iter_index]<-delta_0.update(gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], delta_0=delta_0[,,,iter_index-1],logitInv_delta_0 = logitInv_delta_0.curr,logitInv_phi = logitInv_phi.curr, net_adjacency=net_adjacency, net_rating_statistic = net_rating_statistic, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print('delta_0')
#print(delta_0[,,,iter_index])
#Sys.sleep(1.5)
ELBO_prev<-ELBO_curr
ELBO_curr<-ELBO_conv_bipartite_stat_undir(gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], pi_U=pi_U[,iter_index], pi_P=pi_P[,iter_index], theta = theta[,,iter_index], logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
error<-abs(((ELBO_prev-ELBO_curr)/ELBO_prev))
if((iter_index%%1)==0){
#print(theta[,,iter_index])
print('iter_index')
print(iter_index)
print('error')
print(error)
print(proc.time()-ptm)
}
ptm_diff<-proc.time()-ptm
time_per_iter<-c(time_per_iter,ptm_diff[3])
#print("probs")
#print(Gvals_cube_0)
# print(apply(X = Gvals_cube_0,MARGIN = c(1,2),FUN = function(x){
#
# return(x/sum(x))
# }))
print('iter_index')
print(iter_index)
print(proc.time()-ptm)
iter_index<-iter_index+1
}
names(time_per_iter)<-NULL
total_iterations<-iter_index-1
return(list(gamma_U,gamma_P,pi_U,pi_P,theta,beta_u,beta_p,delta_0,time_per_iter,total_iterations))
}
########################################################################################################
########################################################################################################
## Defining Model Selection functions based on converged paramters
#################################################
## Defining a function to calculate the estimate of complete log likelihood
comp_loglik_cal<-function(cluster_ids_est_U = NA, cluster_ids_est_P = NA, pi_U = NA, pi_P = NA, theta, beta_u, beta_p, delta_0, net_adjacency, net_rating, cov_u_outer, cov_p_outer, N1, N2, K1, K2, R){
if((K1!=1)&(K2!=1)){
logitInv_delta_0<-logitInv_delta_0_cal(delta_0=delta_0, K1=K1, K2=K2, R=R)
probab_block_ratings<-array(NA_real_,dim=c(K1,K2,R))
for(k in 1:K1){
for(l in 1:K2){
for(r in 1:R){
if(r==1){
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]
}else if(r==R){
probab_block_ratings[k,l,r]<-1-logitInv_delta_0[k,l,(R-1)]
}else{
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]-logitInv_delta_0[k,l,(r-1)]
}
}
}
}
## 1st term
t1<-0
for(i in 1:N1){
for(j in 1:N2){
cluster_id_i<-cluster_ids_est_U[i]
cluster_id_j<-cluster_ids_est_P[j]
predictor<-theta[cluster_id_i,cluster_id_j]+sum(cov_u_outer[i,]*beta_u[cluster_id_i,])+sum(cov_p_outer[j,]*beta_p[cluster_id_j,])
exp_val<-exp(predictor)
if(net_adjacency[i,j]==0){
t1<-t1-(log(1+exp_val))
}else{
for(r in 1:R){
if(net_rating[i,j]==r){
t1<-t1+(predictor-log(1+exp_val)+log(probab_block_ratings[cluster_id_i,cluster_id_j,r]))
}
}
}
}
}
## 2nd term
t2<-0
for (i in 1:N1){
t2<-t2+log(pi_U[cluster_ids_est_U[i]])
}
t3<-0
for (j in 1:N2){
t3<-t3+log(pi_P[cluster_ids_est_P[j]])
}
comp_val<-t1+t2+t3
}
return(comp_val)
}
#################################################
## Defining a function to calculate the integrated classification likelihood
ICL_cal<-function(cluster_ids_est_U, cluster_ids_est_P, pi_U, pi_P, theta, beta_u, beta_p, delta_0, net_adjacency, net_rating, cov_u_outer, cov_p_outer, N1, N2, K1, K2, R){
comp_loglik_val<-comp_loglik_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U = pi_U, pi_P = pi_P, theta = theta, beta_u = beta_u, beta_p = beta_p, delta_0 = delta_0, cov_u_outer = cov_u_outer, cov_p_outer = cov_p_outer, net_adjacency = net_adjacency, net_rating = net_rating, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R=R)
## Penalization over pi
t2<-(K1-1)*log(N1)+(K2-1)*log(N2)
## Penalization over theta
t3<-(K1*K2)*log(N1*N2)
## Penalization over beta_u
C1_outer<-dim(cov_u_outer)[2]
t4<-(K1*C1_outer)*log(N1*N2)
## Penalization over beta_p
C2_outer<-dim(cov_p_outer)[2]
t5<-(K2*C2_outer)*log(N1*N2)
## Calculating the number of edges
N0_val<-N0_cal(net_adjacency=net_adjacency, N1=N1, N2=N2)
## Penalization over omega, mu
t6<-(K1*K2*(R-1))*log(N0_val)
## all penalty terms together:
penalty<-t2+t3+t4+t5+t6
ICL_val<-comp_loglik_val-penalty
ICL_vec<-c(comp_loglik_val,penalty,ICL_val)
return(ICL_vec)
}
########################################################################################################
## Defining the Rand Index and RASE functions for evaluating the performance of clustering and estimation respectively
## Rand Index function
RI_cal<-function(cluster_ids_est, cluster_ids_true){
n=length(cluster_ids_est)
RI_val=0
for(i in 1:(n-1)){
for(j in (i+1):n){
RI_val=RI_val+as.numeric((cluster_ids_est[i]==cluster_ids_est[j])==(cluster_ids_true[i]==cluster_ids_true[j]))
}
}
RI_mean=RI_val/(n*(n-1)/2)
return(RI_mean)
}
#################################################
## RASE functions
RASE_cal<-function(param_est, param_true){
if(is.vector(param_est)!=1){
RASE_val<-sqrt(sum((param_est-param_true)^2)/prod(dim(param_est)))
}else{RASE_val<-sqrt(sum((param_est-param_true)^2)/length(param_est))}
return(RASE_val)
}
########################################################################################################
## Defining the functions to evaluate clustering performance using normalized mutual information.
probab_clust_cal<-function(cluster_ids,N,K){
probab_vec<-rep(NA_real_,K)
for(k in 1:K){
probab_vec[k]<-sum(cluster_ids==k)/N
}
return(probab_vec)
}
########################################################################################################
## Normalized Mutual Info
NMI_cal<-function(cluster_ids_true_U,cluster_ids_true_P,K_true_U,K_true_P,cluster_ids_est_U,cluster_ids_est_P,K1,K2,N1,N2){
probab_vec_true_U<-probab_clust_cal(cluster_ids = cluster_ids_true_U,N = N1+N2,K = K_true_U)
probab_vec_true_P<-probab_clust_cal(cluster_ids = cluster_ids_true_P,N = N1+N2,K = K_true_P)
probab_vec_true<-c(probab_vec_true_U,probab_vec_true_P)
total_entropy_true<-0
for(k in 1:length(probab_vec_true)){
if(probab_vec_true[k]!=0){
total_entropy_true<-total_entropy_true-(probab_vec_true[k]*log(probab_vec_true[k],base = 2))
}
}
probab_vec_est_U<-probab_clust_cal(cluster_ids = cluster_ids_est_U,N = N1+N2,K = K1)
probab_vec_est_P<-probab_clust_cal(cluster_ids = cluster_ids_est_P,N = N1+N2,K = K2)
probab_vec_est<-c(probab_vec_est_U,probab_vec_est_P)
total_entropy_est<-0
for(k in 1:length(probab_vec_est)){
if(probab_vec_est[k]!=0){
total_entropy_est<-total_entropy_est-(probab_vec_est[k]*log(probab_vec_est[k],base = 2))
}
}
clust_ids_true_total<-c(cluster_ids_true_U,(cluster_ids_true_P+K_true_U))
clust_ids_est_total<-c(cluster_ids_est_U,(cluster_ids_est_P+K1))
conditional_entropy<-rep(NA_real_,(K1+K2))
for(k in 1:(K1+K2)){
probab_conditional_vec<-rep(0,(K_true_U+K_true_P))
for(m in 1:(K_true_U+K_true_P)){
if(sum(clust_ids_est_total==k)!=0){
probab_conditional_vec[m]<-sum((clust_ids_true_total==m)&(clust_ids_est_total==k))/sum(clust_ids_est_total==k)
}
}
plogp_conditional_vec<-rep(0,length(probab_conditional_vec))
for(m in 1:length(probab_conditional_vec)){
if(probab_conditional_vec[m]!=0){
plogp_conditional_vec[m]<-probab_conditional_vec[m]*log(x = probab_conditional_vec[m],base = 2)
}
}
conditional_entropy[k]<--probab_vec_est[k]*sum(plogp_conditional_vec)
}
mutual_info<-total_entropy_true-sum(conditional_entropy)
NMI_val<-(2*mutual_info)/(total_entropy_true+total_entropy_est)
return(NMI_val)
}
########################################################################################################
## Calculating the recovery rate
recovery_rate_cal <- function(cluster_ids_est, cluster_ids_true){
all_perm<-permutations(n=4,r=2,repeats.allowed = T)
clust_perm_mat<-matrix(NA_integer_,4,4)
for(i in 1:nrow(all_perm)){
clust_perm_mat[all_perm[i,1],all_perm[i,2]] <- sum((cluster_ids_true==all_perm[i,1])&(cluster_ids_est==all_perm[i,2]))
}
recovery_rate_val<-sum(apply(X = clust_perm_mat, MARGIN = 1, FUN = max))/length(cluster_ids_est)
return(recovery_rate_val)
}
#########################################################################################################
## Defining the parameters
N1<-dim(net_adjacency)[1] ## Number of nodes from the network
N2<-dim(net_adjacency)[2]
## Total time points in the network for which we have data in the form of adjacency matrix
K1<-nclust1 ## Defining the number of clusters
K2<-nclust2
K <-K1*K2
#################################################
## Defining the starting values of the iterator and running the main algorithm
start<-list()
start[[1]]<-matrix(rep(1/K1,N1*K1),N1,K1)
start[[2]]<-matrix(rep(1/K2,N2*K2),N2,K2)
start[[3]]<-rep(1/K1,K1)
start[[4]]<-rep(1/K2,K2)
start[[5]]<-theta_init
start[[6]]<-beta_u_init
start[[7]]<-beta_p_init
start[[8]]<-delta_0_init
if(K1!=1&K2!=1){ param<-iterator(start=start, net_adjacency = net_adjacency, net_rating=net_rating, K1=K1, K2=K2, n_iter=1000, thres=thres, n_iter_min=n_iter_min, n_iter_max=n_iter_max, R=R, cov_u_outer=cov_u_outer, cov_p_outer=cov_p_outer)}
if(K1==1&K2!=1){ param<-iterator_K1(start=start, network=sim.net, K1=K1, K2=K2, n_iter=1000, thres=thres,R=R)}
if(K1!=1&K2==1){ param<-iterator_K2(start=start, network=sim.net, K1=K1, K2=K2, n_iter=1000, thres=thres,R=R)}
if(K1==1&K2==1){ param<-iterator_K1K2(start=start, network=sim.net, K1=K1, K2=K2, n_iter=1000, thres=thres,R=R)}
#################################################
## extracting the coverged parameter values and calculating ICL
n_iter=1
indicator_last<-0
while(indicator_last==0){
temp<-is.na(param[[1]][1,1,n_iter])
if(temp==TRUE){
n_last<-n_iter-1
indicator_last<-1
}
n_iter<-n_iter+1
}
param_converge<-list()
#print(param)
if(K1!=1&K2!=1){
param_converge[[1]]<-param[[1]][,,n_last]
param_converge[[2]]<-param[[2]][,,n_last]
param_converge[[3]]<-param[[3]][,n_last]
param_converge[[4]]<-param[[4]][,n_last]
param_converge[[5]]<-param[[5]][,,n_last]
param_converge[[6]]<-param[[6]][,,n_last]
param_converge[[7]]<-param[[7]][,,n_last]
param_converge[[8]]<-param[[8]][,,,n_last]
output_list<-param_converge
cluster_ids_est_U<-as.vector(apply(X = matrix(1:N1),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[1]][x,])
return(cluster_id)
}))
cluster_ids_est_P<-as.vector(apply(X = matrix(1:N2),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[2]][x,])
return(cluster_id)
}))
ICL_vec<-NA
#ICL_vec<-ICL_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U=param_converge[[3]], pi_P=param_converge[[4]] ,theta = param_converge[[5]], beta_u = as.matrix(param_converge[[6]]), beta_p = as.matrix(param_converge[[7]]), delta_0 = param_converge[[8]], net_adjacency=net_adjacency, net_rating=net_rating, cov_u_outer = cov_u_outer, cov_p_outer = cov_p_outer, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R = R)
if(sim_indicator==1){
RI_val_U<-RI_cal(cluster_ids_est = cluster_ids_est_U,cluster_ids_true = cluster_ids_true_U)
RI_val_P<-RI_cal(cluster_ids_est = cluster_ids_est_P,cluster_ids_true = cluster_ids_true_P)
NMI_val<-NMI_cal(cluster_ids_true_U = cluster_ids_true_U,cluster_ids_true_P = cluster_ids_true_P,K_true_U = K_true_U,K_true_P = K_true_P,cluster_ids_est_U = cluster_ids_est_U,cluster_ids_est_P = cluster_ids_est_P, K1 = K1, K2 = K2, N1 = N1, N2 = N2)
Recovery_rate_val_U<-recovery_rate_cal(cluster_ids_est = cluster_ids_est_U,cluster_ids_true = cluster_ids_true_U)
Recovery_rate_val_P<-recovery_rate_cal(cluster_ids_est = cluster_ids_est_P,cluster_ids_true = cluster_ids_true_P)
if((K1==K_true_U)&(K2==K_true_P)){
K1_permute_mat<-do.call(rbind,permn(1:K1))
K2_permute_mat<-do.call(rbind,permn(1:K2))
RASE_beta_u_vec<-rep(NA_real_,nrow(K1_permute_mat))
for (k in 1:nrow(K1_permute_mat)){
beta_u_est<-as.vector(param_converge[[6]])[K1_permute_mat[k,]]
RASE_beta_u_vec[k]<-RASE_cal(param_est = beta_u_est, param_true = as.vector(beta_u_true))
}
permute_true_id_beta_u<-which.min(RASE_beta_u_vec)
RASE_beta_u_val<-RASE_beta_u_vec[permute_true_id_beta_u]
RASE_beta_p_vec<-rep(NA_real_,nrow(K2_permute_mat))
for (k in 1:nrow(K2_permute_mat)){
beta_p_est<-as.vector(param_converge[[7]])[K2_permute_mat[k,]]
RASE_beta_p_vec[k]<-RASE_cal(param_est = beta_p_est, param_true = as.vector(beta_p_true))
}
permute_true_id_beta_p<-which.min(RASE_beta_p_vec)
RASE_beta_p_val<-RASE_beta_p_vec[permute_true_id_beta_p]
RASE_theta_mat<-matrix(NA_real_,nrow(K1_permute_mat),nrow(K2_permute_mat))
for (k in 1:nrow(K1_permute_mat)){
for (l in 1:nrow(K2_permute_mat)){
theta_true_rotated<-as(as.integer(K1_permute_mat[k,]), "pMatrix")%*%theta_true%*%t(as(as.integer(K2_permute_mat[l,]), "pMatrix"))
RASE_theta_mat[k,l]<-RASE_cal(param_est = param_converge[[5]], param_true = theta_true_rotated)
}
}
RASE_theta_val<-min(RASE_theta_mat)
rotate_indices<-which(RASE_theta_mat == min(RASE_theta_mat), arr.ind = TRUE)
## RASE for delta_0
# RASE_delta_0_vec<-rep(NA_real_,R-1)
# for (r in 1:(R-1)){
# delta_0_true_rotated<-as(as.integer(K1_permute_mat[rotate_indices[1],]), "pMatrix")%*%delta_0_true[,,r]%*%t(as(as.integer(K2_permute_mat[rotate_indices[2],]), "pMatrix"))
# RASE_delta_0_vec[r]<-RASE_cal(param_est = param_converge[[8]][,,r], param_true = delta_0_true_rotated)
# }
delta_0_true_rotated<-array(NA_real_,dim=c(K1,K2,(R-1)))
for (r in 1:(R-1)){
delta_0_true_rotated[,,r]<-as(as.integer(K1_permute_mat[rotate_indices[1],]), "pMatrix")%*%delta_0_true[,,r]%*%t(as(as.integer(K2_permute_mat[rotate_indices[2],]), "pMatrix"))
}
RASE_delta_0_val<-RASE_cal(param_est = param_converge[[8]], param_true = delta_0_true_rotated)
output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_vec,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P,"Normalized_Mutual_Info"=NMI_val, "Set1_Recovery_Rate"=Recovery_rate_val_U, "Set2_Recovery_Rate"=Recovery_rate_val_P, "RASE_theta"=RASE_theta_val, "RASE_beta_u"=RASE_beta_u_val, "RASE_beta_p"=RASE_beta_p_val, "RASE_delta_0"=RASE_delta_0_val,"time_per_iter_vec"=param[[9]],"total_iterations"=param[[10]])
}else{
output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_vec,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P,"Normalized_Mutual_Info"=NMI_val, "Set1_Recovery_Rate"=Recovery_rate_val_U, "Set2_Recovery_Rate"=Recovery_rate_val_P, "time_per_iter_vec"=param[[9]],"total_iterations"=param[[10]])
}
}else{output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_vec,"time_per_iter_vec"=param[[9]],"total_iterations"=param[[10]])}
#}
}
return(output_list)
}
simulate_network_bipartite_model8<-function(N1,N2,pi_U,pi_P,theta,beta_u,beta_p,delta_0,cov_u_outer,cov_p_outer,K1,K2,R){
library(Rcpp)
sourceCpp(file = paste0(file_path,"model_8.cpp"))
#library(Matrix)
net_adjacency<-matrix(0,N1,N2)
net_rating<-matrix(0,N1,N2)
#net_adjacency<-Matrix(0,N1,N2,sparse = T)
#net_rating<-Matrix(0,N1,N2,sparse = T)
if((K1==1)&(K2==1)){
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta)
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==0){
network[i,j]<-0
}else if(edge_sample==1){
G_vec<-rep(NA_real_,R)
for (r in 1:R){
G_vec[r]<-exp(mu[r]*omega-(mu[r])^2/2)
}
probab_discrete<-G_vec/sum(G_vec)
network[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_discrete))==1)
}
}
}
}
else{
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
logitInv_delta_0<-logitInv_delta_0_cal(delta_0=delta_0, K1=K1, K2=K2, R=R)
probab_block_ratings<-array(NA_real_,dim=c(K1,K2,R))
for(k in 1:K1){
for(l in 1:K2){
for(r in 1:R){
if(r==1){
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]
}else if(r==R){
probab_block_ratings[k,l,r]<-1-logitInv_delta_0[k,l,(R-1)]
}else{
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]-logitInv_delta_0[k,l,(r-1)]
}
}
}
}
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta[clust_ids_U[i],clust_ids_P[j]]+sum(cov_u_outer[i,]*beta_u[clust_ids_U[i],])+sum(cov_p_outer[j,]*beta_p[clust_ids_P[j],]))
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==1){
net_adjacency[i,j]<-1
net_rating[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_block_ratings[clust_ids_U[i],clust_ids_P[j],]))==1)
}
}
}
}
return(list(net_adjacency,net_rating,clust_ids_U,clust_ids_P))
}
########################################################################################################
########################################################################################################
## Model 10 features (This works!):
## 1) Block structure over intercept
## 2) Proportional odds model for ratings based on my own derivations
## 3) Segmentation level (K*1) Covariate parameters in outer edge part
## 4) Stochastic over K
wrapper_bipartite_model10<-function(net_adjacency,net_rating,nclust1,nclust2,thres=10^(-6),n_iter_min=50,n_iter_max=500,theta_init,beta_u_init,beta_p_init,delta_0_init,cov_u_outer,cov_p_outer,sim_indicator,theta_true=NA,beta_u_true=NA,beta_p_true=NA, delta_0_true=NA,K_true_U=NA,K_true_P=NA,cluster_ids_true_U=NA,cluster_ids_true_P=NA,R=NA){
## This is the combined final updated and working code for EM undirected case for all K
########################################################################################################
########################################################################################################
## Loading the required packages
require(lda)
library(quadprog)
library(combinat)
library(MASS)
library(Rcpp)
library(Matrix)
library(gtools)
#sourceCpp(file = paste0(file_path,"model_10.cpp"))
library(biSBMcovSEM)
#################################################
## Defining a function to update variational parameters gamma using quadratic program solver. Input:
## gamma.curr is a N*K matrix for current gamma estimates, pi.curr is a K*1 vector for current pi
## estimates, theta.curr is a T_grid*K array for current theta estimates. Given Network is assumed to be an N*N*T_data array
solve_QP_wrapper<-function(quad_lin_coeff,constraint_matrix,constraint_vector,node_ID,gamma.curr){
try_QP<-try(gamma_next_vec<-solve.QP((-2*diag(as.vector(quad_lin_coeff[node_ID,,1]))),as.vector(quad_lin_coeff[node_ID,,2]),t(constraint_matrix),constraint_vector)$solution,silent = TRUE)
if('numeric' %in% class(try_QP)){
gamma_next_vec<-try_QP
}
else{
gamma_next_vec<-gamma.curr[node_ID,]
print("red flag")
print(paste0("Node ID for red flag is ", node_ID))
}
return(gamma_next_vec)
}
gamma_U.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_U.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, cov_beta_u, cov_beta_p, N1,N2,K1,K2,R){
gamma_U.next<-matrix(NA_real_,N1,K1)
constraint_matrix<-matrix(NA_real_,2+K1,K1)
constraint_matrix[1,]<-rep(1,K1)
constraint_matrix[2,]<-rep(-1,K1)
constraint_matrix[3:(K1+2),]<-diag(K1)
constraint_vector<-c(1,-1,rep(0,K1))
quad_lin_coeff<-gamma_U_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_U=pi_U.curr, theta=theta.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print(quad_lin_coeff)
for (i in 1:N1){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K1){
gamma_U.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K1)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_U.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_U.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K1){
gamma_U.next[i,]<-gamma_U.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K1){
gamma_U.next[i,]<-gamma_U.curr[i,]
}
#print(i)
}
#print('gammaU')
#print(quad_lin_coeff)
# for (i in 1:N1){
# gamma_U.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# print(i)
# }
## normalizing gamma_i. deal with case outside (0,1) later
gamma_U.next<-t(apply(X = gamma_U.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma_U.next)
}
gamma_P.update.wrapper<-function(gamma_U.curr,gamma_P.curr,pi_P.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, cov_beta_u, cov_beta_p, N1,N2,K1,K2,R){
gamma_P.next<-matrix(NA_real_,N2,K2)
constraint_matrix<-matrix(NA_real_,2+K2,K2)
constraint_matrix[1,]<-rep(1,K2)
constraint_matrix[2,]<-rep(-1,K2)
constraint_matrix[3:(K2+2),]<-diag(K2)
constraint_vector<-c(1,-1,rep(0,K2))
quad_lin_coeff<-gamma_P_update_stat_undir(gamma_U=gamma_U.curr, gamma_P=gamma_P.curr, pi_P=pi_P.curr, theta=theta.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print('gammaP')
#print(quad_lin_coeff)
#print(quad_lin_coeff)
for (i in 1:N2){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K2){
gamma_U.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K2)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma_P.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma_P.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K2){
gamma_P.next[i,]<-gamma_P.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K2){
gamma_P.next[i,]<-gamma_P.curr[i,]
}
#print(i)
}
# for (i in 1:N2){
# gamma_P.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
# print(i)
# }
## normalizing gamma_i. deal with case outside (0,1) later
gamma_P.next<-t(apply(X = gamma_P.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma_P.next)
}
#################################################
## Defining a function to update K*1 vector of pi
pi.update<-function(gamma.curr){
pi.next<-as.vector(apply(X = gamma.curr,MARGIN = 2,FUN = mean))
## normalization of pi
pi.next<-pi.next/sum(pi.next)
return(pi.next)
}
#################################################
## Defining the update function of full matrix theta with dimensions K1,K2
theta.update<-function(theta.curr,cluster_ids_U, cluster_ids_P,net_adjacency, cov_beta_u, cov_beta_p, N1,N2,K1,K2){
theta.next<-matrix(NA_real_,K1,K2)
gradient<-grad_bipartite_stat_undir_theta(theta=theta.curr, cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, net_adjacency=net_adjacency, cov_beta_u = cov_beta_u, cov_beta_p = cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
hess<-hess_bipartite_stat_undir_theta(theta=theta.curr, cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, cov_beta_u = cov_beta_u, cov_beta_p = cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
for(k in 1:K1){
for(l in 1:K2){
theta.next[k,l]<-theta.curr[k,l]-gradient[k,l]/hess[k,l]
}
}
return(theta.next)
}
#################################################
## Defining the update function of full vector beta_u with dimensions K1*C1_outer
beta_u.update<-function(beta_u.curr,cluster_ids_U, cluster_ids_P,theta,net_adjacency, cov_beta_u, cov_beta_p, cov_u_outer, N1,N2,K1,K2,C1_outer){
beta_u.next<-array(NA_real_,dim=c(K1,C1_outer))
gradient<-grad_bipartite_stat_undir_beta_u(cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, theta=theta, net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_u_outer = cov_u_outer, N1=N1, N2=N2, K1=K1, K2=K2, C1_outer=C1_outer)
hess<-hess_bipartite_stat_undir_beta_u(cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, theta=theta, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_u_outer = cov_u_outer, N1=N1, N2=N2, K1=K1, K2=K2, C1_outer=C1_outer)
for(c_u in 1:C1_outer){
for(k in 1:K1){
beta_u.next[k,c_u]<-beta_u.curr[k,c_u]-(gradient[k,c_u]/hess[k,c_u])
}
}
return(beta_u.next)
}
#################################################
## Defining the update function of full vector beta_p with dimensions K2*C2_outer
beta_p.update<-function(beta_p.curr,cluster_ids_U,cluster_ids_P,theta,net_adjacency, cov_beta_u, cov_beta_p, cov_p_outer, N1,N2,K1,K2,C2_outer){
beta_p.next<-array(NA_real_,dim=c(K2,C2_outer))
gradient<-grad_bipartite_stat_undir_beta_p(cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, theta=theta, net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_p_outer = cov_p_outer, N1=N1, N2=N2, K1=K1, K2=K2, C2_outer=C2_outer)
hess<-hess_bipartite_stat_undir_beta_p(cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, theta=theta, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_p_outer = cov_p_outer, N1=N1, N2=N2, K1=K1, K2=K2, C2_outer=C2_outer)
for(c_p in 1:C2_outer){
for(l in 1:K2){
beta_p.next[l,c_p]<-beta_p.curr[l,c_p]-(gradient[l,c_p]/hess[l,c_p])
}
}
return(beta_p.next)
}
#################################################
## Defining the update function of full proportional odds nuisance parameters delta_0
delta_0.update<-function(cluster_ids_U,cluster_ids_P,delta_0,logitInv_delta_0,net_adjacency,net_rating_statistic,N1,N2,K1,K2,R){
delta_0.next<-array(NA_real_,dim=c(K1,K2,(R-1)))
gradient_delta_0<-grad_bipartite_stat_undir_delta_0(cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
for (k in 1:K1){
for (l in 1:K2){
hess_mat<-hess_bipartite_stat_undir_delta_0(k=k-1, l=l-1, cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1, N2=N2, R=R)
delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat+(10^(-6)*diag(R-1)))%*%gradient_delta_0[k,l,])))
#delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat)%*%gradient_delta_0[k,l,])))
}
}
#print(delta_0.next)
return(delta_0.next)
}
#################################################
## Defining a function that takes inputs as: 1) initial values list of parameters, 2) network, 3) Number of clusters K and 4) number of iterations and outputs the whole list of all parameter iterates
iterator<-function(start,net_adjacency,net_rating,K1,K2,n_iter,thres,n_iter_min,n_iter_max,R,cov_u_outer,cov_p_outer){
## Assuming a network adjacency matrix of dimensions N1*N2 with rows corresponding to users and columns corresponding to products and each entry is an indicator whether the user rated the product. This framework can be generalized.
N1<-dim(net_adjacency)[1]
N2<-dim(net_adjacency)[2]
C1_outer<-dim(cov_u_outer)[2]
C2_outer<-dim(cov_p_outer)[2]
## initializing the arrays for parameters
## Initializing the cluster memberships gamma
gamma_U<-array(NA_real_,dim=c(N1,K1,n_iter)) ## assuming there are K1 clusters in user set
gamma_P<-array(NA_real_,dim=c(N2,K2,n_iter)) ## assuming there are K2 clusters in product set
## Initializing the mixture proportions pi
pi_U<-matrix(NA_real_,K1,n_iter)
pi_P<-matrix(NA_real_,K2,n_iter)
## Initializing the network parameters theta
theta<-array(NA_real_,dim=c(K1,K2,n_iter))
## Initializing the covariate parameters beta_u and beta_p
beta_u<-array(NA_real_,dim=c(K1,C1_outer,n_iter))
beta_p<-array(NA_real_,dim=c(K2,C2_outer,n_iter))
## Initializing the proportional odds model intercept parameters
delta_0<-array(NA_real_,dim=c(K1,K2,R-1,n_iter))
gamma_U[,,1]<-start[[1]]
gamma_P[,,1]<-start[[2]]
pi_U[,1]<-start[[3]]
pi_P[,1]<-start[[4]]
theta[,,1]<-start[[5]]
beta_u[,,1]<-start[[6]]
beta_p[,,1]<-start[[7]]
delta_0[,,,1]<-start[[8]]
#print(theta[,,1])
#print(omega[,1])
#print(mu[,,,1])
## iterations
## Defining the iteration index
iter_index<-2
## Initializing the error
error<-Inf
## Initializing the current ELBO values over the whole grid
ELBO_curr<-10^10
time_per_iter<-c()
#iter_index<-iter_index+1
while(((error>thres)&(iter_index<n_iter_max))|(iter_index<n_iter_min)){
#while((error>thres|iter_index<200)&(iter_index<250)&(iter_index==2|error<10)){
#while(iter_index<21){
## Starting the stopwatch to calculate the per iteration time
ptm<-proc.time()
## Calculating cov_beta_u and cov_beta_p
cov_beta_u<-cov_beta_mul(cov=cov_u_outer,beta=t(beta_u[,,iter_index-1]))
cov_beta_p<-cov_beta_mul(cov=cov_p_outer,beta=t(beta_p[,,iter_index-1]))
## Calculating the logit inverse of delta_0
logitInv_delta_0.curr<-logitInv_delta_0_cal(delta_0=delta_0[,,,iter_index-1], K1=K1, K2=K2, R=R)
## Transforming logit inverse of delta_0 to phi
#phi.curr<-phi_cal(logitInv_delta_0=logitInv_delta_0.curr, K1=K1, K2=K2, R=R)
## Calculating the logit inverse of phi
#logitInv_phi.curr<-logitInv_phi_cal(phi=phi.curr, K1=K1, K2=K2, R=R)
## Updating gamma_U, the mixed memberships for users
gamma_U[,,iter_index]<-gamma_U.update.wrapper(gamma_U.curr=gamma_U[,,iter_index-1], gamma_P.curr=gamma_P[,,iter_index-1], pi_U.curr=pi_U[,iter_index-1], theta.curr=theta[,,iter_index-1], cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1,N2=N2,K1=K1,K2=K2,R=R)
## Updating the mixture proportions pi_U
pi_U[,iter_index]<-pi.update(gamma.curr=gamma_U[,,iter_index])
## Stochastic Step: Sampling the cluster memberships for each user node
cluster_ids_U<-as.vector(apply(X = matrix(1:N1),MARGIN = 1,FUN = function(x){
y<-as.vector(rmultinom(n = 1,size = 1,prob = gamma_U[x,,iter_index]))
cluster_id<-which(y==1)
return(cluster_id)
}))
## Updating gamma_P, the mixed memberships for products
gamma_P[,,iter_index]<-gamma_P.update.wrapper(gamma_U.curr=gamma_U[,,iter_index],gamma_P.curr=gamma_P[,,iter_index-1],pi_P.curr=pi_P[,iter_index-1], theta.curr=theta[,,iter_index-1], cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N1=N1,N2=N2,K1=K1,K2=K2,R=R)
## Updating the mixture proportions pi_P
pi_P[,iter_index]<-pi.update(gamma.curr=gamma_P[,,iter_index])
## Stochastic Step: Sampling the cluster memberships for each user node
cluster_ids_P<-as.vector(apply(X = matrix(1:N2),MARGIN = 1,FUN = function(x){
y<-as.vector(rmultinom(n = 1,size = 1,prob = gamma_P[x,,iter_index]))
cluster_id<-which(y==1)
return(cluster_id)
}))
## Updating the network parameters theta
theta[,,iter_index]<-theta.update(theta.curr=theta[,,iter_index-1],cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, net_adjacency=net_adjacency, cov_beta_u = cov_beta_u, cov_beta_p = cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
#print('theta')
#print(theta[,,iter_index])
## Updating the covariate parameters beta_u
beta_u[,,iter_index]<-beta_u.update(beta_u.curr = matrix(beta_u[,,iter_index-1],K1,C1_outer),cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, theta=theta[,,iter_index], net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_u_outer = cov_u_outer, N1=N1, N2=N2, K1=K1, K2=K2, C1_outer= C1_outer)
#print('beta_u and beta_p')
#print(beta_u[,,iter_index])
## Updating the covariate parameters beta_p
beta_p[,,iter_index]<-beta_p.update(beta_p.curr = matrix(beta_p[,,iter_index-1],K2,C2_outer),cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, theta=theta[,,iter_index], net_adjacency=net_adjacency, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, cov_p_outer = cov_p_outer, N1=N1, N2=N2, K1=K1, K2=K2, C2_outer=C2_outer)
#print(beta_p[,,iter_index])
## Updating the omega matrix
delta_0[,,,iter_index]<-delta_0.update(cluster_ids_U=cluster_ids_U, cluster_ids_P=cluster_ids_P, delta_0=delta_0[,,,iter_index-1],logitInv_delta_0 = logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating_statistic = net_rating_statistic, N1=N1, N2=N2, K1=K1, K2=K2, R=R)
#print('delta_0')
#print(delta_0[,,,iter_index])
#Sys.sleep(1.5)
ELBO_prev<-ELBO_curr
ELBO_curr<-ELBO_conv_bipartite_stat_undir(gamma_U=gamma_U[,,iter_index], gamma_P=gamma_P[,,iter_index], pi_U=pi_U[,iter_index], pi_P=pi_P[,iter_index], theta = theta[,,iter_index], logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
error<-abs(((ELBO_prev-ELBO_curr)/ELBO_prev))
# if(!is.nan(abs(((ELBO_prev-ELBO_curr)/ELBO_prev)))){
# error<-abs(((ELBO_prev-ELBO_curr)/ELBO_prev))
# }
#print('error')
#print(error)
#print("probs")
#print(Gvals_cube_0)
# print(apply(X = Gvals_cube_0,MARGIN = c(1,2),FUN = function(x){
#
# return(x/sum(x))
# }))
if((iter_index%%1)==0){
#print(theta[,,iter_index])
print('iter_index')
print(iter_index)
print('error')
print(error)
print(proc.time()-ptm)
}
ptm_diff<-proc.time()-ptm
time_per_iter<-c(time_per_iter,ptm_diff[3])
iter_index<-iter_index+1
}
names(time_per_iter)<-NULL
total_iterations<-iter_index-1
return(list(gamma_U,gamma_P,pi_U,pi_P,theta,beta_u,beta_p,delta_0,time_per_iter,total_iterations))
}
########################################################################################################
########################################################################################################
## Defining Model Selection functions based on converged paramters
#################################################
## Defining a function to calculate the estimate of complete log likelihood
comp_loglik_cal<-function(cluster_ids_est_U = NA, cluster_ids_est_P = NA, pi_U = NA, pi_P = NA, theta, beta_u, beta_p, delta_0, net_adjacency, net_rating, cov_u_outer, cov_p_outer, N1, N2, K1, K2, R){
if((K1!=1)&(K2!=1)){
logitInv_delta_0<-logitInv_delta_0_cal(delta_0=delta_0, K1=K1, K2=K2, R=R)
probab_block_ratings<-array(NA_real_,dim=c(K1,K2,R))
for(k in 1:K1){
for(l in 1:K2){
for(r in 1:R){
if(r==1){
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]
}else if(r==R){
probab_block_ratings[k,l,r]<-1-logitInv_delta_0[k,l,(R-1)]
}else{
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]-logitInv_delta_0[k,l,(r-1)]
}
}
}
}
## 1st term
t1<-0
for(i in 1:N1){
for(j in 1:N2){
cluster_id_i<-cluster_ids_est_U[i]
cluster_id_j<-cluster_ids_est_P[j]
predictor<-theta[cluster_id_i,cluster_id_j]+sum(cov_u_outer[i,]*beta_u[cluster_id_i,])+sum(cov_p_outer[j,]*beta_p[cluster_id_j,])
exp_val<-exp(predictor)
if(net_adjacency[i,j]==0){
t1<-t1-(log(1+exp_val))
}else{
for(r in 1:R){
if(net_rating[i,j]==r){
t1<-t1+(predictor-log(1+exp_val)+log(probab_block_ratings[cluster_id_i,cluster_id_j,r]))
}
}
}
}
}
## 2nd term
t2<-0
for (i in 1:N1){
t2<-t2+log(pi_U[cluster_ids_est_U[i]])
}
t3<-0
for (j in 1:N2){
t3<-t3+log(pi_P[cluster_ids_est_P[j]])
}
comp_val<-t1+t2+t3
}
return(comp_val)
}
#################################################
## Defining a function to calculate the integrated classification likelihood
ICL_cal<-function(cluster_ids_est_U, cluster_ids_est_P, pi_U, pi_P, theta, beta_u, beta_p, delta_0, net_adjacency, net_rating, cov_u_outer, cov_p_outer, N1, N2, K1, K2, R){
comp_loglik_val<-comp_loglik_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U = pi_U, pi_P = pi_P, theta = theta, beta_u = beta_u, beta_p = beta_p, delta_0 = delta_0, cov_u_outer = cov_u_outer, cov_p_outer = cov_p_outer, net_adjacency = net_adjacency, net_rating = net_rating, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R=R)
## Penalization over pi
t2<-(K1-1)*log(N1)+(K2-1)*log(N2)
## Penalization over theta
t3<-(K1*K2)*log(N1*N2)
## Penalization over beta_u
C1_outer<-dim(cov_u_outer)[2]
t4<-(K1*C1_outer)*log(N1*N2)
## Penalization over beta_p
C2_outer<-dim(cov_p_outer)[2]
t5<-(K2*C2_outer)*log(N1*N2)
## Calculating the number of edges
N0_val<-N0_cal(net_adjacency=net_adjacency, N1=N1, N2=N2)
## Penalization over omega, mu
t6<-(K1*K2*(R-1))*log(N0_val)
## all penalty terms together:
penalty<-(t2+t3+t4+t5+t6)/2
ICL_val<-comp_loglik_val-penalty
ICL_vec<-c(comp_loglik_val,penalty,ICL_val)
return(ICL_vec)
}
########################################################################################################
## Defining the Rand Index and RASE functions for evaluating the performance of clustering and estimation respectively
## Rand Index function
RI_cal<-function(cluster_ids_est, cluster_ids_true){
n=length(cluster_ids_est)
RI_val=0
for(i in 1:(n-1)){
for(j in (i+1):n){
RI_val=RI_val+as.numeric((cluster_ids_est[i]==cluster_ids_est[j])==(cluster_ids_true[i]==cluster_ids_true[j]))
}
}
RI_mean=RI_val/(n*(n-1)/2)
return(RI_mean)
}
#################################################
## RASE functions
RASE_cal<-function(param_est, param_true){
if(is.vector(param_est)!=1){
RASE_val<-sqrt(sum((param_est-param_true)^2)/prod(dim(param_est)))
}else{RASE_val<-sqrt(sum((param_est-param_true)^2)/length(param_est))}
return(RASE_val)
}
########################################################################################################
## Defining the functions to evaluate clustering performance using normalized mutual information.
probab_clust_cal<-function(cluster_ids,N,K){
probab_vec<-rep(NA_real_,K)
for(k in 1:K){
probab_vec[k]<-sum(cluster_ids==k)/N
}
return(probab_vec)
}
NMI_cal<-function(cluster_ids_true_U,cluster_ids_true_P,K_true_U,K_true_P,cluster_ids_est_U,cluster_ids_est_P,K1,K2,N1,N2){
probab_vec_true_U<-probab_clust_cal(cluster_ids = cluster_ids_true_U,N = N1+N2,K = K_true_U)
probab_vec_true_P<-probab_clust_cal(cluster_ids = cluster_ids_true_P,N = N1+N2,K = K_true_P)
probab_vec_true<-c(probab_vec_true_U,probab_vec_true_P)
total_entropy_true<-0
for(k in 1:length(probab_vec_true)){
if(probab_vec_true[k]!=0){
total_entropy_true<-total_entropy_true-(probab_vec_true[k]*log(probab_vec_true[k],base = 2))
}
}
probab_vec_est_U<-probab_clust_cal(cluster_ids = cluster_ids_est_U,N = N1+N2,K = K1)
probab_vec_est_P<-probab_clust_cal(cluster_ids = cluster_ids_est_P,N = N1+N2,K = K2)
probab_vec_est<-c(probab_vec_est_U,probab_vec_est_P)
total_entropy_est<-0
for(k in 1:length(probab_vec_est)){
if(probab_vec_est[k]!=0){
total_entropy_est<-total_entropy_est-(probab_vec_est[k]*log(probab_vec_est[k],base = 2))
}
}
clust_ids_true_total<-c(cluster_ids_true_U,(cluster_ids_true_P+K_true_U))
clust_ids_est_total<-c(cluster_ids_est_U,(cluster_ids_est_P+K1))
conditional_entropy<-rep(NA_real_,(K1+K2))
for(k in 1:(K1+K2)){
probab_conditional_vec<-rep(0,(K_true_U+K_true_P))
for(m in 1:(K_true_U+K_true_P)){
if(sum(clust_ids_est_total==k)!=0){
probab_conditional_vec[m]<-sum((clust_ids_true_total==m)&(clust_ids_est_total==k))/sum(clust_ids_est_total==k)
}
}
plogp_conditional_vec<-rep(0,length(probab_conditional_vec))
for(m in 1:length(probab_conditional_vec)){
if(probab_conditional_vec[m]!=0){
plogp_conditional_vec[m]<-probab_conditional_vec[m]*log(x = probab_conditional_vec[m],base = 2)
}
}
conditional_entropy[k]<--probab_vec_est[k]*sum(plogp_conditional_vec)
}
mutual_info<-total_entropy_true-sum(conditional_entropy)
NMI_val<-(2*mutual_info)/(total_entropy_true+total_entropy_est)
return(NMI_val)
}
########################################################################################################
## Calculating the recovery rate
recovery_rate_cal <- function(cluster_ids_est, cluster_ids_true){
all_perm<-permutations(n=4,r=2,repeats.allowed = T)
clust_perm_mat<-matrix(NA_integer_,4,4)
for(i in 1:nrow(all_perm)){
clust_perm_mat[all_perm[i,1],all_perm[i,2]] <- sum((cluster_ids_true==all_perm[i,1])&(cluster_ids_est==all_perm[i,2]))
}
recovery_rate_val<-sum(apply(X = clust_perm_mat, MARGIN = 1, FUN = max))/length(cluster_ids_est)
return(recovery_rate_val)
}
#########################################################################################################
## Defining the parameters
N1<-dim(net_adjacency)[1] ## Number of nodes from the network
N2<-dim(net_adjacency)[2]
## Total time points in the network for which we have data in the form of adjacency matrix
K1<-nclust1 ## Defining the number of clusters
K2<-nclust2
K <-K1*K2
#################################################
## Defining the starting values of the iterator and running the main algorithm
start<-list()
start[[1]]<-matrix(rep(1/K1,N1*K1),N1,K1)
start[[2]]<-matrix(rep(1/K2,N2*K2),N2,K2)
start[[3]]<-rep(1/K1,K1)
start[[4]]<-rep(1/K2,K2)
start[[5]]<-theta_init
start[[6]]<-beta_u_init
start[[7]]<-beta_p_init
start[[8]]<-delta_0_init
if(K1!=1&K2!=1){ param<-iterator(start=start, net_adjacency = net_adjacency, net_rating=net_rating, K1=K1, K2=K2, n_iter=2000, thres=thres, n_iter_min = n_iter_min, n_iter_max = n_iter_max, R=R, cov_u_outer=cov_u_outer, cov_p_outer=cov_p_outer)}
if(K1==1&K2!=1){ param<-iterator_K1(start=start, network=sim.net, K1=K1, K2=K2, n_iter=1000, thres=thres,R=R)}
if(K1!=1&K2==1){ param<-iterator_K2(start=start, network=sim.net, K1=K1, K2=K2, n_iter=1000, thres=thres,R=R)}
if(K1==1&K2==1){ param<-iterator_K1K2(start=start, network=sim.net, K1=K1, K2=K2, n_iter=1000, thres=thres,R=R)}
#################################################
## extracting the coverged parameter values and calculating ICL
n_iter=1
indicator_last<-0
while(indicator_last==0){
temp<-is.na(param[[1]][1,1,n_iter])
if(temp==TRUE){
n_last<-n_iter-1
indicator_last<-1
}
n_iter<-n_iter+1
}
param_converge<-list()
#print(param)
if(K1!=1&K2!=1){
param_converge[[1]]<-param[[1]][,,n_last]
param_converge[[2]]<-param[[2]][,,n_last]
param_converge[[3]]<-param[[3]][,n_last]
param_converge[[4]]<-param[[4]][,n_last]
param_converge[[5]]<-param[[5]][,,n_last]
param_converge[[6]]<-param[[6]][,,n_last]
param_converge[[7]]<-param[[7]][,,n_last]
param_converge[[8]]<-param[[8]][,,,n_last]
output_list<-param_converge
cluster_ids_est_U<-as.vector(apply(X = matrix(1:N1),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[1]][x,])
return(cluster_id)
}))
cluster_ids_est_P<-as.vector(apply(X = matrix(1:N2),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[2]][x,])
return(cluster_id)
}))
#ICL_val<-NA
ICL_vec<-ICL_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U=param_converge[[3]], pi_P=param_converge[[4]] ,theta = param_converge[[5]], beta_u = as.matrix(param_converge[[6]]), beta_p = as.matrix(param_converge[[7]]), delta_0 = param_converge[[8]], net_adjacency=net_adjacency, net_rating=net_rating, cov_u_outer = cov_u_outer, cov_p_outer = cov_p_outer, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R = R)
## Calculating cov_beta_u and cov_beta_p
cov_beta_u<-cov_beta_mul(cov=cov_u_outer,beta=t(as.matrix(param_converge[[6]])))
cov_beta_p<-cov_beta_mul(cov=cov_p_outer,beta=t(as.matrix(param_converge[[7]])))
logitInv_delta_0.curr<-logitInv_delta_0_cal(delta_0=param_converge[[8]], K1=K1, K2=K2, R=R)
ELBO_val<-ELBO_conv_bipartite_stat_undir(gamma_U=param_converge[[1]], gamma_P=param_converge[[2]], pi_U=param_converge[[3]], pi_P=param_converge[[4]], theta = param_converge[[5]], logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, cov_beta_u=cov_beta_u, cov_beta_p=cov_beta_p, N1=N1, N2=N2, K1=K1, K2=K2)
ELBO_modsel_vec<-c(ELBO_val,ICL_vec[2],ELBO_val-ICL_vec[2])
if(sim_indicator==1){
RI_val_U<-RI_cal(cluster_ids_est = cluster_ids_est_U,cluster_ids_true = cluster_ids_true_U)
RI_val_P<-RI_cal(cluster_ids_est = cluster_ids_est_P,cluster_ids_true = cluster_ids_true_P)
NMI_val<-NMI_cal(cluster_ids_true_U = cluster_ids_true_U,cluster_ids_true_P = cluster_ids_true_P,K_true_U = K_true_U,K_true_P = K_true_P,cluster_ids_est_U = cluster_ids_est_U,cluster_ids_est_P = cluster_ids_est_P, K1 = K1, K2 = K2, N1 = N1, N2 = N2)
Recovery_rate_val_U<-recovery_rate_cal(cluster_ids_est = cluster_ids_est_U,cluster_ids_true = cluster_ids_true_U)
Recovery_rate_val_P<-recovery_rate_cal(cluster_ids_est = cluster_ids_est_P,cluster_ids_true = cluster_ids_true_P)
if((K1==K_true_U)&(K2==K_true_P)){
K1_permute_mat<-do.call(rbind,permn(1:K1))
K2_permute_mat<-do.call(rbind,permn(1:K2))
RASE_beta_u_vec<-rep(NA_real_,nrow(K1_permute_mat))
for (k in 1:nrow(K1_permute_mat)){
beta_u_est<-as.vector(param_converge[[6]])[K1_permute_mat[k,]]
RASE_beta_u_vec[k]<-RASE_cal(param_est = beta_u_est, param_true = as.vector(beta_u_true))
}
permute_true_id_beta_u<-which.min(RASE_beta_u_vec)
RASE_beta_u_val<-RASE_beta_u_vec[permute_true_id_beta_u]
RASE_beta_p_vec<-rep(NA_real_,nrow(K2_permute_mat))
for (k in 1:nrow(K2_permute_mat)){
beta_p_est<-as.vector(param_converge[[7]])[K2_permute_mat[k,]]
RASE_beta_p_vec[k]<-RASE_cal(param_est = beta_p_est, param_true = as.vector(beta_p_true))
}
permute_true_id_beta_p<-which.min(RASE_beta_p_vec)
RASE_beta_p_val<-RASE_beta_p_vec[permute_true_id_beta_p]
RASE_theta_mat<-matrix(NA_real_,nrow(K1_permute_mat),nrow(K2_permute_mat))
for (k in 1:nrow(K1_permute_mat)){
for (l in 1:nrow(K2_permute_mat)){
theta_true_rotated<-as(as.integer(K1_permute_mat[k,]), "pMatrix")%*%theta_true%*%t(as(as.integer(K2_permute_mat[l,]), "pMatrix"))
RASE_theta_mat[k,l]<-RASE_cal(param_est = param_converge[[5]], param_true = theta_true_rotated)
}
}
RASE_theta_val<-min(RASE_theta_mat)
rotate_indices<-which(RASE_theta_mat == min(RASE_theta_mat), arr.ind = TRUE)
## RASE for delta_0
# RASE_delta_0_vec<-rep(NA_real_,R-1)
# for (r in 1:(R-1)){
# delta_0_true_rotated<-as(as.integer(K1_permute_mat[rotate_indices[1],]), "pMatrix")%*%delta_0_true[,,r]%*%t(as(as.integer(K2_permute_mat[rotate_indices[2],]), "pMatrix"))
# RASE_delta_0_vec[r]<-RASE_cal(param_est = param_converge[[8]][,,r], param_true = delta_0_true_rotated)
# }
delta_0_true_rotated<-array(NA_real_,dim=c(K1,K2,(R-1)))
for (r in 1:(R-1)){
delta_0_true_rotated[,,r]<-as(as.integer(K1_permute_mat[rotate_indices[1],]), "pMatrix")%*%delta_0_true[,,r]%*%t(as(as.integer(K2_permute_mat[rotate_indices[2],]), "pMatrix"))
}
RASE_delta_0_val<-RASE_cal(param_est = param_converge[[8]], param_true = delta_0_true_rotated)
output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_vec,"ELBO_modsel"=ELBO_modsel_vec,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P,"Normalized_Mutual_Info"=NMI_val, "Set1_Recovery_Rate"=Recovery_rate_val_U, "Set2_Recovery_Rate"=Recovery_rate_val_P, "RASE_theta"=RASE_theta_val, "RASE_beta_u"=RASE_beta_u_val, "RASE_beta_p"=RASE_beta_p_val, "RASE_delta_0"=RASE_delta_0_val,"time_per_iter_vec"=param[[9]], "total_iterations"=param[[10]])
}else{
output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_vec,"ELBO_modsel"=ELBO_modsel_vec,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P,"Normalized_Mutual_Info"=NMI_val, "Set1_Recovery_Rate"=Recovery_rate_val_U, "Set2_Recovery_Rate"=Recovery_rate_val_P, "time_per_iter_vec"=param[[9]], "total_iterations"=param[[10]])
}
}else{output_list<-list("parameters_converged"=param_converge,"Set1_estimated_cluster_IDs"=cluster_ids_est_U,"Set2_estimated_cluster_IDs"=cluster_ids_est_P,"ICL"=ICL_vec,"ELBO_modsel"=ELBO_modsel_vec,"time_per_iter_vec"=param[[9]], "total_iterations"=param[[10]])}
#}
}
return(output_list)
}
simulate_network_bipartite_model10<-function(N1,N2,pi_U,pi_P,theta,beta_u,beta_p,delta_0,cov_u_outer,cov_p_outer,K1,K2,R){
library(Rcpp)
sourceCpp(file = paste0(file_path,"model_10.cpp"))
#library(Matrix)
net_adjacency<-matrix(0,N1,N2)
net_rating<-matrix(0,N1,N2)
#net_adjacency<-Matrix(0,N1,N2,sparse = T)
#net_rating<-Matrix(0,N1,N2,sparse = T)
if((K1==1)&(K2==1)){
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta)
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==0){
network[i,j]<-0
}else if(edge_sample==1){
G_vec<-rep(NA_real_,R)
for (r in 1:R){
G_vec[r]<-exp(mu[r]*omega-(mu[r])^2/2)
}
probab_discrete<-G_vec/sum(G_vec)
network[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_discrete))==1)
}
}
}
}
else{
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
logitInv_delta_0<-logitInv_delta_0_cal(delta_0=delta_0, K1=K1, K2=K2, R=R)
probab_block_ratings<-array(NA_real_,dim=c(K1,K2,R))
for(k in 1:K1){
for(l in 1:K2){
for(r in 1:R){
if(r==1){
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]
}else if(r==R){
probab_block_ratings[k,l,r]<-1-logitInv_delta_0[k,l,(R-1)]
}else{
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]-logitInv_delta_0[k,l,(r-1)]
}
}
}
}
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta[clust_ids_U[i],clust_ids_P[j]]+sum(cov_u_outer[i,]*beta_u[clust_ids_U[i],])+sum(cov_p_outer[j,]*beta_p[clust_ids_P[j],]))
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==1){
net_adjacency[i,j]<-1
net_rating[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_block_ratings[clust_ids_U[i],clust_ids_P[j],]))==1)
}
}
}
}
return(list(net_adjacency,net_rating,clust_ids_U,clust_ids_P))
}
########################################################################################################
########################################################################################################
## Model 11 features (This works!):
## 1) Combined (reviewers and products) single mode network
## 2) Block structure over intercept
## 3) Proportional odds model for ratings based on my own derivations
wrapper_bipartite_model11<-function(net_adjacency,net_rating,nclust,thres=10^(-6),n_iter_min=50,n_iter_max=500,theta_init,delta_0_init,sim_indicator,theta_true=NA, delta_0_true=NA,K_true=NA,cluster_ids_true_U=NA, cluster_ids_true_P=NA, N1, N2, K1, K2, R=NA){
## This is the combined final updated and working code for EM undirected case for all K
########################################################################################################
########################################################################################################
## Loading the required packages
require(lda)
library(quadprog)
library(combinat)
library(MASS)
library(Rcpp)
library(Matrix)
library(gtools)
#sourceCpp(file = paste0(file_path,"model_11.cpp"))
library(SBMcov)
#################################################
## Defining a function to update variational parameters gamma using quadratic program solver. Input:
## gamma.curr is a N*K matrix for current gamma estimates, pi.curr is a K*1 vector for current pi
## estimates, theta.curr is a T_grid*K array for current theta estimates. Given Network is assumed to be an N*N*T_data array
solve_QP_wrapper<-function(quad_lin_coeff,constraint_matrix,constraint_vector,node_ID,gamma.curr){
try_QP<-try(gamma_next_vec<-solve.QP((-2*diag(as.vector(quad_lin_coeff[node_ID,,1]))),as.vector(quad_lin_coeff[node_ID,,2]),t(constraint_matrix),constraint_vector)$solution,silent = TRUE)
if('numeric' %in% class(try_QP)){
gamma_next_vec<-try_QP
}
else{
gamma_next_vec<-gamma.curr[node_ID,]
print("red flag")
print(paste0("Node ID for red flag is ", node_ID))
}
return(gamma_next_vec)
}
gamma.update.wrapper<-function(gamma.curr,pi.curr,theta.curr, logitInv_delta_0.curr, net_adjacency, net_rating, N,K,R){
gamma.next<-matrix(NA_real_,N,K)
constraint_matrix<-matrix(NA_real_,2+K,K)
constraint_matrix[1,]<-rep(1,K)
constraint_matrix[2,]<-rep(-1,K)
constraint_matrix[3:(K+2),]<-diag(K)
constraint_vector<-c(1,-1,rep(0,K))
quad_lin_coeff<-gamma_update_stat_undir(gamma=gamma.curr, pi=pi.curr, theta=theta.curr, logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N=N, K=K, R=R)
#print(quad_lin_coeff)
for (i in 1:N){
if(sum(is.nan(quad_lin_coeff[i,,1]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,1])),1]<--Inf
}
if(sum(is.nan(quad_lin_coeff[i,,2]))>0){
quad_lin_coeff[i,which(is.nan(quad_lin_coeff[i,,2])),2]<-Inf
}
if(sum(quad_lin_coeff[i,,1]<=-10^8)==0){
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K){
gamma.next[i,]<-gamma.curr[i,]
}
}else if((sum(quad_lin_coeff[i,,1]<=-10^8)>0)&&(sum(quad_lin_coeff[i,,1]<=-10^8)<K)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,1]<=-10^8),1]<--(10^10)
if(sum(quad_lin_coeff[i,,2]>=10^8)==0){
gamma.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if((sum(quad_lin_coeff[i,,2]>=10^8)>0)&&(sum(quad_lin_coeff[i,,2]>=10^8)<K)){
quad_lin_coeff[i,which(quad_lin_coeff[i,,2]>=10^8),2]<-10^10
gamma.next[i,]<-solve_QP_wrapper(quad_lin_coeff = quad_lin_coeff,constraint_matrix = constraint_matrix,constraint_vector = constraint_vector,node_ID = i,gamma.curr = gamma.curr)
#gamma.next[i,]<-solve.QP((-2*diag(as.vector(quad_lin_coeff[i,,1]))),as.vector(quad_lin_coeff[i,,2]),t(constraint_matrix),constraint_vector)$solution
}else if(sum(quad_lin_coeff[i,,2]>=10^8)==K){
gamma.next[i,]<-gamma.curr[i,]
}
}else if(sum(quad_lin_coeff[i,,1]<=-10^8)==K){
gamma.next[i,]<-gamma.curr[i,]
}
#print(i)
}
## normalizing gamma_i. deal with case outside (0,1) later
gamma.next<-t(apply(X = gamma.next,MARGIN = 1,FUN = function(x){
x_norm<-x/sum(x)
return(x_norm)
}))
return(gamma.next)
}
#################################################
## Defining a function to update K*1 vector of pi
pi.update<-function(gamma.curr){
pi.next<-as.vector(apply(X = gamma.curr,MARGIN = 2,FUN = mean))
## normalization of pi
pi.next<-pi.next/sum(pi.next)
return(pi.next)
}
#################################################
## Defining the update function of full matrix theta with dimensions K1,K2
theta.update<-function(theta.curr,gamma,net_adjacency,N,K){
theta.next<-matrix(NA_real_,K,K)
gradient<-grad_bipartite_stat_undir_theta(theta=theta.curr, gamma=gamma, net_adjacency=net_adjacency, N=N, K=K)
hess<-hess_bipartite_stat_undir_theta(theta=theta.curr, gamma=gamma, N=N, K=K)
for(k in 1:K){
for(l in 1:K){
theta.next[k,l]<-theta.curr[k,l]-gradient[k,l]/hess[k,l]
}
}
return(theta.next)
}
#################################################
## Defining the update function of full proportional odds nuisance parameters delta_0
delta_0.update<-function(gamma,delta_0,logitInv_delta_0,net_adjacency,net_rating,N,K,R){
delta_0.next<-array(NA_real_,dim=c(K,K,(R-1)))
gradient_delta_0<-grad_bipartite_stat_undir_delta_0(gamma=gamma, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N=N, K=K, R=R)
for (k in 1:K){
for (l in 1:K){
hess_mat<-hess_bipartite_stat_undir_delta_0(k=k-1, l=l-1, gamma=gamma, delta_0=delta_0, logitInv_delta_0=logitInv_delta_0, net_adjacency=net_adjacency, net_rating=net_rating, N=N, R=R)
try_delta_0_update<-try(delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat+(10^(-6)*diag(R-1)))%*%gradient_delta_0[k,l,]))))
if('numeric' %in% class(try_delta_0_update)){
delta_0.next[k,l,]<-try_delta_0_update
}
else{
delta_0.next[k,l,]<-delta_0[k,l,]
}
#delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat+(10^(-6)*diag(R-1)))%*%gradient_delta_0[k,l,])))
#delta_0.next[k,l,]<-sort(delta_0[k,l,]-as.vector((solve(hess_mat)%*%gradient_delta_0[k,l,])))
}
}
#print(delta_0.next)
return(delta_0.next)
}
#################################################
## Defining a function that takes inputs as: 1) initial values list of parameters, 2) network, 3) Number of clusters K and 4) number of iterations and outputs the whole list of all parameter iterates
iterator<-function(start,net_adjacency,net_rating,K,n_iter,thres,n_iter_min,n_iter_max,R){
## Assuming a network adjacency matrix of dimensions N1*N2 with rows corresponding to users and columns corresponding to products and each entry is an indicator whether the user rated the product. This framework can be generalized.
N<-dim(net_adjacency)[1]
## initializing the arrays for parameters
## Initializing the cluster memberships gamma
gamma<-array(NA_real_,dim=c(N,K,n_iter)) ## assuming there are K clusters in the single mode network
## Initializing the mixture proportions pi
pi<-matrix(NA_real_,K,n_iter)
## Initializing the network parameters theta
theta<-array(NA_real_,dim=c(K,K,n_iter))
## Initializing the proportional odds model intercept parameters
delta_0<-array(NA_real_,dim=c(K,K,R-1,n_iter))
gamma[,,1]<-start[[1]]
pi[,1]<-start[[2]]
theta[,,1]<-start[[3]]
delta_0[,,,1]<-start[[4]]
#print(theta[,,1])
#print(omega[,1])
#print(mu[,,,1])
## iterations
## Defining the iteration index
iter_index<-2
## Initializing the error
error<-Inf
## Initializing the current ELBO values over the whole grid
ELBO_curr<-10^10
time_per_iter<-c()
while(((error>thres)&(iter_index<n_iter_max))|(iter_index<n_iter_min)){
#while((error>thres|iter_index<200)&(iter_index<250)&(iter_index==2|error<10)){
#while(iter_index<21){
## Starting the stopwatch to calculate the per iteration time
ptm<-proc.time()
## Calculating the logit inverse of delta_0
logitInv_delta_0.curr<-logitInv_delta_0_cal(delta_0=delta_0[,,,iter_index-1], K=K, R=R)
## Transforming logit inverse of delta_0 to phi
#phi.curr<-phi_cal(logitInv_delta_0=logitInv_delta_0.curr, K1=K1, K2=K2, R=R)
## Calculating the logit inverse of phi
#logitInv_phi.curr<-logitInv_phi_cal(phi=phi.curr, K1=K1, K2=K2, R=R)
## Updating gamma_U, the mixed memberships for users
gamma[,,iter_index]<-gamma.update.wrapper(gamma.curr=gamma[,,iter_index-1], pi.curr=pi[,iter_index-1], theta.curr=theta[,,iter_index-1], logitInv_delta_0.curr=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating, N=N,K=K,R=R)
## Updating the mixture proportions pi_U
pi[,iter_index]<-pi.update(gamma.curr=gamma[,,iter_index])
## Updating the network parameters theta
theta[,,iter_index]<-theta.update(theta.curr=theta[,,iter_index-1],gamma=gamma[,,iter_index], net_adjacency=net_adjacency, N=N, K=K)
#print('theta')
#print(theta[,,iter_index])
## Updating the omega matrix
delta_0[,,,iter_index]<-delta_0.update(gamma=gamma[,,iter_index], delta_0=delta_0[,,,iter_index-1],logitInv_delta_0 = logitInv_delta_0.curr,net_adjacency=net_adjacency, net_rating=net_rating,N=N, K=K, R=R)
#print('delta_0')
#print(delta_0[,,,iter_index])
#Sys.sleep(1.5)
ELBO_prev<-ELBO_curr
ELBO_curr<-ELBO_conv_bipartite_stat_undir(gamma=gamma[,,iter_index], pi=pi[,iter_index], theta = theta[,,iter_index], logitInv_delta_0=logitInv_delta_0.curr, net_adjacency=net_adjacency, net_rating=net_rating,N=N,K=K)
#error<-abs(((ELBO_prev-ELBO_curr)/ELBO_prev))
if(!is.nan(abs(((ELBO_prev-ELBO_curr)/ELBO_prev)))){
error<-abs(((ELBO_prev-ELBO_curr)/ELBO_prev))
}
#print('error')
#print(error)
#print("probs")
#print(Gvals_cube_0)
# print(apply(X = Gvals_cube_0,MARGIN = c(1,2),FUN = function(x){
#
# return(x/sum(x))
# }))
if((iter_index%%1)==0){
#print(theta[,,iter_index])
print('iter_index')
print(iter_index)
print('error')
print(error)
print(proc.time()-ptm)
}
ptm_diff<-proc.time()-ptm
time_per_iter<-c(time_per_iter,ptm_diff[3])
iter_index<-iter_index+1
}
names(time_per_iter)<-NULL
total_iterations<-iter_index-1
return(list(gamma,pi,theta,delta_0,time_per_iter,total_iterations))
}
########################################################################################################
########################################################################################################
## Defining Model Selection functions based on converged paramters
########################################################################################################
## Defining the Rand Index and RASE functions for evaluating the performance of clustering and estimation respectively
## Rand Index function
RI_cal<-function(cluster_ids_est, cluster_ids_true){
n=length(cluster_ids_est)
RI_val=0
for(i in 1:(n-1)){
for(j in (i+1):n){
RI_val=RI_val+as.numeric((cluster_ids_est[i]==cluster_ids_est[j])==(cluster_ids_true[i]==cluster_ids_true[j]))
}
}
RI_mean=RI_val/(n*(n-1)/2)
return(RI_mean)
}
########################################################################################################
## Defining the function to evaluate clustering performance using normalized mutual information.
# NMI_cal<-function(cluster_ids_true_U,cluster_ids_true_P,K_true_U,K_true_P,cluster_ids_est_U,cluster_ids_est_P,K1,K2,N1,N2){
# probab_vec_true_U<-probab_clust_cal(cluster_ids = cluster_ids_true_U,N = N1+N2,K = K_true_U)
# probab_vec_true_P<-probab_clust_cal(cluster_ids = cluster_ids_true_P,N = N1+N2,K = K_true_P)
# probab_vec_true<-c(probab_vec_true_U,probab_vec_true_P)
# total_entropy_true<-0
# for(k in 1:length(probab_vec_true)){
# if(probab_vec_true[k]!=0){
# total_entropy_true<-total_entropy_true-(probab_vec_true[k]*log(probab_vec_true[k],base = 2))
# }
# }
#
# probab_vec_est_U<-probab_clust_cal(cluster_ids = cluster_ids_est_U,N = N1+N2,K = K1)
# probab_vec_est_P<-probab_clust_cal(cluster_ids = cluster_ids_est_P,N = N1+N2,K = K2)
# probab_vec_est<-c(probab_vec_est_U,probab_vec_est_P)
# total_entropy_est<-0
# for(k in 1:length(probab_vec_est)){
# if(probab_vec_est[k]!=0){
# total_entropy_est<-total_entropy_est-(probab_vec_est[k]*log(probab_vec_est[k],base = 2))
# }
# }
#
# clust_ids_true_total<-c(cluster_ids_true_U,(cluster_ids_true_P+K_true_U))
# clust_ids_est_total<-c(cluster_ids_est_U,(cluster_ids_est_P+K1))
#
# conditional_entropy<-rep(NA_real_,(K1+K2))
# for(k in 1:(K1+K2)){
# probab_conditional_vec<-rep(0,(K_true_U+K_true_P))
# for(m in 1:(K_true_U+K_true_P)){
# if(sum(clust_ids_est_total==k)!=0){
# probab_conditional_vec[m]<-sum((clust_ids_true_total==m)&(clust_ids_est_total==k))/sum(clust_ids_est_total==k)
# }
# }
# plogp_conditional_vec<-rep(0,length(probab_conditional_vec))
# for(m in 1:length(probab_conditional_vec)){
# if(probab_conditional_vec[m]!=0){
# plogp_conditional_vec[m]<-probab_conditional_vec[m]*log(x = probab_conditional_vec[m],base = 2)
# }
# }
#
# conditional_entropy[k]<--probab_vec_est[k]*sum(plogp_conditional_vec)
# }
#
# mutual_info<-total_entropy_true-sum(conditional_entropy)
# NMI_val<-(2*mutual_info)/(total_entropy_true+total_entropy_est)
# return(NMI_val)
# }
## Defining the functions to evaluate clustering performance using normalized mutual information.
probab_clust_cal<-function(cluster_ids,N,K){
probab_vec<-rep(NA_real_,K)
for(k in 1:K){
probab_vec[k]<-sum(cluster_ids==k)/N
}
return(probab_vec)
}
NMI_cal<-function(cluster_ids_true_U,cluster_ids_true_P,K_true_U,K_true_P,cluster_ids_est_U,cluster_ids_est_P,K1,K2,N1,N2){
probab_vec_true_U<-probab_clust_cal(cluster_ids = cluster_ids_true_U,N = N1+N2,K = K_true_U)
probab_vec_true_P<-probab_clust_cal(cluster_ids = cluster_ids_true_P,N = N1+N2,K = K_true_P)
probab_vec_true<-c(probab_vec_true_U,probab_vec_true_P)
total_entropy_true<-0
for(k in 1:length(probab_vec_true)){
if(probab_vec_true[k]!=0){
total_entropy_true<-total_entropy_true-(probab_vec_true[k]*log(probab_vec_true[k],base = 2))
}
}
probab_vec_est_U<-probab_clust_cal(cluster_ids = cluster_ids_est_U,N = N1+N2,K = K1)
probab_vec_est_P<-probab_clust_cal(cluster_ids = cluster_ids_est_P,N = N1+N2,K = K2)
probab_vec_est<-c(probab_vec_est_U,probab_vec_est_P)
total_entropy_est<-0
for(k in 1:length(probab_vec_est)){
if(probab_vec_est[k]!=0){
total_entropy_est<-total_entropy_est-(probab_vec_est[k]*log(probab_vec_est[k],base = 2))
}
}
clust_ids_true_total<-c(cluster_ids_true_U,(cluster_ids_true_P+K_true_U))
clust_ids_est_total<-c(cluster_ids_est_U,(cluster_ids_est_P+K1))
conditional_entropy<-rep(NA_real_,(K1+K2))
for(k in 1:(K1+K2)){
probab_conditional_vec<-rep(0,(K_true_U+K_true_P))
for(m in 1:(K_true_U+K_true_P)){
if(sum(clust_ids_est_total==k)!=0){
probab_conditional_vec[m]<-sum((clust_ids_true_total==m)&(clust_ids_est_total==k))/sum(clust_ids_est_total==k)
}
}
plogp_conditional_vec<-rep(0,length(probab_conditional_vec))
for(m in 1:length(probab_conditional_vec)){
if(probab_conditional_vec[m]!=0){
plogp_conditional_vec[m]<-probab_conditional_vec[m]*log(x = probab_conditional_vec[m],base = 2)
}
}
conditional_entropy[k]<--probab_vec_est[k]*sum(plogp_conditional_vec)
}
mutual_info<-total_entropy_true-sum(conditional_entropy)
NMI_val<-(2*mutual_info)/(total_entropy_true+total_entropy_est)
return(NMI_val)
}
#################################################
## RASE functions
RASE_cal<-function(param_est, param_true){
if(is.vector(param_est)!=1){
RASE_val<-sqrt(sum((param_est-param_true)^2)/prod(dim(param_est)))
}else{RASE_val<-sqrt(sum((param_est-param_true)^2)/length(param_est))}
return(RASE_val)
}
#########################################################################################################
## Defining the parameters
N<-dim(net_adjacency)[1] ## Number of nodes from the network
## Total time points in the network for which we have data in the form of adjacency matrix
K<-nclust ## Defining the number of clusters
#################################################
## Defining the starting values of the iterator and running the main algorithm
start<-list()
start[[1]]<-matrix(rep(1/K,N*K),N,K)
start[[2]]<-rep(1/K,K)
start[[3]]<-theta_init
start[[4]]<-delta_0_init
if(K!=1){ param<-iterator(start=start, net_adjacency = net_adjacency, net_rating=net_rating, K=K, n_iter=1000, thres=thres, n_iter_min=n_iter_min, n_iter_max=n_iter_max, R=R)}
#################################################
## extracting the coverged parameter values and calculating ICL
n_iter=1
indicator_last<-0
while(indicator_last==0){
temp<-is.na(param[[1]][1,1,n_iter])
if(temp==TRUE){
n_last<-n_iter-1
indicator_last<-1
}
n_iter<-n_iter+1
}
param_converge<-list()
#print(param)
if(K!=1){
param_converge[[1]]<-param[[1]][,,n_last]
param_converge[[2]]<-param[[2]][,n_last]
param_converge[[3]]<-param[[3]][,,n_last]
param_converge[[4]]<-param[[4]][,,,n_last]
output_list<-param_converge
cluster_ids_est<-as.vector(apply(X = matrix(1:N),MARGIN = 1,FUN = function(x){
cluster_id<-which.max(param_converge[[1]][x,])
return(cluster_id)
}))
#ICL_vec<-ICL_cal(cluster_ids_est_U = cluster_ids_est_U, cluster_ids_est_P = cluster_ids_est_P, pi_U=param_converge[[3]], pi_P=param_converge[[4]] ,theta = param_converge[[5]], beta_u = as.matrix(param_converge[[6]]), beta_p = as.matrix(param_converge[[7]]), delta_0 = param_converge[[8]], net_adjacency=net_adjacency, net_rating=net_rating, cov_u_outer = cov_u_outer, cov_p_outer = cov_p_outer, N1 = N1, N2 = N2, K1 = K1, K2 = K2, R = R)
ICL_vec<-NA
if(sim_indicator==1){
RI_val_U<-RI_cal(cluster_ids_est = cluster_ids_est[1:N1],cluster_ids_true = cluster_ids_true_U)
RI_val_P<-RI_cal(cluster_ids_est = cluster_ids_est[(N1+1):(N1+N2)],cluster_ids_true = cluster_ids_true_P)
NMI_val<-NMI_cal(cluster_ids_true_U = cluster_ids_true_U, cluster_ids_true_P = cluster_ids_true_P, K_true_U = K_true, K_true_P = K_true, cluster_ids_est_U = cluster_ids_est[1:N1], cluster_ids_est_P = cluster_ids_est[(N1+1):(N1+N2)], K1 = K1, K2 = K2, N1 = N1, N2 = N2)
if(K==K_true){
K_permute_mat<-do.call(rbind,permn(1:K))
RASE_theta_mat<-matrix(NA_real_,nrow(K_permute_mat),nrow(K_permute_mat))
for (k in 1:nrow(K_permute_mat)){
for (l in 1:nrow(K_permute_mat)){
theta_true_rotated<-as(as.integer(K_permute_mat[k,]), "pMatrix")%*%theta_true%*%t(as(as.integer(K_permute_mat[l,]), "pMatrix"))
RASE_theta_mat[k,l]<-RASE_cal(param_est = param_converge[[3]], param_true = theta_true_rotated)
}
}
RASE_theta_val<-min(RASE_theta_mat)
rotate_indices<-which(RASE_theta_mat == min(RASE_theta_mat), arr.ind = TRUE)
delta_0_true_rotated<-array(NA_real_,dim=c(K,K,(R-1)))
for (r in 1:(R-1)){
delta_0_true_rotated[,,r]<-as(as.integer(K_permute_mat[rotate_indices[1],]), "pMatrix")%*%delta_0_true[,,r]%*%t(as(as.integer(K_permute_mat[rotate_indices[2],]), "pMatrix"))
}
RASE_delta_0_val<-RASE_cal(param_est = param_converge[[4]], param_true = delta_0_true_rotated)
output_list<-list("parameters_converged"=param_converge,"Estimated_cluster_IDs"=cluster_ids_est,"ICL"=ICL_vec,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P, "Normalized_Mutual_Info"=NMI_val, "RASE_theta"=RASE_theta_val, "RASE_delta_0"=RASE_delta_0_val,"time_per_iter_vec"=param[[5]], "total_iterations"=param[[6]])
}else{
output_list<-list("parameters_converged"=param_converge,"Estimated_cluster_IDs"=cluster_ids_est,"ICL"=ICL_vec,"Set1_Rand_Index"=RI_val_U,"Set2_Rand_Index"=RI_val_P, "Normalized_Mutual_Info"=NMI_val, "time_per_iter_vec"=param[[5]], "total_iterations"=param[[6]])
}
}else{output_list<-list("parameters_converged"=param_converge,"Estimated_cluster_IDs"=cluster_ids_est,"ICL"=ICL_vec,"time_per_iter_vec"=param[[5]], "total_iterations"=param[[6]])}
#}
}
return(output_list)
}
simulate_network_bipartite_model11<-function(N,pi,theta,delta_0,K,R){
library(Rcpp)
sourceCpp(file = paste0(file_path,"model_11.cpp"))
#library(Matrix)
net_adjacency<-matrix(0,N,N)
net_rating<-matrix(0,N,N)
#net_adjacency<-Matrix(0,N1,N2,sparse = T)
#net_rating<-Matrix(0,N1,N2,sparse = T)
if(K==1){
clust_id_U_sampling<-rmultinom(n = N1, size = 1, prob = pi_U)
clust_id_P_sampling<-rmultinom(n = N2, size = 1, prob = pi_P)
clust_ids_U<-apply(X = clust_id_U_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
clust_ids_P<-apply(X = clust_id_P_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
for (i in 1:N1){
for (j in 1:N2){
exp_val<-exp(theta)
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==0){
network[i,j]<-0
}else if(edge_sample==1){
G_vec<-rep(NA_real_,R)
for (r in 1:R){
G_vec[r]<-exp(mu[r]*omega-(mu[r])^2/2)
}
probab_discrete<-G_vec/sum(G_vec)
network[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_discrete))==1)
}
}
}
}
else{
clust_id_sampling<-rmultinom(n = N, size = 1, prob = pi)
clust_ids<-apply(X = clust_id_sampling,MARGIN = 2,FUN = function(x){
y<-which(x==1)
return(y)
})
logitInv_delta_0<-logitInv_delta_0_cal(delta_0=delta_0, K=K, R=R)
probab_block_ratings<-array(NA_real_,dim=c(K,K,R))
for(k in 1:K){
for(l in 1:K){
for(r in 1:R){
if(r==1){
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]
}else if(r==R){
probab_block_ratings[k,l,r]<-1-logitInv_delta_0[k,l,(R-1)]
}else{
probab_block_ratings[k,l,r]<-logitInv_delta_0[k,l,r]-logitInv_delta_0[k,l,(r-1)]
}
}
}
}
for (i in 1:(N-1)){
for (j in (i+1):N){
exp_val<-exp(theta[clust_ids[i],clust_ids[j]])
probab<-exp_val/(1+exp_val)
edge_sample<-rbinom(n = 1, size = 1, prob = probab)
if(edge_sample==1){
net_adjacency[i,j]<-1
net_rating[i,j] <- which(as.vector(rmultinom(n = 1, size = 1, prob = probab_block_ratings[clust_ids[i],clust_ids[j],]))==1)
}
}
}
}
return(list(net_adjacency,net_rating,clust_ids))
}
########################################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.