Nothing
###################################################
#######################################################
#
# Summary of all functions updated
#
#######################################################
#######################################################
rm(list=ls())
library(survival)
library(numDeriv)
library(Matrix)
#library(xtable)
simulWeibDiscreteFrail = function(N, S = NULL, lambda, rho, beta, p, w_values)
{
#set.seed(1234)
if( is.null(S) )
S = rpois( N, 50 )
#n is the total sample size
n = ifelse( length( S ) > 1, sum(S), S )
# covariate --> random from a normal
x = rnorm( n, 0, 1)
# Frailty term
w = rep( w_values, round( p*N ) )
if( length(w) < N )
{
w = c(w,tail(w,1))
}
else if(length(w) > N)
{
length( w ) = N
}
#set.seed(1234)
index_shuffled = sample( 1:N )
#try to shuffle the w
w = w[ index_shuffled ]
coef = S
if( length( S ) == 1 )
coef = rep( S, N )
w_tot = rep( w, coef )
# Weibull latent event times
v <- runif( n )
Tlat <- (- log(v) / (lambda * w_tot * exp( x * beta ) ) )^(1 / rho)
mean_cens = mean(Tlat)
var_cens = mean(Tlat)/10
# censoring times
C <- rnorm( n, mean_cens, var_cens)
# follow-up times and event indicators
time <- pmin(Tlat, C)
status <- as.numeric(Tlat <= C)
# data set
data.frame(family=rep( 1:N, coef ),
#rep = rep( 1:S, N ),
time=time,
status=status,
x=x,
belong = w_tot)
}
simulWeibDiscreteFrailCovNPbaseInv <- function(N, S=NULL, beta, Lambda_0_inv, p, w_values)
{
#set.seed(1234)
if( is.null(S) )
S = rpois( N, 50 )
#n is the total sample size
n = ifelse( length( S ) > 1, sum(S), S*N )
x = matrix(0, nrow=n, ncol=length(beta))
# covariate --> random from a normal
for( i in 1:length(beta))
{
x[ ,i] = rnorm( n, 0, 1)
}
# Frailty term
w = rep( w_values, round( p*N ) )
if( length(w) < N )
{
w = c(w,tail(w,1))
}
if(length(w) > N)
{
length( w ) = N
}
#set.seed(1234)
index_shuffled = sample( 1:N )
#try to shuffle the w
w = w[ index_shuffled ]
coef = S
if( length( S ) == 1 )
coef = rep( S, N )
w_tot = rep( w, coef )
# Lambda_0_inv
v <- runif( n )
Tlat <- Lambda_0_inv(- log(v) / ( w_tot * exp(x %*% beta) ) )
#Lambda_0 <- Vectorize(function(y) uniroot(function(x)
# Lambda_0_inv(x) - y, lower=0, upper=100, extendInt="yes")$root)
mean_cens = mean(Tlat)
var_cens = mean(Tlat)/10
# censoring times
C <- rnorm( n , mean_cens, var_cens)
# follow-up times and event indicators
time <- pmin(Tlat, C)
status <- as.numeric(Tlat <= C)
# data set
data.frame(family=rep( 1:N, coef ),
#rep = rep(1:S, N ),
time=time,
status=status,
x=x,
belong = w_tot)
}
##########################
# Computing the s.e matrix
##########################
exact_sec_der = function( ncovs, w, p, D, sumhaz, dsumhaz, d2sumhaz, lamsum)
{
K = length(w)
H = length(D)
#to build
d2p = array(dim=c(H, K-1, K-1))
d2w = array(dim=c(H, K, K))
d2cov = array(dim=c(H, ncovs, ncovs))
d2pw = array(dim=c(H, K-1, K))
d2pcov = array(dim=c(H, K-1, ncovs))
d2wcov = array(dim=c(H, K, ncovs))
exact_der = matrix(0, nrow=(K-1+K+ncovs), ncol=(K-1+K+ncovs))
if( K > 1 )
{
w_sum_haz = matrix( unlist( lapply( 1:K, function(i) w[i] * sumhaz )), nrow = H, ncol= K)
denominator = rowSums(matrix( unlist( lapply( 1:K, function(i) p[i]*w[i]^D )), nrow = H, ncol= K)*exp(-w_sum_haz))
numerator1 = rowSums(matrix( unlist( lapply( 1:K, function(i) p[i]*w[i]^(D+1) )), nrow = H, ncol= K)*exp(-w_sum_haz))
numerator2 = rowSums(matrix( unlist( lapply( 1:K, function(i) p[i]*w[i]^(D+2) )), nrow = H, ncol= K)*exp(-w_sum_haz))
}else{
denominator = numerator1 = numerator2 = exp(-sumhaz)
}
check=0
for( j in 1:H)
{
if(K>=2){
for( g in 1:(K-1) )
{
#Pure term 1 #dim(dpdp) = (K-1)x(K-1)
for( l in 1:(K-1) )
{
d2p[ j, g, l] = -(w[g]^D[j]*exp(-w[g]*sumhaz[j])-w[K]^D[j]*exp(-w[K]*sumhaz[j]))*(w[l]^D[j]*exp(-w[l]*sumhaz[j])-w[K]^D[j]*exp(-w[K]*sumhaz[j]))/(denominator[j])^2
}
#Mixed term 1-2 #dim(d2pw) = (K-1)xK
for(l in 1:K)
{
d2pw[ j, g, l ] = -(w[g]^D[j]*exp(-w[g]*sumhaz[j])-w[K]^D[j]*exp(-w[K]*sumhaz[j]))*(p[l]*w[l]^(D[j]-1)*exp(-w[l]*sumhaz[j])*(D[j]-w[l]*sumhaz[j]))/(denominator[j])^2+
+ ifelse( l == g, w[l]^D[j]*exp(-w[l]*sumhaz[j])*(D[j]-w[l]*sumhaz[j])/denominator[j], 0) +
+ ifelse( l == K, w[l]^D[j]*exp(-w[l]*sumhaz[j])*(D[j]-w[l]*sumhaz[j])/denominator[j], 0)
}
#Mixed term 1-3 #dim(d2pcov) = (K-1)xncovs
for(r in 1:ncovs)
{
d2pcov[ j, g, r ] = dsumhaz[j,r] * ( (w[K]^(D[j]+1)*exp(-w[K]*sumhaz[j]) - w[g]^(D[j]+1)*exp(-w[g]*sumhaz[j]))/denominator[j] +
-(w[K]^(D[j])*exp(-w[K]*sumhaz[j]) - w[g]^(D[j])*exp(-w[g]*sumhaz[j]))*numerator1[j]/(denominator[j])^2 )
}
}
}
#Pure term 2 #dim(d2w) = KxK
for( g in 1:K )
{
for( l in 1:K )
{
d2w[ j, g, l] = -p[g]*p[l]*((w[g]*w[l])^(D[j]-1))*exp(-(w[g]+w[l])*sumhaz[j])*(D[j]-w[g]*sumhaz[j])*(D[j]-w[l]*sumhaz[j])/(denominator[j])^2+
+ ifelse(g!=l, 0, p[g]*w[g]^(D[j]-2)*exp(-w[g]*sumhaz[j])*((D[j]-1-w[g]*sumhaz[j])*(D[j]-w[g]*sumhaz[j])-w[g]*sumhaz[j])/denominator[j])
}
}
#Pure term 3 #dim(d2cov) = ncovs x ncovs
for(r in 1:ncovs)
{
for(s in 1:ncovs)
{
d2cov[ j, r, s ] = lamsum[ j, r, s ]+ numerator2[j]/denominator[j]*(dsumhaz[j,r]*dsumhaz[j,s])+
- numerator1[j]/denominator[j]*d2sumhaz[j,r,s] +
- (numerator1[j]/denominator[j])^2*(dsumhaz[j,r]*dsumhaz[j,s])
}
}
#Mixed term 2-3 #dim(d2wcov) = Kxncovs
for(g in 1:K)
{
for(r in 1:ncovs)
{
d2wcov[ j, g, r ] = p[g] * w[g]^(D[j]-1) * exp(-w[g]*sumhaz[j]) * dsumhaz[j,r] * ( (-w[g]*(D[j]+1-w[g]*sumhaz[j]))/denominator[j]+
+ ((D[j]-w[g]*sumhaz[j])*numerator1[j])/(denominator[j])^2)
}
}
check = ifelse( length(which(is.na(d2w[j,,])))>0,j,check)
if(K>2){
exact_dertmp = rbind( cbind(d2p[j, , ], d2pw[j, , ], d2pcov[j, , ]),
cbind(t(d2pw[j, , ]), d2w[j, , ], d2wcov[j, , ]),
cbind(t(d2pcov[j, , ]), t(d2wcov[j, , ]), d2cov[j, , ]) )
}else if( K==2 ){
exact_dertmp = rbind( c(d2p[j, , ], d2pw[j, , ], d2pcov[j, , ]),
cbind(d2pw[j, , ], d2w[j, , ], d2wcov[j, , ]),
cbind(d2pcov[j, , ], t(d2wcov[j, , ]), d2cov[j, , ]) )
}else if(K==1){
exact_dertmp = rbind( c(d2w[j, , ], d2wcov[j, , ]), cbind( d2wcov[j, , ], d2cov[j, , ] ) )
}
exact_der = exact_der + exact_dertmp
}
return(exact_der)
}
exact_first_der = function(ncovs, w, p, D, sumhaz, dsumhaz, lamroutlam0, deltaijcovr, dsumhazred )
{
K = length(w)
H = length(D)
#to build
dp = rep(0,K-1)
dw = rep(0,K)
dcov = rep(0,ncovs)
dcov2 = rep(0,ncovs)
exact_derivative1 = exact_derivative2 = rep(0, K-1+K+ncovs)
if( K > 1 )
{
w_sum_haz = matrix( unlist( lapply( 1:K, function(i) w[i] * sumhaz )), nrow = H, ncol= K)
denominator = rowSums(matrix( unlist( lapply( 1:K, function(i) p[i]*w[i]^D )), nrow = H, ncol= K)*exp(-w_sum_haz))
numerator1 = rowSums(matrix( unlist( lapply( 1:K, function(i) p[i]*w[i]^(D+1) )), nrow = H, ncol= K)*exp(-w_sum_haz))
numerator2 = rowSums(matrix( unlist( lapply( 1:K, function(i) p[i]*w[i]^(D+2) )), nrow = H, ncol= K)*exp(-w_sum_haz))
}else{
denominator = numerator1 = numerator2 = exp(-sumhaz)
}
for( j in 1:H )
{
for(g in 1:(K-1))
{
dp[g] = (w[g]^(D[j])*exp(-w[g]*sumhaz[j]) - w[K]^(D[j])*exp(-w[K]*sumhaz[j]))/denominator[j]
}
for(g in 1:K)
{
dw[g] = (p[g] * w[g]^(D[j]-1) * exp(-w[g]*sumhaz[j]) * (D[j]-w[g]*sumhaz[j]))/denominator[j]
}
for(r in 1:ncovs)
{
dcov[r] = -numerator1[j]*dsumhaz[j,r]/denominator[j] + lamroutlam0[j,r]
dcov2[r] = -numerator1[j]*dsumhazred[j,r]/denominator[j] + deltaijcovr[j,r]
}
exact_derivative1 = exact_derivative1 + c( dp, dw, dcov)
exact_derivative2 = exact_derivative2 + c( dp, dw, dcov2)
}
return(list(exact_derivative1,exact_derivative2))
}
Iy = function( ncovs, alpha, w, p, D, sumhaz, lamroutlam0, dsumhaz)#, lamwoutlam0, dsumhazw)
{
#ncovs: number of covariates
#alpha: matrix of posterior density of Z, HxK
#w: vector of frailties, Kx1
#p: vector of p, Kx1
#D: number of events in each group Hx1
#sumhaz: Lambda_0*exp(X_ij* beta) H x 1
#lamroutlam0: lambda_0r/lambda_0 + X_ijr H x nocovs
#dsumhaz: first derivative of cumulative baseline haz H x ncovs
K = dim(alpha)[2]
H = dim(alpha)[1]
#to build
dpdp = array(dim=c(H, K-1))
dwdw = array(dim=c(H, K))
dcovdcov = array(dim=c(H, ncovs))
expectedStexpectedSj = matrix(0, nrow=(K-1+K+ncovs), ncol=(K-1+K+ncovs))
#each array is group specific
for( j in 1:H)
{
#Pure term 1 #dim(dpdp) = (K-1)xq
dpdp[ j, ] = alpha[ j, 1:(K-1) ]/p[1:(K-1)] - alpha[ j, K ]/p[K]
#Pure term 2 #dim(dwdw) = K x K
dwdw[ j, ] = alpha[ j, 1:K]*(D[j]/w[1:K]-sumhaz[j]) #( lamwoutlam0[ j, 1:K ] - dsumhazw[j,1:K] )
#Pure term 3 #dim(dcovdcov) = ncovs x ncovs
for(r in 1:ncovs) {
dcovdcov[ j, r ] = alpha[ j, ] %*% t(lamroutlam0[j,r]-w*dsumhaz[j,r])
}
expectedStexpectedSj = expectedStexpectedSj + c( dpdp[j,], dwdw[j, ], dcovdcov[j, ] ) %*% t(c( dpdp[j,], dwdw[j, ], dcovdcov[j, ] ))
}
return(expectedStexpectedSj)
}
Ixy = function( ncovs, alpha, w, p, D, sumhaz, lamroutlam0, dsumhaz ) #, lamwoutlam0, dsumhazw)
{
#ncovs: number of covariates
#alpha: matrix of posterior density of Z, HxK
#w: vector of frailties, Kx1
#p: vector of p, Kx1
#D: number of events in each group Hx1
#sumhaz: Lambda_0*exp(X_ij* beta) H x 1
#lamroutlam0: lambda_0r/lambda_0 + X_ijr H x nocovs
#dsumhaz: first derivative of cumulative baseline haz H x ncovs
K = dim(alpha)[2]
H = dim(alpha)[1]
#to build
dpdp = array(dim=c(H, K-1, K-1))
dwdw = array(dim=c(H, K, K))
dcovdcov = array(dim=c(H, ncovs, ncovs))
dpdw = array(dim=c(H, K-1, K))
dpdcov = array(dim=c(H, K-1, ncovs))
dwdcov = array(dim=c(H, K, ncovs))
expectedStSgroupj = matrix(0, nrow=(K-1+K+ncovs), ncol=(K-1+K+ncovs))
#each array is group specific
for( j in 1:H)
{
#Pure term 1 #dim(dpdp) = (K-1)x(K-1)
dpdp[ j, , ] = alpha[ j, K ]/p[K]^2
if( K > 2){
diag( dpdp[ j, , ] ) = diag( dpdp[ j, , ] ) + alpha[ j, 1:(K-1) ]/p[1:(K-1)]^2
}else if( K <= 2){
dpdp[ j, , ] = alpha[ j, K ]/p[K]^2 + alpha[ j, 1:(K-1) ]/p[1:(K-1)]^2
}
#Pure term 2 #dim(dwdw) = K x K
#diag( alpha[ j, 1:K]*( D[j]/w[1:K] - sumhaz[j] )^2 )
if(K>1){
dwdw[ j, , ] = diag(alpha[ j, 1:K]*(D[j]/w[1:K]-sumhaz[j])^2)#( lamwoutlam0[ j, 1:K ] - dsumhazw[j,1:K] )^2 )
}else if(K==1){
dwdw[ j, , ] = alpha[ j, 1:K]*(D[j]/w[1:K]-sumhaz[j])^2#( lamwoutlam0[ j, 1:K ] - dsumhazw[j,1:K] )^2 )
}
#Pure term 3 #dim(dcovdcov) = ncovs x ncovs
for(r in 1:ncovs) {
for(s in 1:ncovs) {
dcovdcov[ j, r, s ] = alpha[j,] %*% t( (lamroutlam0[j,r]-w*dsumhaz[j,r])*(lamroutlam0[j,s]-w*dsumhaz[j,s]) )
}
}
#Mixed term 1-2 #dim(dpdw) = (K-1)xK
dpdwtmp = rep(0,(K-1))
dpdwtmp[K-1] = -alpha[ j,K]/p[K] * (D[j]/w[K]-sumhaz[j])
if(K>2){
dpdw[j, , ] = cbind( diag(alpha[ j,1:(K-1)]/p[1:(K-1)] * (D[j]/w[1:(K-1)]-sumhaz[j])), dpdwtmp )
}else{
dpdw[j, , ] = cbind( alpha[ j,1:(K-1)]/p[1:(K-1)] * (D[j]/w[1:(K-1)]-sumhaz[j]), dpdwtmp )
}
#Mixed term 1-3 #dim(dpdcov) = K-1 x ncovs
for (r in 1:ncovs)
{
dpdcov[j, , r] = alpha[j,1:(K-1)]/p[1:(K-1)] * (lamroutlam0[j,r]-w[1:(K-1)]*dsumhaz[j,r]) - alpha[j,K]/p[K]*(lamroutlam0[j,r]-w[K]*dsumhaz[j,r])
}
#Mixed term 2-3 #dim(dpdcov) = K x ncovs
for (r in 1:ncovs)
{
dwdcov[j, , r] = alpha[j,1:K] * (lamroutlam0[j,r]-w[1:K]*dsumhaz[j,r]) * (D[j]/w[1:K]-sumhaz[j])
}
if(K>2){
expectedStSgroupjtmp = rbind( cbind(dpdp[j, , ], dpdw[j, , ], dpdcov[j, , ]),
cbind(t(dpdw[j, , ]), dwdw[j, , ], dwdcov[j, , ]),
cbind(t(dpdcov[j, , ]), t(dwdcov[j, , ]), dcovdcov[j, , ]) )
}else if( K==2 ){
expectedStSgroupjtmp = rbind( c(dpdp[j, , ], dpdw[j, , ], dpdcov[j, , ]),
cbind(dpdw[j, , ], dwdw[j, , ], dwdcov[j, , ]),
cbind(dpdcov[j, , ], t(dwdcov[j, , ]), dcovdcov[j, , ]) )
}else if(K==1){
expectedStSgroupjtmp = rbind( c(dwdw[j, , ], dwdcov[j, , ]), cbind( dwdcov[j, , ], dcovdcov[j, , ] ) )
}
expectedStSgroupj = expectedStSgroupj + expectedStSgroupjtmp
}
return(expectedStSgroupj)
}
Ix = function( ncovs, alpha, w, p, D, d2sumhaz, lamsum, dsumhaz) #,lamw2outlam02, d2sumhazw, lamwbetaoutlam02 ,d2sumhazwbeta )
{
#ncovs: number of covariates
#alpha: matrix of posterior density of Z, HxK
#w: vector of frailties, Kx1
#p: vector of p, Kx1
#D: number of events in each group Hx1
#d2sumhaz: second derivative of cumulative baseline haz H x ncovs x ncovs
#lamsum: second derivative of baseline haz H x nocovs x ncovs
#dsumhaz: first derivative of cumulative baseline haz H x ncovs
K = dim(alpha)[2]
H = dim(alpha)[1]
#pure terms
d2pp = array( dim = c( H, K-1, K-1 ) )
d2ww = array( dim = c( H, K, K ) )
d2covcov = array( dim = c( H, ncovs, ncovs ) )
#mixed terms
d2wcov = array( dim = c( H, K , ncovs ) )
#result
expectedBgroupj = matrix(0, nrow = (K-1+K+ncovs), ncol = (K-1+K+ncovs) )
for(j in 1:H)
{
#Pure term 1 #dim(d2pp) = (K-1)x(K-1)
d2pp[ j, , ] = - alpha[ j, K ]/p[K]^2
if(K>2){
diag( d2pp[ j, , ] ) = diag( d2pp[ j, , ] ) - alpha[ j, 1:(K-1) ]/p[1:(K-1)]^2
}else{
d2pp[ j, , ] = - alpha[ j, K ]/p[K]^2 - alpha[ j, 1:(K-1) ]/p[1:(K-1)]^2
}
#Pure term 2 #dim(d2pp) = K x K
if(K>1){
d2ww[ j, , ] = diag( -alpha[ j, 1:K ]*D[j]/w[1:K]^2 ) #alpha[ j, 1:K ]*(lamw2outlam02[j,1:K]-d2sumhazw[j,1:K] )
}else if(K==1){
d2ww[ j, , ] = -alpha[ j, 1:K ]*D[j]/w[1:K]^2 #alpha[ j, 1:K ]*(lamw2outlam02[j,1:K]-d2sumhazw[j,1:K] )
}
#Pure term 3 #dim(d2covcov) = ncovs x ncovs
for(r in 1:ncovs)
{
for(s in 1:ncovs)
{
d2covcov[ j, r, s ] = t(alpha[ j, 1:K]) %*% (lamsum[ j, r, s]- w[1:K] * d2sumhaz[ j , r, s])
}
}
#Mixed term 1 #dim(d2wcov) = K x ncovs
for(k in 1:K)
{
for(r in 1:ncovs)
{
d2wcov[ j, k, r ] = - alpha[j, k] * dsumhaz[ j, r ] #(lamwbetaoutlam02[j, k, r]-d2sumhazwbeta[j, k, r])
}
}
if(K>=2)
{
expectedBgroupjtmp = rbind( cbind( d2pp[ j, , ], matrix( 0, nrow = K-1, ncol = K ), matrix( 0, nrow = K-1, ncol = ncovs ) ),
cbind( matrix( 0, nrow = K, ncol = K-1 ), d2ww[j, , ], d2wcov[j, , ] ),
cbind( matrix( 0, nrow = ncovs, ncol = K-1 ), t(d2wcov[j, , ]), d2covcov[j, , ]) )
}else{
expectedBgroupjtmp = rbind( c( d2ww[j, , ], d2wcov[j, , ] ),
cbind( d2wcov[j, , ], d2covcov[j, , ]) )
}
expectedBgroupj = expectedBgroupj + expectedBgroupjtmp
}
return(expectedBgroupj)
}
information_matrix = function( expectedS, expectedB, expectedSexpectedS )
{
info = -expectedB-expectedS+expectedSexpectedS
cov <- solve(info)
se <- sqrt(diag(cov))
return(list(cov,se))
}
npdf_cox <- function( data, time, status, family, covariates, population = 2, eps_conv=10^-4)
{
H <- length( unique( data[[family]] ) ) #total number of hospital
K <- population
dat_ord <- data[ order( data[[family]], data[[time]], -data[[status]] ), ]
nt <- length(unique(data[[time]]))
ncovs <- length(covariates)
cov_read <- paste(covariates, sep=',')
cum_haz <- as.data.frame( cbind( hazard=rep( 0, nt ),
time = sort( unique( data[[time]] ) )) )
sumhaz <- rep( 0, H )
dsumhaz <- array( dim = c( H, ncovs ) ) #perchè non ho fatto una matrix??
d2sumhaz <- lamsum <- array( dim = c( H, ncovs, ncovs ) )
deltaijcovr <- matrix( 0, nrow = H, ncol = ncovs )
dsumhazred <- matrix( 0, nrow = H, ncol = ncovs )
#### computing SE
#cum_haz$hazard <-
YY <- haz <- rep( 0, nt )
dYY <- dhaz <- dcumhaz <- matrix( nrow = nt, ncol = ncovs )
#dYYmatr <- dhazmatr <- matrix( nrow = nt, ncol = ncovs )
d2YY <- d2haz <- d2cumhaz <- array( dim = c( nt, ncovs, ncovs ) )
#d2YYmatr <- d2hazmatr <- array( dim = c( nt, ncovs, ncovs ) )
# #new line
lamroutlam0 <- matrix( nrow = H, ncol = ncovs )
##sort the cluster id (there is a smarter way)
for( i in 1:length(unique( dat_ord[[family]] ) ) )
{
value <- unique( dat_ord[[family]] )[i]
dat_ord[[family]][ dat_ord[[family]] == value ] <- i
}
#
D <- table( factor( dat_ord[[family]] )[ dat_ord[[status]] == 1 ] ) # number of events in each group j
n_H <- as.vector( table( dat_ord[[family]] ) ) #number of patients in each hospital
risk_index <- matrix( 0, nrow = dim(dat_ord)[1], ncol = nt ) # matrix of at risk patients: nrows x nt
N = rep( 0, nt )
time_list <- sapply( 1:nt, function(x) !is.na(match(dat_ord[[time]], cum_haz$time[x]))) #matrix dim(dat_ord)[1] x nt, time_list[i,j] = T if the i-th row in the data is the j-th time recorded
N <- sapply( 1:dim(time_list)[2], function(x) sum(dat_ord[[status]][time_list[,x]])) #number of events happenend at each time. N[j] is the number of eents happenend at time j
for( l in 1:nt )
{
risk_index[ which( dat_ord[[time]] >= cum_haz$time[ l ]), l ] <- 1 #nrow(dat_ord) x nt
}
lis = list()
while( K >= 1 )
{
print(K)
count <- 0
not_proper_K <- 0
eps <- 10^5
p <- rep( 1/K, K )
w <- seq( 1, 5, length.out = K )
if( K==1 ) w <- 1
set.seed(2)
w_to_rep <- sample( rep( w, round( p*H ) ) )
if( H - sum( round( p*H ) ) >= 1 )
{
w_to_rep <- c( w_to_rep, rep( w_to_rep[ sum( round( p*H ) ) ], H - sum( round( p*H ) ) ) )
p <- table(w_to_rep)/H
}
if( H - sum( round( p*H ) ) <= -1 )
{
length( w_to_rep ) <- H
p <- table(w_to_rep)/H
}
w_off <- w_to_rep[ dat_ord[[family]] ]
numerator <- rep( 0, K )
E_formula_part <- rep( 0, H )
alpha <- E_formula <- matrix( 0, nrow = H, ncol = K)
while(eps > eps_conv & count < 200 )
{
mF <- formula( paste("Surv(time, status)", paste( paste( covariates, collapse= '+'), "offset( log( w_off ) )", sep ='+'), sep = "~"))
temp_model <- survival::coxph( mF, data = dat_ord, method = "breslow" )
beta_hat <- temp_model$coef
# computing baseline hazard and cumulative baseline hazard
YY = as.numeric( (w_off*exp( beta_hat %*% t(dat_ord[,cov_read]) )) %*% risk_index ) # nt x 1 contribute from at risk patient
haz = N/YY
cum_haz$hazard = cumsum( haz )
# saving p
p_old <- p
#Expectation step
for( j in 1:H )
{
hospj <- dat_ord[[family]]==j
ebz <- exp( as.matrix( dat_ord[hospj,cov_read] ) %*% beta_hat )
ti <- match(dat_ord[[time]][hospj], cum_haz$time)
lam0t <- cum_haz$hazard[ti]
# flag_cov is an indicator for the presence of at least 1 covariate
flag_cov = ifelse( length(cov_read) != 0, 1, 0 )
E_formula_part[j] <- ifelse( flag_cov == 1,
sum( lam0t*ebz ),
sum( lam0t ) )
for( k in 1:K )
{
E_formula[j,k] <- ifelse( flag_cov == 1,
sum( lam0t*ebz*w[ k ] ),
sum( lam0t*w[ k ] ) )
numerator[ k ] <- p[ k ] * exp( D[ j ] * log( w[ k ] ) +
-E_formula[j,k] )
}
alpha[ j, ] <- numerator/sum( numerator )
}
#Maximization Step
# vector of latent population label, dim = H x 1
belonging <- as.numeric( apply( alpha, 1, which.max) )
# vector of proportions
p <- ( colSums( alpha ) )/ H
print(p)
# vector of frailty, dim = Kx1
w <- (D %*% alpha)/(E_formula_part %*% alpha)
# standardization of w
w <- w/min(w)
#w <- w/w[1]
#w[which.min(w)] <- 1
# vector of frailty values, dim = nx1
w_off <- ( w[ belonging ][dat_ord[[family]]] )
# computation of the distance from the previous estimate of p
eps <- max( abs(p-p_old) )
# augmenting count
count <- count + 1
print(count)
}
# computing 1st and 2nd hazard derivatives
dhazards <- harzard_derivatives( dat_ord, risk_index, w_off, beta_hat, cov_read, N, YY )
dhaz <- dhazards$dhaz
d2haz <- dhazards$d2haz
dcumhaz <- dhazards$dcumhaz
d2cumhaz <- dhazards$d2cumhaz
print('dhaz done')
#nt x ncovs
# dYY <- t( risk_index) %*% ( as.matrix(sapply(w_off * exp( beta_hat %*% t(dat_ord[ ,cov_read ]) ) * dat_ord[ ,cov_read ], as.numeric)))
# dhaz <- - dYY * N/YY^2
#
# if( ncovs > 1 )
# {
# #nrow(dat_ord) x ncovs x ncovs
# d2YYtemp <- array( t( apply( dat_ord[,cov_read], 1, function(x) x%*%t(x)) ), dim=c( dim( dat_ord )[1], ncovs, ncovs ) )
# #nt x ncovs x ncovs
# dYYdYY <- array( t( apply( dYY[,1:ncovs], 1, function(x) x%*%t(x)) ), dim=c( nt, ncovs, ncovs ) )
# L <- lapply( seq_len( dim(risk_index)[2] ), function(i) apply(as.vector(w_off*exp(beta_hat %*% t(dat_ord[,cov_read])))*d2YYtemp*risk_index[,i],MARGIN = 2:3,sum))
# d2YY <- array( unlist(L), dim = c(nrow(L[[1]]), ncol(L[[1]]), length(L)))
# d2YY <- aperm( d2YY, c( 3, 1, 2 ))
# }else{
# d2YYtemp <- dat_ord[,cov_read]^2
# dYYdYY <- dYY^2
# d2YY <- t(risk_index) %*% ( w_off*exp( beta_hat * dat_ord[,cov_read] )*d2YYtemp ) # nt x ncovs
# }
#
# # constructing array for point-wise operation
# d2haz <- array( N/YY^3*(2*dYYdYY-d2YY*YY), dim = c( nt, ncovs, ncovs ))
#
#
# if(ncovs>1)
# {
# dcumhaz <- matrix( apply( dhaz, MARGIN = 2, cumsum ), ncol = ncovs) #sum by column
# d2cumhaz <- array( apply( d2haz, MARGIN = 2:3, cumsum ), dim = c( nt, ncovs, ncovs ) )
# }else{
# dcumhaz <- matrix( cumsum( dhaz ), ncol = ncovs)
# d2cumhaz <- array( cumsum( d2haz ), dim = c( nt, ncovs, ncovs ))
# }
#computing the input for the se functions
input_se <- input_se_functions( family, time, status, dat_ord, cov_read, beta_hat, haz, cum_haz, dhaz, dcumhaz, d2haz, d2cumhaz )
lamroutlam0 <- input_se$lamroutlam0
lamsum <- input_se$lamsum
deltaijcovr <- input_se$deltaijcovr
sumhaz <- input_se$sumhaz
dsumhazred <- input_se$dsumhazred
dsumhaz <- input_se$dsumhaz
d2sumhaz <- input_se$d2sumhaz
print('imput_se done')
# for( j in 1:H )
# {
# hospj <- dat_ord[[family]]==j
# ebz <- exp( as.matrix( dat_ord[hospj,cov_read] ) %*% beta_hat )
# ti <- match(dat_ord[[time]][hospj], cum_haz$time)
# lam0t <- cum_haz$hazard[ti]
# sumhaz[j] <- sum(lam0t * ebz)
#
#
# for (r in 1:ncovs){
# cov_int_r = cov_read[r]
# covr <- dat_ord[hospj,cov_int_r]
# lamrt <- dcumhaz[ti, r]
# #dsumhaz is the second part of III term
# dsumhaz[j,r] <- sum(ebz*(covr*lam0t + lamrt))
# #new lines for III term
# lamroutlam0tmp <- dat_ord[[status]][hospj]*( dhaz[ti,r]/haz[ti] + covr )
# lamroutlam0[j,r] <- sum(lamroutlam0tmp[haz[ti]>0])
#
# deltaijcovrtmp <- dat_ord[[status]][hospj]*covr
# deltaijcovr[j,r] <- sum(deltaijcovrtmp[haz[ti]>0])
# dsumhazred[j,r] <- sum(ebz*(covr*lam0t))
#
# for (s in 1:ncovs){
# cov_int_s <- cov_read[s]
# covs <- dat_ord[hospj,cov_int_s]
# lamst <- dcumhaz[ti, s]
# lamrst <- d2cumhaz[ti, r, s]
# d2sumhaz[j,r,s] <- sum(ebz*(covr*covs*lam0t + covr*lamst + covs*lamrt + lamrst))
# lamtmp <- dat_ord[[status]][hospj] * (d2haz[ti,r,s]*haz[ti] - dhaz[ti,r]*dhaz[ti,s])/haz[ti]^2
# lamsum[j,r,s] <- sum(lamtmp[haz[ti]>0]) # exclude zero hazard
#
# }
#
# }
# }
#function to be numerically optimized
sefundeponwbeta = function( x )
{
YYfun <- as.numeric( (w_off*exp( x[(2*K):(2*K-1+ncovs)] %*% t(dat_ord[,cov_read]) )) %*% risk_index ) # nt x 1 contribute from at risk patient
hazfun <- N/YYfun
cum_hazfun <- cumsum(hazfun)
lfull <- rep(0,H)
for(j in 1:H)
{
hospj <- dat_ord[[family]]==j
ebz <- exp( as.matrix( dat_ord[hospj,cov_read] ) %*% x[(2*K):(2*K-1+ncovs)] )
ti <- match(dat_ord[[time]][hospj], cum_haz$time)
lam0t <- cum_hazfun[ti]
haz0t <- hazfun[ti]
firsttermtmp <- dat_ord[[status]][hospj]*log(haz0t*ebz)
firstterm <- sum(firsttermtmp[haz0t>0,])
secondtermtmp <- -sum(lam0t * ebz) * x[(K):(2*K-1)]
wpowerD <- x[(K):(2*K-1)]^D[j]
secondterm <- log(sum(exp(secondtermtmp)*wpowerD*c(x[1:(K-1)],1-sum(x[1:(K-1)]))))
lfull[j] <- firstterm+secondterm
}
return( sum( lfull ) )
}
#Numerical computation of standard errors
if( length( unique( belonging ) ) == K )
{
if( K != 1 )
{
deriv <- numDeriv::genD( sefundeponwbeta, c( p[1:(K-1)], w, beta_hat ), method = "Richardson" )
}else{
deriv <- numDeriv::genD( sefundeponwbeta, c( w, beta_hat), method = "Richardson" )
}
info <- matrix(0, 2*K-1+ncovs, 2*K-1+ncovs)
info[upper.tri(info, diag=T)] <- deriv$D[(2*K+ncovs):(2*K-1+ncovs+(2*K+ncovs-1)*(2*K+ncovs)/2)]
info <- Matrix::forceSymmetric(info)
infoRich <- solve(-info)
print('infoRich done')
#Louis method for standard errors
EStS <- Ixy( ncovs, alpha, w, p, D, sumhaz, lamroutlam0, dsumhaz)
EB <- Ix( ncovs, alpha, w, p, D, d2sumhaz, lamsum, dsumhaz)
ESES <- Iy( ncovs, alpha, w, p, D, sumhaz, lamroutlam0, dsumhaz)
stderr_covariance <- information_matrix( expectedS = EStS, expectedB = EB, expectedSexpectedS = ESES )
print('Louis done')
#Exact second derivativefor standard errors
SecDer <- exact_sec_der( ncovs, w, p, D, sumhaz, dsumhaz, d2sumhaz,lamsum )
InfoFischer <- solve( -SecDer )
print('InfoFischer done')
}
# computing loglikelihood, AIC and BIC
llik <- log_likelihood( family, time, status, dat_ord, cov_read, p, w, alpha, beta_hat, haz, cum_haz)
# llik = 0
#
# for( j in 1:H )
# {
# hospj <- dat_ord[[family]]==j
# hospj_lam <- dat_ord[[family]]==j & dat_ord[[status]]==1
# ti <- match(dat_ord[[time]][hospj], cum_haz$time)
# ti_lam <- match(dat_ord[[time]][hospj_lam], cum_haz$time)
#
# for( k in 1:K )
# {
# llik_LAM <- ifelse( flag_cov == 1,
# sum( cum_haz$hazard[ti]*w[k]*exp( as.matrix( dat_ord[hospj,cov_read] ) %*% beta_hat ) ),
# sum( cum_haz$hazard[ti]*w[k] ) )
#
# #w_off[hospj][1] because w_off[hospj] are all equal
# llik_lam <- ifelse( flag_cov == 1,
# sum( log( haz[ti_lam]*w[k]*exp( as.matrix( dat_ord[hospj_lam,cov_read] ) %*% beta_hat ) ) ),
# sum( log( haz[ti_lam]*w[k] ) ) )
#
# llik_p <- ifelse( K==1, 1, p[ k ] ) #/( 1-p[ belonging[ j ] ] ) )
#
# llik <- llik + alpha[j,k]*(llik_lam-llik_LAM) + alpha[j,k]*log( llik_p )#*length(hospj)
#
# }
# }
tot_param <- K-1 + K + length(cov_read)
BIC <- -2*llik + tot_param * log( length( which(dat_ord$status == 1 ) ) )
AIC <- -2*llik + 2*tot_param
if( length( unique( belonging ) ) < K )
{
not_proper_K <- 1
stderr_covariance <- NULL
infoRich <- NULL
InfoFischer <- NULL
}
step = K
lis[[step]] <- list()
lis[[step]][['K']] <- K
lis[[step]][['too_much_K']] <- not_proper_K
lis[[step]][['p']] <- p
lis[[step]][['w']] <- w
lis[[step]][['beta']] <- beta_hat
lis[[step]][['belonging']] <- belonging
lis[[step]][['llik']] <- llik
lis[[step]][['BIC']] <- BIC
lis[[step]][['AIC']] <- AIC
lis[[step]][['varcovLouis']] <- stderr_covariance[[1]]
lis[[step]][['seLouis']] <- stderr_covariance[[2]]
lis[[step]][['varcovRich']] <- infoRich
lis[[step]][['seRich']] <- sqrt(diag(infoRich))
lis[[step]][['varcovExact']] <- InfoFischer
lis[[step]][['seExact']] <- sqrt(diag(InfoFischer))
K = K-1
}
best_llik = lis[[1]]$llik
best_BIC = lis[[1]]$BIC
best_AIC = lis[[1]]$AIC
pos_llik = 1
pos_BIC = 1
pos_AIC = 1
max_K = 0
right_K = 1
#Finding the maximum K for which we have distinct w
for( i in 2:population)
{
if( lis[[i]]$too_much_K > max_K )
{
max_K = lis[[i]]$too_much_K
right_K = lis[[i-1]]$K
}
}
if(max_K==0)
{
right_K = lis[[population]]$K
}
#I use the right K as the top value for evaluating the best BIC and AIC
i=1
while( i <= right_K)
{
if( lis[[i]]$llik > best_llik)
{
best_llik = lis[[i]]$llik
pos_llik = lis[[i]]$K
}
if( lis[[i]]$BIC < best_BIC)
{
best_BIC = lis[[i]]$BIC
pos_BIC = lis[[i]]$K
}
if( lis[[i]]$AIC < best_AIC)
{
best_AIC = lis[[i]]$AIC
pos_AIC = lis[[i]]$K
}
i=i+1
}
return( c(lis,pos_llik,pos_BIC,pos_AIC, right_K) ) #pos is the right K up to BIC or AIC, while right_K is the right K up to the belonging
}
#
# ##test
# N=100
# #S=40
# #S=rep(c(40,60,40,60,50),20)
# lambda = 0.5
# beta = c(1.6,3) #2#c(1.5,2,3)#c(0.05,0.26,0.36)
# rho=1
# p = c(0.8,0.2)#c(0.25,0.35,0.3,0.1)
# w_values= c(0.8,1.6)#c(1,2,2.7,3.2)
#
#
# data = simulWeibDiscreteFrail( N, S=NULL, lambda, rho, beta, p, w_values)
# test_res = npdf_cox( data, time = 'time', status = 'status', family = 'family', covariates = c('x'), population = 3, eps_conv=10^-4)
#
# set.seed(2000)
# data = simulWeibDiscreteFrailCovNPbaseInv(N, S=50, beta, Lambda_0_inv = function(t, c=0.01, d=4.6) (t^(1/d))/c , p, w_values)
# start.time <- Sys.time()
# test_res = npdf_cox( data, time = 'time', status = 'status', family = 'family', covariates = c('x.1','x.2'), population = 3, eps_conv=10^-4) #.1','x.2','x.3
# end.time <- Sys.time()
# time.taken <- end.time - start.time
# time.taken
#
#
# dat_hand = sdatright_trans_3
# test_real_data = npdf_cox( dat_hand, time = 'time', status = 'status', family = 'OSP_ID', covariates = c('AGE','SEX'), population = 5, eps_conv=10^-4)
#
#
# test_res[[1]]$w
# test_res[[2]]$w
# test_res[[3]]$w
#
# test_res[[1]]$p
# test_res[[2]]$p
# test_res[[3]]$p
#
# test_res[[1]]$beta
# test_res[[2]]$beta
# test_res[[3]]$beta
#
# test_res[[1]]$seLouis
# test_res[[1]]$seRich
# test_res[[1]]$seExact
# test_res[[2]]$seLouis
# test_res[[2]]$seRich
# test_res[[2]]$seExact
# test_res[[3]]$seLouis
# test_res[[3]]$seRich
# test_res[[3]]$seExact
#
# test_res[[5]]
# test_res[[6]]
# test_res[[7]]
#
# plot(1:3,c(test_res[[1]]$AIC,test_res[[2]]$AIC,test_res[[3]]$AIC))
# table( rep( test_res[[2]]$belonging, S)- data$belong )
# (350+320)/sum(table( rep( test_res[[2]]$belonging, S)- data$belong ))
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.