# R/survreg.fit.R

# 
#  Do the actual fit of a survreg model.  This routine is for the case
#   of no penalized terms (splines, etc).
#
survreg.fit<- function(x, y, weights, offset, init, controlvals, dist, 
		       scale=0, nstrat=1, strata, parms=NULL, assign) {
    iter.max <- controlvals$iter.max
    eps <- controlvals$rel.tolerance
    toler.chol <- controlvals$toler.chol

    if (!is.matrix(x)) stop("Invalid X matrix")
    n <- nrow(x)
    nvar <- ncol(x)
    ny <- ncol(y)
    if (is.null(offset)) offset <- rep(0,n)
    if (missing(weights) || is.null(weights)) weights <- rep(1.0, n)
    else if (any(weights<=0)) stop("Invalid weights, must be >0")

    if (scale <0) stop("Invalid scale")
    if (scale >0 && nstrat >1) 
	    stop("Cannot have both a fixed scale and strata")
    if (nstrat>1 && (missing(strata) || length(strata)!= n))
	    stop("Invalid strata variable")
    if (nstrat==1) strata <- rep(1,n)
    if (scale >0) nstrat2 <- 0          # number of variances to estimate
    else          nstrat2 <- nstrat

    if (is.character(dist)) {
	sd <- survreg.distributions[[dist]]
	if (is.null(sd)) stop("Unrecognized distribution")
	}
    else sd <- dist
    if (!is.function(sd$density)) 
	stop("Missing density function in the definition of the distribution")
    dnum <- match(sd$name, c("Extreme value", "Logistic", "Gaussian"))
    if (is.na(dnum)) {
        #  We need to set up a callback routine.
        #  It returns the 5-column distribution summary (see the density
        #  functions in survreg.distributions).  Interval censored obs require
        #  2 evaluations and all others 1, so each call to the routine
        #  involves n2 values.
	dnum <- 4  # flag for the C routine
	n2 <- n + sum(y[,ny]==3)  # rows expected from each density call
        
	#
        # Create an expression that will be evaluated by the C code,
        #   but with knowledge of some current variables.
        # In R this could be written body(function(z) {...}); in S-Plus
	#  (Chambers' book) it is functionBody(function(z) ...): same action,
	#  different name.  Luckily 'quote' exists in both.
	# We make very sure the result is the right type and length here
	#  rather than in the C code, for simplicity.
        f.expr <- quote({
                if (length(parms)) temp <- sd$density(z, parms)
                else               temp <- sd$density(z)
                if (!is.matrix(temp) || any(dim(temp) != c(n2,5)) ||
                    !is.numeric(temp))
		    stop("Density function returned an invalid matrix")
                as.vector(as.double(temp))
                })
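        # For reference, the density must return an n2 x 5 matrix whose
        #  columns are F, 1-F, f, f'/f, and f''/f at each point z.  A
        #  minimal sketch for the standard Gaussian (illustration only):
        #    mydens <- function(z, ...)
        #        cbind(pnorm(z), pnorm(z, lower.tail=FALSE), dnorm(z),
        #              -z, z^2 - 1)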
       
        # create an isolated sandbox (frame or environment) in which
	#  we can do the evaluation without endangering local objects
	#  but still with knowledge of sd, parms, and n2
        rho <- new.env() #inherits necessary objects
        }
    else {
	f.expr <- 1  #dummy values for the .Call
	rho <- 1
	}

    # This is a subset of residuals.survreg: define the first and second
    #   derivatives of the log-likelihood at z=0, for the 4 censoring types.
    #   Used below for starting estimates.
    derfun <- function(y, eta, sigma, density, parms) {
	ny <- ncol(y)
	status <- y[,ny]
	z <- (y[,1] - eta)/sigma
	dmat <- density(z,parms)
	dtemp<- dmat[,3] * dmat[,4]    #f'
	if (any(status==3)) {
	    z2 <- (y[,2] - eta)/sigma
	    dmat2 <- density(z2, parms)
	    }
	else {
	    dmat2 <- matrix(0,1,5)   #dummy values
	    z2 <- 0
	    }
	tdenom <- ((status==0) * dmat[,2]) +
		  ((status==1) * 1 )       +
		  ((status==2) * dmat[,1]) +
		  ((status==3) * ifelse(z>0, dmat[,2]-dmat2[,2], 
		                             dmat2[,1] - dmat[,1]))
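	# note: F(z2)-F(z1) and S(z1)-S(z2) are algebraically equal; the
	#  ifelse picks whichever form avoids cancellation in the relevant tail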
	tdenom <- 1/(tdenom* sigma)
	dg <- -tdenom   *(((status==0) * (0-dmat[,3])) +
			  ((status==1) * dmat[,4]) + 
			  ((status==2) * dmat[,3]) +
			  ((status==3) * (dmat2[,3]- dmat[,3])))

	ddg <- (tdenom/sigma)*(((status==0) * (0- dtemp)) +
			       ((status==1) * dmat[,5]) +
			       ((status==2) * dtemp) +
			       ((status==3) * (dmat2[,3]*dmat2[,4] - dtemp))) 
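	# dg is d(loglik)/d(eta); ddg at this point is g''/g, and subtracting
	#  dg^2 below turns it into d2(loglik)/d(eta)2, since
	#  (log g)'' = g''/g - (g'/g)^2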
	list(dg = dg, ddg = ddg - dg^2)
	}

    # Rescale the X matrix, which leads to more stable results, but only
    #  if the first column is an intercept.
    #  If someone provided initial values, assume they know what they
    #  are doing.
    rescaled <- FALSE
    if (is.null(init) && all(x[,1]==1) && ncol(x)>1) {  
        okay <- apply(x, 2, function(z) all(z==0 | z==1)) # leave 0/1 columns alone
        if (!all(okay)) {
            rescaled <- TRUE
            center <- ifelse(okay, 0, colMeans(x))
            stdev  <- ifelse(okay, 1, apply(x, 2, sd))
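            # (apply() resolves 'sd' via match.fun, which searches for an
            #  object of mode "function", so stats::sd is found even though
            #  this function also has a list named 'sd')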
            x <- scale(x, center, stdev)
        }
    } 

    #
    # A good initial value of the scale turns out to be critical for successful
    #   iteration, in a surprisingly large number of data sets.
    # The best way we've found to get one is to fit a model with only the
    #   mean and the scale.  We don't need to do this in 3 situations:
    #    1. The only covariate is a mean (this step is then just a duplicate
    #       of the main fit).
    #    2. There are no scale parameters to estimate
    #    3. The user gave initial estimates for the scale
    # However, for 2 and 3 we still want the loglik for a mean only model
    #  as a part of the returned object.
    #
    nvar2 <- nvar + nstrat2
    meanonly <- (nvar==1 && all(x==1))
    if (!meanonly) {
	yy <- ifelse(y[,ny]!=3, y[,1], (y[,1]+y[,2])/2 )
	coef <- sd$init(yy, weights, parms)  #starting estimate for this model
	# init returns sigma^2; we need log(sigma)
	# We sometimes get into trouble with a small estimate of sigma
	#  (the surface isn't SPD), but never with a large one.  Double it.
	if (scale >0) vars <- log(scale) # scale was passed in as a parameter
	else vars <- log(4*coef[2])/2    # log(2*sqrt(variance)) = log(4*var)/2
	coef <- c(coef[1], rep(vars, nstrat))
	
	# get a better initial value for the mean using the "glim" trick
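	#  (a single weighted least-squares step: wt is the observed
	#   information -weights*d2l/deta2, and weights*dg/wt is the working
	#   residual at eta = yy)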
	deriv <- derfun(y, yy, exp(vars), sd$density, parms)
	wt <-  -1*deriv$ddg*weights
	coef[1] <- sum(weights*deriv$dg + wt*(yy -offset)) / sum(wt)
        
	# Now the fit proper (intercept only)
	fit0 <- .Call(Csurvreg6,
		       iter = as.integer(20),
		       nvar = as.integer(1),
		       as.double(y),
		       as.integer(ny),
		       x = as.double(rep(1.0, n)),
		       as.double(weights),
		       as.double(offset),
		       coef= as.double(coef),
		       as.integer(nstrat2),
		       as.integer(strata),
		       as.double(eps),
		       as.double(toler.chol), 
		       as.integer(dnum),
		       f.expr,
		       rho)
	}
    #
    # Fit the model with all covariates
    #
    if (is.numeric(init)) {
	if (length(init) == nvar && (nvar2 > nvar)) {
	    # Add on the variance estimates from above
	    init <- c(init, fit0$coef[-1])
	    }
	if (length(init) != nvar2) stop("Wrong length for initial parameters")
	if (scale >0) init <- c(init, log(scale))
	}
    else  {
	# Do the 'glim' method of finding an initial value of coef
	if (meanonly) {
	    yy <- ifelse(y[,ny]!=3, y[,1], (y[,1]+y[,2])/2 )
	    coef <- sd$init(yy, weights, parms)
	    if (scale >0) vars <- rep(log(scale), nstrat)
	    else vars  <- rep(log(4*coef[2])/2, nstrat)  
	    }
	else vars <- fit0$coef[-1]
	eta <- yy - offset     #what would be true for a 'perfect' model

	deriv <- derfun(y, yy, exp(vars[strata]), sd$density, parms)
	wt <-  -1*deriv$ddg*weights
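	# one weighted least-squares step: solve (X'WX) beta = X'(W eta + U),
	#  where U = weights*dg is the score at eta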
	coef <- coxph.wtest(t(x)%*% (wt*x), 
		       c((wt*eta + weights*deriv$dg)%*% x),
			    toler.chol=toler.chol)$solve
	init <- c(coef, vars)
	}

    # Now for the fit in earnest
    fit <- .Call(Csurvreg6,
		   iter = as.integer(iter.max),
		   as.integer(nvar),
		   as.double(y),
		   as.integer(ny),
		   as.double(x),
	           as.double(weights),
		   as.double(offset),
		   as.double(init),
	           as.integer(nstrat2),
	           as.integer(strata),
		   as.double(eps),
	           as.double(toler.chol), 
		   as.integer(dnum),
		   f.expr,
		   rho)
    if (iter.max >1 && fit$flag > nvar2) {
        warning("Ran out of iterations and did not converge")
	}

    cname <- dimnames(x)[[2]]
    if (is.null(cname)) cname <- paste("x", 1:ncol(x))
    if (scale==0) cname <- c(cname, rep("Log(scale)", nstrat))
    if (scale>0) fit$coef <- fit$coef[1:nvar2]
    names(fit$coef) <- cname

    if (meanonly) {
	coef0 <- fit$coef
	loglik <- rep(fit$loglik,2)
	}
    else {
	coef0 <- fit0$coef
	names(coef0) <- c("Intercept", rep("Log(scale)", nstrat))
	loglik <- c(fit0$loglik, fit$loglik)
	}

    var.new <- matrix(fit$var, nvar2, dimnames=list(cname, cname))
    coef.new <- fit$coef
    if (rescaled) {
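        # undo the centering/scaling: beta_j = beta'_j / s_j, the intercept
        #  absorbs -sum(beta'_j c_j / s_j), and the variance matrix
        #  transforms as T V T' under the same linear map T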
        vtemp <- diag(c(1/stdev, rep(1.0, nrow(var.new)- length(stdev))))
        vtemp[1, 2:nvar] <- -center[2:nvar]/stdev[2:nvar]
        coef.new <- drop(vtemp %*%coef.new)
        var.new <- vtemp%*% var.new %*% t(vtemp)
        names(coef.new) <- names(fit$coef)
    }

    temp <- list(coefficients   = coef.new,
		 icoef  = coef0, 
		 var    = var.new,
		 loglik = loglik, 
		 iter   = fit$iter,
		 linear.predictors = c(x %*% fit$coef[1:nvar] + offset),
                 df= length(fit$coef),
		 score = fit$u
		 )
    temp
    }
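
# A minimal usage sketch, assuming the lung data shipped with this package.
#  survreg.fit is the internal workhorse of survreg(); it expects a
#  distribution that carries its own density ("extreme", "gaussian",
#  "logistic", ...), with any transformation (e.g. log(time) for the
#  Weibull) already applied to y.  From outside the package namespace it
#  must be reached as survival:::survreg.fit.
#
#   y <- cbind(log(lung$time), lung$status - 1)  # 0 = censored, 1 = event
#   x <- cbind(1, lung$age)
#   fit <- survreg.fit(x, y, weights=NULL, offset=NULL, init=NULL,
#                      controlvals=survreg.control(), dist="extreme",
#                      assign=NULL)
#   fit$coefficients   # intercept, age, and Log(scale)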
