R/Rcode.r

Defines functions epa joinrate transrate.hmd transrate.hld rsfitterem

Documented in epa joinrate transrate.hld transrate.hmd

rsfitterem<-function(data,b,maxiter,ratetable,tol,bwin,p,cause,Nie){
  # cause: = 2 (unknown), 0 in 1 known. Lahko preko argumenta cause v rsadd dolocis, ce kdo ima znan cause of death (ne rabijo vsi) .
  # Nie: to je lambda_0 (ti), ki se oceni v M koraku v EM algoritmu

  pr.time<-proc.time()[3]
  if (maxiter<1) stop("There must be at least one iteration run")
  n<-nrow(data)
  m <- p
  dtimes <- which(data$stat==1)			#the positions of event times in data$Y
  td <- data$Y[dtimes]				#event times
  ntd <- length(td)				#number of event times
  utimes <- which(c(1,diff(td))!=0)		#the positions of unique event times among td
  utd <- td[utimes]				#unique event times
  nutd <- length(utd)				#number of unique event times
  udtimes <- dtimes[utimes]			#the positions of unique event times among data$Y
  razteg <- function(x){
    # x is a 0/1 vector, the output is a vector of length sum(x), with the corresponding rep numbers
    n <- length(x)
    repu <- rep(1,n)
    repu[x==1] <- 0
    repu <- rev(cumsum(rev(repu)))
    repu <- repu[x==1]
    repu <- -diff(c(repu,0))+1
    if(sum(repu)!=n)repu <- c(n-sum(repu),repu)  #ce je prvi cas censoring, bo treba se kej narest??
    repu
  }
  rutd <- rep(0,ntd)
  rutd[utimes] <- 1
  rutd <- razteg(rutd)				#from  unique event times to event times
  rtd <- razteg(data$stat)			#from event times to data$Y

  a <- data$a[data$stat==1]

  if(bwin[1]!=0){
    #the vector of change points for the smoothing bandwidth
    nt4 <- c(1,ceiling(c(nutd*.25,nutd/2,nutd*.75,nutd)))
    if(missing(bwin))bwin <- rep(1,4)
    else bwin <- rep(bwin,4)
    for(it in 1:4){
      bwin[it] <- bwin[it]*max(diff(utd[nt4[it]:nt4[it+1]]))
    }
    while(utd[nt4[2]]<bwin[1]){		# ce je bwin velik, skrajsamo nt4
      nt4 <- nt4[-2]
      if(length(nt4)==1)break
    }
    #the smoothing matrix
    krn <- kernerleftch(utd,bwin,nt4)
  }



  #forming the new dataset
  if(p>0){
    whtemp <- data$stat==1&cause==2
    dataded <- data[data$stat==1&cause==2,]				#events with unknown cause
    datacens <- data[data$stat==0|cause<2,]				#censorings or known cause

    datacens$cause <- cause[data$stat==0|cause<2]*data$stat[data$stat==0|cause<2]

    databig <- lapply(dataded, rep, 2)
    databig <- do.call("data.frame", databig)
    databig$cause <- rep(2,nrow(databig))
    nded <- nrow(databig)
    databig$cens <- c(rep(1,nded/2),rep(0,nded/2))

    datacens$cens <- rep(0,nrow(datacens))
    datacens$cens[datacens$cause<2] <- datacens$cause[datacens$cause<2]
    names(datacens) <- names(databig)

    databig <- rbind(databig,datacens)

    cause <- cause[data$stat==1]

    #NEW IN 2.05 (next 4 lines)
    fk <- (attributes(ratetable)$factor != 1)
    nfk <- length(fk)
    varstart <- 3+nfk+1		#first column of covariates
    varstop <- 3+nfk+m		#last column of covariates
    #model matrix for relative survival
    xmat <- as.matrix(data[,varstart:varstop])		#NEW IN 2.05

    #ebx at initial values of b
    ebx <- as.vector(exp(xmat%*%b)) # exp(linear.predictor)

    #model matrix for coxph
    modmat <- as.matrix(databig[,varstart:varstop])		#NEW IN 2.05
    varnames <- names(data)[varstart:varstop]		#NEW IN 2.05
  }
  else{
    cause <- cause[data$stat==1]
    ebx <- rep(1,n) # exp(linear.predictor)
  }


  #for time-dependent data:
  starter <- sort(data$start)
  starter1<-c(starter[1],starter[-length(starter)])

  #the values of interest in the cumsums of the obsolete values (there is at least one value - the 1st)
  index <- c(TRUE,(starter!=starter1)[-1])

  starter <- starter[index]
  #the number of repetitions in each cumsum difference - needed for s0 calculation
  val1 <- apply(matrix(starter,ncol=1),1,function(x,Y)sum(x>=Y),data$Y)
  val1 <- c(val1[1],diff(val1),length(data$Y)-val1[length(val1)])

  eb <- ebx[data$stat==1]
  # s0 je sum_{at risk set} ebx
  s0 <- cumsum((ebx)[n:1])[n:1]

  ebx.st <- ebx[order(data$start)]
  s0.st <- ((cumsum(ebx.st[n:1]))[n:1])[index]
  s0.st <- rep(c(s0.st,0),val1)
  s0 <- s0 - s0.st


  #s0 only at times utd
  s0 <- s0[udtimes]


  #find the corresponding value of Y for each start!=0 - needed for likelihood calculation
  start <- data$start
  # if(any(start!=0)){
  #   wstart <- rep(NA,n)
  #   ustart <- unique(start[start!=0])
  #   for(its in ustart){
  #     wstart[start==its] <- min(which(data$Y==its))
  #   }
  # }


  #tale del je zelo sumljiv - kako se racuna likelihood za ties???
  difft <- c(data$Y[data$stat==1][1],diff(td))
  difft <- difftu <- difft[difft!=0]
  difft <- rep(difft,rutd)
  a0 <- a*difft



  if(sum(Nie==.5)!=0)maxit0 <- maxiter
  else maxit0<- maxiter - 3
  for(i in 1:maxit0){

    #Nie is of length ntd, should be nutd, with the values at times being the sum
    nietemp <- rep(1:nutd,rutd)
    Nies <- as.vector(by(Nie,nietemp,sum))  #shorter Nie - only at times utd

    lam0u <- lam0 <- Nies/s0
    #the smooting of lam0
    if(bwin[1]!=0)lam0s <- krn%*%lam0
    else lam0s <- lam0/difftu

    #extended to all event times
    lam0s <- rep(lam0s,rutd)


    #compute Nie, only for those with unknown hazard
    Nie[cause==2] <- as.vector(lam0s*eb/(a+lam0s*eb))[cause==2]

  }

  if(maxit0!=maxiter & i==maxit0) i <- maxiter
  #likelihood calculation - manjka ti se likelihood za nicelni model!!!
  #the cumulative hazard
  Lam0  <- cumsum(lam0)
  #extended to all event times
  Lam0 <- rep(Lam0,rutd)
  if(data$stat[1]==0) Lam0 <- c(0,Lam0)
  #extended to all exit times
  Lam0 <- rep(Lam0,rtd)

  #for time dependent covariates and left-truncated individuals: replace by the difference
  if(any(start!=0)){
    # Calculate hazards at non-event times:
    timehaz <- data.frame(time=sort(data$Y), Lam0_2=Lam0)
    timehaz_tmp <- data.frame(time=unique(data$start), Lam0_2=NA)
    timehaz <- rbind(timehaz, timehaz_tmp)
    timehaz <- timehaz[order(timehaz$time),]
    timehaz$Lam0_2 <- mstateNAfix(timehaz$Lam0_2, 0)
    timehaz <- timehaz[!duplicated(timehaz$time),]

    # Prepare object so that you can calculate Lam0_event_time - Lam0_entry_time
    data_lt <- cbind(data, Lam0, id_0=1:nrow(data))
    data_lt <- merge(data_lt, timehaz,
                     by.x='start', by.y='time', all.x = TRUE)
    data_lt <- data_lt[order(data_lt$id_0),]

    # Check:
    # if(any(data_lt$Lam0_2[start!=0] != Lam0[wstart[start!=0]])){
    #   browser()
    # }

    # Edit Lam0:
    Lam0[start!=0] <- data_lt$Lam0[start!=0] - data_lt$Lam0_2[start!=0]

    # Old calculation:
    # Lam0[start!=0] <- Lam0[start!=0] - Lam0[wstart[start!=0]]
  }

  lam0 <- rep(lam0,rutd)

  likely0 <- sum(log(a0 + lam0*eb)) - sum(data$ds + Lam0*ebx)
  likely <- likely0
  tempind <- Nie<=0|Nie>=1
  if(any(tempind)){
    if(any(Nie<=0))Nie[Nie<=0] <- tol
    if(any(Nie>=1))Nie[Nie>=1] <- 1-tol
  }

  if(p>0)databig$wei <- c(Nie[cause==2],1-Nie[cause==2],rep(1,nrow(datacens)))


  if(maxiter>=1&p!=0){
    for(i in 1:maxiter){

      if(p>0){
        b00<-b
        if(i==1)fit <- coxph(Surv(start,Y,cens)~modmat,data=databig,weights=databig$wei,init=b00,x=TRUE,iter.max=maxiter)
        else    fit <- coxph(Surv(start,Y,cens)~modmat,data=databig,weights=databig$wei,x=TRUE,iter.max=maxiter)

        if(any(is.na(fit$coeff))) stop("X matrix deemed to be singular, variable ",which(is.na(fit$coeff)))

        b <- fit$coeff

        ebx <- as.vector(exp(xmat%*%b))
      }
      else ebx <- rep(1,n)

      eb <- ebx[data$stat==1]

      # s0 je sum_{at risk set} ebx
      s0 <- cumsum((ebx)[n:1])[n:1]

      ebx.st <- ebx[order(data$start)]
      s0.st <- ((cumsum(ebx.st[n:1]))[n:1])[index]
      s0.st <- rep(c(s0.st,0),val1)
      s0 <- s0 - s0.st

      #Nie is of length ntd, should be nutd, with the values at times being the sum
      nietemp <- rep(1:nutd,rutd)
      Nies <- as.vector(by(Nie,nietemp,sum))  #shorter Nie - only at times utd
      #s0 only at times utd
      s0 <- s0[udtimes]

      lam0u <- lam0 <- Nies/s0


      #the cumulative hazard
      Lam0  <- cumsum(lam0)
      #extended to all event times
      Lam0 <- rep(Lam0,rutd)
      if(data$stat[1]==0) Lam0 <- c(0,Lam0)
      #extended to all exit times
      Lam0 <- rep(Lam0,rtd)
      # for time dependent covariates and left-truncated individuals: replace by the difference
      if(any(start!=0)){
        timehaz <- data.frame(time=sort(data$Y), Lam0_2=Lam0)
        timehaz_tmp <- data.frame(time=unique(data$start), Lam0_2=NA)
        timehaz <- rbind(timehaz, timehaz_tmp)
        timehaz <- timehaz[order(timehaz$time),]
        timehaz$Lam0_2 <- mstateNAfix(timehaz$Lam0_2, 0)
        timehaz <- timehaz[!duplicated(timehaz$time),]

        # Prepare object so that you can calculate Lam0_event_time - Lam0_entry_time
        data_lt <- cbind(data, Lam0, id_0=1:nrow(data))
        data_lt <- merge(data_lt, timehaz,
                         by.x='start', by.y='time', all.x = TRUE)
        data_lt <- data_lt[order(data_lt$id_0),]

        # Edit Lam0:
        Lam0[start!=0] <- data_lt$Lam0[start!=0] - data_lt$Lam0_2[start!=0]
        # Lam0[start!=0] <- Lam0[start!=0] - Lam0[wstart[start!=0]]
      }

      #the smooting of lam0
      if(bwin[1]!=0)lam0s <- krn%*%lam0
      else lam0s <- lam0/difft

      #extended to all event times
      lam0s <- rep(lam0s,rutd)

      #compute Nie, only for those with unknown hazard
      Nie[cause==2] <- as.vector(lam0s*eb/(a+lam0s*eb))[cause==2]

      #likelihood calculation - manjka ti se likelihood za nicelni model!!!

      lam0 <- rep(lam0,rutd)

      likely <- sum(log(a0 + lam0*eb)) - sum(data$ds + Lam0*ebx)

      if(p>0){
        tempind <- Nie<=0|Nie>=1
        if(any(tempind)){
          if(any(Nie<=0))Nie[Nie<=0] <- tol
          if(any(Nie>=1))Nie[Nie>=1] <- 1-tol
          #if(which(tempind)!=nev)warning("Weights smaller than 0")
          #if(any(is.na( match(which(tempind),c(1,nev)) )))browser()
        }
        if(nded==0) break()
        databig$wei[1:nded] <- c(Nie[cause==2],1-Nie[cause==2])
        bd <- abs(b-b00)
        if(max(bd)< tol) break()
      }
      #early stopping time for no covariates???
    }
  }
  iter <- i
  #if (maxiter > 1& iter>=maxiter)
  #        warning("Ran out of iterations and did not converge")
  if(p>0){
    if(nded!=0){
      resi <- resid(fit,type="schoenfeld")
      if(!is.null(dim(resi)))resi <- resi[1:(nded/2),]
      else resi <- resi[1:(nded/2)]
      swei <- fit$weights[1:(nded/2)]

      if(is.null(dim(resi))) fishem <- sum((resi^2*swei*(1-swei)))
      else {
        fishem <- apply(resi,1,function(x)outer(x,x))
        fishem <- t(t(fishem)*swei*(1-swei))
        fishem <- matrix(apply(fishem,1,sum),ncol=m)
      }
    }

    else fishem <- 0
    fishcox <- solve(fit$var)
    fisher <- fishcox - fishem
    fit$var <- solve(fisher)
    names(fit$coefficients)<-varnames
    fit$lambda0 <- lam0s
  }
  else fit <- list(lambda0 = lam0s)
  fit$lambda0 <- fit$lambda0[utimes]
  fit$Lambda0 <- Lam0[udtimes]
  fit$times <- utd
  fit$Nie <- Nie
  fit$bwin <- bwin
  fit$iter <- i
  class(fit) <- c("rsadd",class(fit))
  fit$loglik <- c(likely0,likely)
  fit$lam0.ns <- lam0u
  fit
}



em <- function (rform, init, control, bwin)
{
  data <- rform$data
  n <- nrow(data)
  p <- rform$m
  ord_id <- order(data$Y)
  rform$cause <- rform$cause[ord_id]
  data <- data[ord_id, ]
  fk <- (attributes(rform$ratetable)$factor != 1)
  nfk <- length(fk)
  nev <- length(data$Y[data$stat == 1])
  data$a <- rep(NA, n)
  xx <- exp.prep(data[, 4:(nfk + 3),drop=FALSE], data$Y - data$start, rform$ratetable)
  # The cumulative population hazard of dying at time Y:
  data$ds <- -log(xx)
  data1 <- data
  data1[, 4:(nfk + 3)] <- data[, 4:(nfk + 3)] + data$Y %*% t(fk)
  xx <- exp.prep(data1[data1$stat == 1, 4:(nfk + 3),drop=FALSE], 1, rform$ratetable)
  # The population hazard of dying in the following day (for individuals that had an event):
  data$a[data$stat == 1] <- -log(xx)

  if (p > 0) {
    if (!missing(init) && !is.null(init)) {
      if (length(init) != p)
        stop("Wrong length for inital values")
    }
    else init <- rep(0, p)
    beta <- matrix(init, p, 1)
  }
  pr.time<-proc.time()[3]

  Nie <- rep(.5,sum(data$stat==1))
  Nie[rform$cause[data$stat==1]<2] <-  rform$cause[data$stat==1][rform$cause[data$stat==1]<2]

  #NEW IN 2.05
  varstart <- 3+nfk+1		#first column of covariates
  varstop <- 3+nfk+p		#last column of covariates


  if(missing(bwin))bwin <- -1
  if(bwin<0){

    if(p>0)data1 <- data[,-c(varstart:varstop)]    		#NEW IN 2.05
    else  data1 <- data
    nfk <- length(attributes(rform$ratetable)$dimid)
    names(data)[4:(3+nfk)] <- attributes(rform$ratetable)$dimid
    expe <- rs.surv(Surv(Y,stat)~1,data,ratetable=rform$ratetable,method="ederer2")
    esurv <- -log(expe$surv[expe$n.event!=0])
    if(esurv[length(esurv)]==Inf)esurv[length(esurv)] <-  esurv[length(esurv)-1]
    x <- seq(.1,3,length=5)
    dif <- rep(NA,5)
    options(warn=-1)
    diter <- max(round(max(data$Y)/356.24),3)
    for(it in 1:5){
      fit <- rsfitterem(data1,NULL,diter,rform$ratetable,control$epsilon,x[it],0,rform$cause,Nie)
      dif[it] <- sum((esurv-fit$Lambda0)^2)
    }
    wh <- which.min(dif)
    if(wh==1)x <- seq(x[wh],x[wh+1]-.1,length=5)
    else if(wh==5)x <- c(x, max(data$Y)/ max(diff(data$Y)))
    if(wh!=1)
      x <- seq(x[wh-1]+.1,x[wh+1]-.1,length=5)
    dif <- rep(NA,5)

    for(it in 1:5){
      fit <- rsfitterem(data1,NULL,diter,rform$ratetable,control$epsilon,x[it],0,rform$cause,Nie)
      dif[it] <- sum((esurv-fit$Lambda0)^2)
    }
    options(warn=0)
    Nie <- fit$Nie
    bwin <- x[which.min(dif)]
  }

  fit <- rsfitterem(data, beta, control$maxit, rform$ratetable,
                    control$epsilon, bwin, p, rform$cause,Nie)

  Nie <- rep(0,nrow(data))
  Nie[data$stat==1] <- fit$Nie
  fit$Nie <- Nie[order(ord_id)]
  fit$bwin <- list(bwin=fit$bwin,bwinfac=bwin)
  fit
}




#' Fit an Additive model for Relative Survival
#'
#' The function fits an additive model to the data. The methods implemented are
#' the maximum likelihood method, the semiparametric method, a glm model with a
#' \code{binomial} error and a glm model with a \code{poisson} error.
#'
#' NOTE: The follow-up time must be specified in days. The \code{ratetable}
#' being used may have different variable names and formats than the user's
#' data set, this is dealt with by the \code{rmap} argument. For example, if
#' age is in years in the data set but in days in the \code{ratetable} object,
#' age=age*365.241 should be used. The calendar year can be in any date format
#' (date, Date and POSIXt are allowed), the date formats in the
#' \code{ratetable} and in the data may differ.
#'
#' The maximum likelihood method and both glm methods assume a fully parametric
#' model with a piecewise constant baseline excess hazard function. The
#' intervals on which the baseline is assumed constant should be passed via
#' argument \code{int}. The EM method is semiparametric, i.e. no assumptions
#' are made for the baseline hazard and therefore no intervals need to be
#' specified.
#'
#' The methods using glm are methods for grouped data. The groups are formed
#' according to the covariate values. This should be taken into account when
#' fitting a model. The glm method returns life tables for groups specified by
#' the covariates in \code{groups}.
#'
#' The EM method output includes the smoothed baseline excess hazard
#' \code{lambda0}, the cumulative baseline excess hazard \code{Lambda0} and
#' \code{times} at which they are estimated. The individual probabilites of
#' dying due to the excess risk are returned as \code{Nie}.  The EM method
#' fitting procedure requires some local smoothing of the baseline excess
#' hazard. The default \code{bwin=-1} value lets the function find an
#' appropriate value for the smoothing band width. While this ensures an
#' unbiased estimate, the procedure time is much longer. As the value found by
#' the function is independent of the covariates in the model, the value can be
#' read from the output (\code{bwinfac}) and used for refitting different
#' models to the same data to save time.
#'
#' @param formula a formula object, with the response as a \code{Surv} object
#' on the left of a \code{~} operator, and, if desired, terms separated by the
#' \code{+} operator on the right. \code{Surv(start,stop,event)} outcomes
#' are also possible for time-dependent covariates and left-truncation for
#' \code{method='EM'}.
#'
#' NOTE: The follow-up time must be in days.
#' @param data a data.frame in which to interpret the variables named in the
#' \code{formula}.
#' @param ratetable a table of event rates, organized as a \code{ratetable}
#' object, such as \code{slopop}.
#' @param int either a single value denoting the number of follow-up years or a
#' vector specifying the intervals (in years) in which the hazard is constant
#' (the times that are bigger than \code{max(int)} are censored. If missing,
#' only one interval (from time 0 to maximum observation time) is assumed.  The
#' EM method does not need the intervals, only the maximum time can be
#' specified (all times are censored after this time point).
#' @param na.action a missing-data filter function, applied to the model.frame,
#' after any subset argument has been used.  Default is
#' \code{options()$na.action}.
#' @param method \code{glm.bin} or \code{glm.poi} for a glm model, \code{EM}
#' for the EM algorithm and \code{max.lik} for the maximum likelihood model
#' (default).
#' @param init vector of initial values of the iteration.  Default initial
#' value is zero for all variables.
#' @param bwin controls the bandwidth used for smoothing in the EM algorithm.
#' The follow-up time is divided into quartiles and \code{bwin} specifies a
#' factor by which the maximum between events time length on each interval is
#' multiplied. The default \code{bwin=-1} lets the function find an appropriate
#' value. If \code{bwin=0}, no smoothing is applied.
#' @param centered if \code{TRUE}, all the variables are centered before
#' fitting and the baseline excess hazard is calculated accordingly. Default is
#' \code{FALSE}.
#' @param cause A vector of the same length as the number of cases. \code{0}
#' for population deaths, \code{1} for disease specific deaths, \code{2}
#' (default) for unknown. Can only be used with the \code{EM} method.
#' @param control a list of parameters for controlling the fitting process.
#' See the documentation for \code{glm.control} for details.
#' @param rmap an optional list to be used if the variables are not organized
#' and named in the same way as in the \code{ratetable} object. See details
#' below.
#' @param ... other arguments will be passed to \code{glm.control}.
#' @return An object of class \code{rsadd}. In the case of
#' \code{method="glm.bin"} and \code{method="glm.poi"} the class also inherits
#' from \code{glm} which inherits from the class \code{lm}. Objects of this
#' class have methods for the functions \code{print} and \code{summary}. An
#' object of class \code{rsadd} is a list containing at least the following
#' components: \item{data}{the data as used in the model, along with the
#' variables defined in the rate table} \item{ratetable}{the ratetable used.}
#' \item{int}{the maximum time (in years) used. All the events at and after
#' this value are censored.} \item{method}{the fitting method that was used.}
#' \item{linear.predictors}{the vector of linear predictors, one per subject.}
#' @seealso \code{\link{rstrans}}, \code{\link{rsmul}}
#' @references Package. Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#'
#' EM algorithm: Pohar Perme M., Henderson R., Stare, J. (2009) "An approach to
#' estimation in relative survival regression." Biostatistics, \bold{10}:
#' 136--146.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' #fit an additive model
#' #note that the variable year is given in days since 01.01.1960 and that
#' #age must be multiplied by 365.241 in order to be expressed in days.
#' fit <- rsadd(Surv(time,cens)~sex+as.factor(agegr)+ratetable(age=age*365.241),
#' 	    ratetable=slopop,data=rdata,int=5)
#'
#' #check the goodness of fit
#' rs.br(fit)
#'
#' #use the EM method and plot the smoothed baseline excess hazard
#' fit <- rsadd(Surv(time,cens)~sex+age,rmap=list(age=age*365.241),
#' 	 ratetable=slopop,data=rdata,int=5,method="EM")
#' sm <- epa(fit)
#' plot(sm$times,sm$lambda,type="l")
#'
rsadd <- function (formula = formula(data), data = parent.frame(), ratetable = relsurv::slopop,
                   int, na.action, method = "max.lik", init, bwin, centered = FALSE,
                   cause, control, rmap, ...)
{
  call <- match.call()
  if (missing(control))
    control <- glm.control(...)

  if(!missing(cause)){								#NEW: ce cause ne manjka, ga preverim in dodam kot spremenljivko
    if (length(cause) != nrow(data))
      stop("Length of cause does not match data dimensions")
    data$cause <- cause
    rform <- rformulate(formula, data, ratetable, na.action,
                        int, centered, cause)
  }
  else{ #no cause
    if (!missing(rmap)) {
      rmap <- substitute(rmap)
      #rform <- rformulate(formula,data, ratetable, na.action, rmap,int, centered)			#get the data ready
    }
    #else
    rform <- rformulate(formula,data, ratetable, na.action, rmap, int, centered)
  }

  if (method == "EM") {
    if (!missing(int)) {
      if (length(int) > 1 | any(int <= 0))
        stop("Invalid value of 'int'")
    }
  }
  else {
    if (missing(int))
      int <- c(0,ceiling(max(rform$Y/365.241)))
    if (length(int) == 1) {
      if (int <= 0)
        stop("The value of 'int' must be positive ")
      int <- 0:int
    }
    else if (int[1] != 0)
      stop("The first interval in 'int' must start with 0")
  }
  method <-  match.arg(method,c("glm.bin","glm.poi","max.lik","EM"))

  if (method == "glm.bin" | method == "glm.poi")
    fit <- glmxp(rform = rform, interval = int, method = method,
                 control = control)
  else if (method == "max.lik")
    fit <- maxlik(rform = rform, interval = int, init = init,
                  control = control)
  else if (method == "EM")
    fit <- em(rform, init, control, bwin)
  fit$call <- call
  fit$formula <- formula
  fit$data <- rform$data
  fit$ratetable <- rform$ratetable
  fit$n <- nrow(rform$data)
  if (length(rform$na.action))
    fit$na.action <- rform$na.action
  fit$y <- rform$Y.surv
  fit$method <- method
  if (method == "EM") {
    if (!missing(int))
      fit$int <- int
    else fit$int <- ceiling(max(rform$Y[rform$status == 1])/365.241)
    fit$terms <- rform$Terms
    if(centered)fit$mvalue <- rform$mvalue
  }
  if (method == "max.lik") {
    fit$terms <- rform$Terms
  }
  if (rform$m > 0)
    fit$linear.predictors <- as.matrix(rform$X) %*% fit$coef[1:ncol(rform$X)]
  fit
}


maxlik <- function (rform, interval, subset, init, control)
{
  data <- rform$data
  max.time <- max(data$Y)/365.241
  if (max.time < max(interval))
    interval <- interval[1:(sum(max.time > interval) + 1)]
  fk <- (attributes(rform$ratetable)$factor != 1)
  nfk <- length(fk)
  data <- cbind(data, offset = rform$offset)
  data <- survsplit(data, cut = interval[-1] * 365.241, end = "Y",
                    event = "stat", start = "start", episode = "epi", interval = interval)
  del <- which(data$start==data$Y)
  if(length(del))    data <- data[-del,]
  offset <- data$offset
  data$offset <- NULL
  d.int <- diff(interval)
  data[, 4:(nfk + 3)] <- data[, 4:(nfk + 3)] + data$start %*%
    t(fk)
  data$lambda <- rep(0, nrow(data))
  nsk <- nrow(data[data$stat == 1, ])
  xx <- exp.prep(data[data$stat == 1, 4:(nfk + 3),drop=FALSE] + (data[data$stat ==
                                                                        1, ]$Y - data[data$stat == 1, ]$start) %*% t(fk), 1,  rform$ratetable)
  data$lambda[data$stat == 1] <- -log(xx) * 365.241
  xx <- exp.prep(data[, 4:(nfk + 3),drop=FALSE], data$Y - data$start, rform$ratetable)
  data$epi <- NULL
  data$ds <- -log(xx)
  data$Y <- data$Y/365.241
  data$start <- data$start/365.241
  data <- data[, -(4:(3 + nfk))]
  intn <- length(interval[-1])
  m <- rform$m
  p <-  m + intn
  if (!missing(init) && !is.null(init)) {
    if (length(init) != p)
      stop("Wrong length for inital values")
  }
  else init <- rep(0, p)
  if(m>0){
    init0 <- init[-(1:m)]
    data1 <- data[,-(4:(3+m))]
  }
  else{
    init0 <- init
    data1 <- data
  }
  fit0 <- lik.fit(data1, 0, intn, init0,  control, offset)
  if(m>0){
    init[-(1:m)] <- fit0$coef
    fit <- lik.fit(data, m, intn, init,  control, offset)
  }
  else fit <- fit0
  fit$int <- interval
  class(fit) <- "rsadd"
  fit$times <- fit$int*365.241						#dodano za potrebe rs.surv.rsadd
  fit$Lambda0 <- cumsum(c(0, exp(fit$coef[(m+1):p])*diff(fit$int)  ))
  fit
}

lik.fit <- function (data, m, intn, init, control, offset)
{
  n <- dim(data)[1]
  varpos <- 4:(3 + m + intn)
  x <- data[, varpos]
  varnames <- names(data)[varpos]
  lbs <- names(x)
  x <- as.matrix(x)
  p <- length(varpos)
  d <- data$stat
  ds <- data$ds
  h <- data$lambda
  y <- data$Y - data$start
  maxiter <- control$maxit
  if (!missing(init) && !is.null(init)) {
    if (length(init) != p)
      stop("Wrong length for inital values")
  }
  else init <- rep(0, p)
  b <- matrix(init, p, 1)
  b0 <- b
  fit <- mlfit(b, p, x, offset, d, h, ds, y, maxiter, control$epsilon)
  if (maxiter > 1 & fit$nit >= maxiter) {
    values <- apply(data[data$stat==1,varpos,drop=FALSE],2,sum)			#NEW: deluje tudi, ce je ratetable eno-dimenzionalen
    problem <- which.min(values)
    outmes <- "Ran out of iterations and did not converge"
    if(values[problem]==0)tzero <- ""
    else tzero <- "only "
    if(values[problem]<5){
      if(!is.na(strsplit(names(values)[problem],"fu")[[1]][2]))outmes <- paste(outmes, "\n This may be due to the fact that there are ",tzero, values[problem], " events on interval",strsplit(names(values)[problem],"fu")[[1]][2],"\n You can use the 'int' argument to change the follow-up intervals in which the baseline excess hazard is assumed constant",sep="")
      else outmes <- paste(outmes, "\n This may be due to the fact that there are ",tzero, values[problem], " events for covariate value ",names(values)[problem],sep="")
    }
    warning(outmes)
  }
  b <- as.vector(fit$b)
  names(b) <- varnames
  fit <- list(coefficients = b, var = -solve(fit$sd), iter = fit$nit,
              loglik = fit$loglik)
  fit
}





#' Split a Survival Data Set at Specified Times
#'
#' Given a survival data set and a set of specified cut times, the function
#' splits each record into multiple records at each cut time. The new data set
#' is be in \code{counting process} format, with a start time, stop time, and
#' event status for each record. More general than \code{survSplit} as it also
#' works with the data already in the \code{counting process} format.
#'
#'
#' @param data data frame.
#' @param cut vector of timepoints to cut at.
#' @param end character string with name of event time variable.
#' @param event character string with name of censoring indicator.
#' @param start character string with name of start variable (will be created
#' if it does not exist).
#' @param id character string with name of new id variable to create
#' (optional).
#' @param zero If \code{start} doesn't already exist, this is the time that the
#' original records start. May be a vector or single value.
#' @param episode character string with name of new episode variable
#' (optional).
#' @param interval this argument is used by \code{max.lik} function
#' @return New, longer, data frame.
#' @seealso \code{\link{survSplit}}.
#' @keywords survival
survsplit <- function (data, cut, end, event, start, id = NULL, zero = 0,
                       episode = NULL, interval = NULL)
{
  ntimes <- length(cut)
  n <- nrow(data)
  p <- ncol(data)
  if (length(interval) > 0) {
    ntimes <- ntimes - 1
    sttime <- c(rep(0, n), rep(cut[-length(cut)], each = n))
    endtime <- rep(cut, each = n)
  }
  else {
    endtime <- rep(c(cut, Inf), each = n)
    sttime <- c(rep(0, n), rep(cut, each = n))
  }
  newdata <- lapply(data, rep, ntimes + 1)
  eventtime <- newdata[[end]]
  if (start %in% names(data))
    starttime <- newdata[[start]]
  else starttime <- rep(zero, length = (ntimes + 1) * n)
  starttime <- pmax(sttime, starttime)
  epi <- rep(0:ntimes, each = n)
  if (length(interval) > 0)
    status <- ifelse(eventtime <= endtime & eventtime >=
                       starttime, newdata[[event]], 0)
  else status <- ifelse(eventtime <= endtime & eventtime >
                          starttime, newdata[[event]], 0)
  endtime <- pmin(endtime, eventtime)
  if (length(interval) > 0)
    drop <- (starttime > endtime) | (starttime == endtime &
                                       status == 0)
  else drop <- starttime >= endtime
  newdata <- do.call("data.frame", newdata)
  newdata <- newdata[!drop, ]
  newdata[, start] <- starttime[!drop]
  newdata[, end] <- endtime[!drop]
  newdata[, event] <- status[!drop]
  if (!is.null(id))
    newdata[, id] <- rep(rownames(data), ntimes + 1)[!drop]
  fu <- NULL
  if (length(interval) > 2) {
    for (it in 1:length(interval[-1])) {
      drop1 <- sum(!drop[1:(it * n - n)])
      drop2 <- sum(!drop[(it * n - n + 1):(it * n)])
      drop3 <- sum(!drop[(it * n + 1):(length(interval[-1]) *
                                         n)])
      if (it == 1)
        fu <- cbind(fu, c(rep(1, drop2), rep(0, drop3)))
      else if (it == length(interval[-1]))
        fu <- cbind(fu, c(rep(0, drop1), rep(1, drop2)))
      else fu <- cbind(fu, c(rep(0, drop1), rep(1, drop2),
                             rep(0, drop3)))
    }
    fu <- as.data.frame(fu)
    names(fu) <- c(paste("fu [", interval[-length(interval)],
                         ",", interval[-1], ")", sep = ""))
    newdata <- cbind(newdata, fu)
  }
  else if (length(interval) == 2) {
    fu <- rep(1, sum(!drop))
    newdata <- cbind(newdata, fu)
    names(newdata)[ncol(newdata)] <- paste("fu [", interval[1],
                                           ",", interval[2], "]", sep = "")
  }
  if (!is.null(episode))
    newdata[, episode] <- epi[!drop]
  newdata
}


glmxp <- function (rform, data, interval, method, control)
{
  if (rform$m == 1)
    g <- as.integer(as.factor(rform$X[[1]]))
  else if (rform$m > 1) {
    gvar <- NULL
    for (i in 1:rform$m) {
      gvar <- append(gvar, rform$X[i])
    }
    tabgr <- as.data.frame(table(gvar))
    tabgr <- tabgr[, 1:rform$m]
    n.groups <- dim(tabgr)[1]
    mat <- do.call("data.frame", gvar)
    names(mat) <- names(tabgr)
    tabgr <- cbind(tabgr, g = as.numeric(row.names(tabgr)))
    mat <- cbind(mat, id = 1:rform$n)
    c <- merge(tabgr, mat)
    g <- c[order(c$id), rform$m + 1]
  }
  else g <- rep(1, rform$n)
  vg <- function(X) {
    n <- dim(X)[1]
    w <- sum((X$event == 0) & (X$fin == 1) & (X$y != 1))
    nd <- sum((X$event == 1) & (X$fin == 1))
    ps <- exp.prep(X[, 4:(nfk + 3),drop=FALSE], t.int, rform$ratetable)
    ld <- n - w/2
    lny <- log(sum(X$y))
    k <- t.int/365.241
    dstar <- sum(-log(ps)/k * X$y)
    ps <- mean(ps)
    if (rform$m == 0)
      data.rest <- X[1, 7 + nfk + rform$m, drop = FALSE]
    else data.rest <- X[1, c((3 + nfk + 1):(3 + nfk + rform$m),
                             7 + nfk + rform$m)]
    cbind(nd = nd, ld = ld, ps = ps, lny = lny, dstar = dstar,
          k = k, data.rest)
  }
  nint <- length(interval)
  if (nint < 2)
    stop("Illegal interval value")
  meje <- interval
  my.fun <- function(x) {
    if (x > 1) {
      x.t <- rep(1, floor(x))
      if (x - floor(x) > 0)
        x.t <- c(x.t, x - floor(x))
      x.t
    }
    else x
  }
  int <- apply(matrix(diff(interval), ncol = 1), 1, my.fun)
  if (is.list(int))
    int <- c(0, cumsum(do.call("c", int)))
  else int <- c(0, cumsum(int))
  int <- int * 365.241
  nint <- length(int)
  X <- cbind(rform$data, grupa = g)
  fk <- (attributes(rform$ratetable)$factor != 1)
  nfk <- length(fk)
  Z <- X[X$start >= int[2], ]
  nz <- dim(Z)[1]
  Z$fin <- rep(0, nz)
  Z$event <- rep(0, nz)
  Z$fu <- rep(0, nz)
  Z$y <- rep(0, nz)
  Z$origstart <- Z$start
  Z$xind <- rep(0, nz)
  if (nrow(Z) > 0)
    Z[, 4:(nfk + 3)] <- Z[, 4:(nfk + 3)] + matrix(Z$start,
                                                  ncol = nfk, byrow = FALSE, nrow = nrow(Z)) * matrix(fk,
                                                                                                      ncol = nfk, byrow = TRUE, nrow = nrow(Z))
  X <- X[X$start < int[2], ]
  X$fin <- (X$Y <= int[2])
  X$event <- X$fin * X$stat
  ford <- eval(substitute(paste("[", a, ",", b, "]", sep = ""),
                          list(a = meje[1], b = meje[2])))
  X$fu <- rep(ford, rform$n - nz)
  t.int <- int[2] - int[1]
  X$y <- (pmin(X$Y, int[2]) - X$start)/365.241
  X$origstart <- X$start
  X$xind <- rep(1, nrow(X))
  gr1 <- by(X, X$grupa, vg)
  grm1 <- do.call("rbind", gr1)
  X <- X[X$fin == 0, ]
  X$start <- rep(int[2], dim(X)[1])
  X <- rbind(X, Z[Z$start < int[3], ])
  Z <- Z[Z$start >= int[3], ]
  temp <- 0
  if (nint > 2) {
    for (i in 3:nint) {
      ni <- dim(X)[1]
      if (ni == 0) {
        temp <- 1
        break
      }
      X$fin <- X$Y <= int[i]
      X$event <- X$fin * X$stat
      l <- sum(int[i - 1] >= meje * 365.241)
      if(l==1)
        ftemp <- eval(substitute(paste("[", a, ",", b, "]", sep = ""),
                                 list(a = meje[l], b = meje[l + 1])))

      else
        ftemp <- eval(substitute(paste("(", a, ",", b, "]", sep = ""),
                                 list(a = meje[l], b = meje[l + 1])))
      ford <- c(ford, ftemp)
      X$fu <- rep(ford[i - 1], ni)
      t.int <- int[i] - int[i - 1]
      index <- X$origstart < int[i - 1]
      index1 <- as.logical(X$xind)
      if (sum(index) > 0)
        X[index, 4:(nfk + 3)] <- X[index, 4:(nfk + 3)] +
        matrix(fk * t.int, ncol = nfk, byrow = TRUE,
               nrow = sum(index))
      X$xind <- rep(1, nrow(X))
      X$y <- (pmin(X$Y, int[i]) - X$start)/365.241
      gr1 <- by(X, X$grupa, vg)
      grm1 <- rbind(grm1, do.call("rbind", gr1))
      X <- X[X$fin == 0, ]
      X$start <- rep(int[i], dim(X)[1])
      if (i == nint)
        break
      X <- rbind(X, Z[Z$start < int[i + 1], ])
      X <- X[X$start != X$Y, ]
      Z <- Z[Z$start >= int[i + 1], ]
    }
    l <- sum(int[i - temp] > meje * 365.241)
    interval <- meje[1:(l + 1)]
  }
  else interval <- meje[1:2]
  grm1$fu <- factor(grm1$fu, levels = unique(ford))
  if (method == "glm.bin") {
    ht <- binomial(link = cloglog)
    ht$link <- "Hakulinen-Tenkanen relative survival model"
    ht$linkfun <- function(mu) log(-log((1 - mu)/ps))
    ht$linkinv <- function(eta) 1 - exp(-exp(eta)) * ps
    ht$mu.eta <- function(eta) exp(eta) * exp(-exp(eta)) *
      ps
    .ps <- ps <- grm1$ps
    #assign(".ps", grm1$ps, envir = .GlobalEnv)
    # ht$initialize <- expression({
    #     n <- y[, 1] + y[, 2]
    #     y <- ifelse(n == 0, 0, y[, 1]/n)
    #     weights <- weights * n
    #     mustart <- (n * y + 0.01)/(n + 0.02)
    #     mustart[(1 - mustart)/data$ps >= 1] <- data$ps[(1 - mustart)/data$ps >=
    #         1] * 0.9
    # })
    if (any(grm1$ld - grm1$nd > grm1$ps * grm1$ld)) {
      n <- sum(grm1$ld - grm1$nd > grm1$ps * grm1$ld)
      g <- dim(grm1)[1]
      warnme <- paste("Observed number of deaths is smaller than the expected in ",
                      n, "/", g, " groups of patients", sep = "")
    }
    else warnme <- ""

    if (length(interval) == 2 & rform$m == 0)
      stop("No groups can be formed")
    if (length(interval) == 1 | length(table(grm1$fu)) ==
        1)
      grm1$fu <- as.integer(grm1$fu)

    y <- ifelse(grm1$ld == 0, 0, grm1$nd/grm1$ld)
    #weights <- weights * grm1$ld
    mustart <- (grm1$ld * y + 0.01)/(grm1$ld + 0.02)
    mustart[(1 - mustart)/grm1$ps >= 1] <- grm1$ps[(1 - mustart)/grm1$ps >=
                                                     1] * 0.9

    if (!length(rform$X))
      local.ht <- glm(cbind(nd, ld - nd) ~ -1 + fu + offset(log(k)),
                      data = grm1, family = ht,mustart=mustart)
    else {
      xmat <- as.matrix(grm1[, 7:(ncol(grm1) - 1)])

      local.ht <- glm(cbind(nd, ld - nd) ~ -1 + xmat +
                        fu + offset(log(k)), data = grm1, family = ht,mustart=mustart)
    }
    names(local.ht[[1]]) <- c(names(rform$X), paste("fu",
                                                    levels(grm1$fu)))
  }
  else if (method == "glm.poi") {
    pot <- poisson()
    pot$link <- "glm relative survival model with Poisson error"
    pot$linkfun <- function(mu) log(mu - dstar)
    pot$linkinv <- function(eta) dstar + exp(eta)
    #assign(".dstar", grm1$dstar, envir = .GlobalEnv)
    if (any(grm1$nd - grm1$dstar < 0)) {
      pot$initialize <- expression({
        if (any(y < 0)) stop(paste("Negative values not allowed for",
                                   "the Poisson family"))
        n <- rep.int(1, nobs)
        #mustart <- pmax(y, .dstar) + 0.1
      })
    }
    if (any(grm1$nd - grm1$dstar < 0)) {
      n <- sum(grm1$nd - grm1$dstar < 0)
      g <- dim(grm1)[1]
      warnme <- paste("Observed number of deaths is smaller than the expected in ",
                      n, "/", g, " groups of patients", sep = "")
    }
    else warnme <- ""
    dstar <- grm1$dstar
    if (length(interval) == 2 & rform$m == 0)
      stop("No groups can be formed")
    if (length(interval) == 1 | length(table(grm1$fu)) ==
        1)
      grm1$fu <- as.integer(grm1$fu)

    mustart <- pmax(grm1$nd, grm1$dstar) + 0.1
    if (!length(rform$X))
      local.ht <- glm(nd ~ -1 + fu, data = grm1, family = pot,
                      offset = grm1$lny,mustart=mustart)
    else {
      xmat <- as.matrix(grm1[, 7:(ncol(grm1) - 1)])
      local.ht <- glm(nd ~ -1 + xmat + fu, data = grm1,
                      family = pot, offset = grm1$lny,mustart=mustart)
    }
    names(local.ht[[1]]) <- c(names(rform$X), paste("fu",
                                                    levels(grm1$fu)))
  }
  else stop(paste("Method '", method, "' not a valid method",
                  sep = ""))
  class(local.ht) <- c("rsadd", class(local.ht))
  local.ht$warnme <- warnme
  local.ht$int <- interval
  local.ht$groups <- local.ht$data
  return(local.ht)
}



#' Calculate Residuals for a "rsadd" Fit
#'
#' Calculates partial residuals for an additive relative survival model.
#'
#'
#' @param object an object inheriting from class \code{rsadd}, representing a
#' fitted additive relative survival model. Typically this is the output from
#' the \code{rsadd} function.
#' @param type character string indicating the type of residual desired.
#' Currently only Schoenfeld residuals are implemented.
#' @param ... other arguments.
#' @return A list of the following values is returned: \item{res}{a matrix
#' containing the residuals for each variable.} \item{varr}{the variance for
#' each residual} \item{varr1}{the sum of \code{varr}.} \item{kvarr}{the
#' derivative of each residual, to be used in \code{rs.zph} function.}
#' \item{kvarr1}{the sum of \code{kvarr}.}
#' @seealso \code{\link{rsadd}}.
#' @references Package. Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#'
#' Goodness of fit: Stare J.,Pohar Perme M., Henderson R. (2005) "Goodness of
#' fit of relative survival models." Statistics in Medicine, \bold{24}:
#' 3911--3925.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' fit <- rsadd(Surv(time,cens)~sex,rmap=list(age=age*365.241),
#'        ratetable=slopop,data=rdata,int=5)
#' sresid <- residuals.rsadd(fit)
#'
residuals.rsadd <- function (object, type = "schoenfeld", ...)
{
  data <- object$data[order(object$data$Y), ]
  ratetable <- object$ratetable
  beta <- object$coef
  start <- data[, 1]
  stop <- data[, 2]
  event <- data[, 3]
  fk <- (attributes(ratetable)$factor != 1)
  nfk <- length(fk)
  n <- nrow(data)
  scale <- 1
  if (object$method == "EM")
    scale <- 365.241
  m <- ncol(data)
  rem <- m - nfk - 3
  interval <- object$int
  int <- ceiling(max(interval))
  R <- data[, 4:(nfk + 3)]
  lp <- matrix(-log(exp.prep(as.matrix(R), 365.241, object$ratetable))/scale, ncol = 1)
  fu <- NULL
  if (object$method == "EM") {
    death.time <- stop[event == 1]
    for (it in 1:int) {
      fu <- as.data.frame(cbind(fu, as.numeric(death.time/365.241 <
                                                 it & (death.time/365.241) >= (it - 1))))
    }
    if(length(death.time)!=length(unique(death.time))){
      utimes <- which(c(1,diff(death.time))!=0)
      razteg <- function(x){
        # x is a 0/1 vector, the output is a vector of length sum(x), with the corresponding rep numbers
        n <- length(x)
        repu <- rep(1,n)
        repu[x==1] <- 0
        repu <- rev(cumsum(rev(repu)))
        repu <- repu[x==1]
        repu <- -diff(c(repu,0))+1
        if(sum(repu)!=n)repu <- c(n-sum(repu),repu)  #ce je prvi cas censoring, bo treba se kej narest??
        repu
      }
      rutd <- rep(0,length(death.time))
      rutd[utimes] <- 1
      rutd <- razteg(rutd)				#from  unique event times to event times
    }
    else rutd <- rep(1,length(death.time))
    lambda0 <- rep(object$lambda0,rutd)
  }
  else {
    pon <- NULL
    for (i in 1:(length(interval) - 1)) {
      width <- ceiling(interval[i + 1]) - floor(interval[i])
      lo <- interval[i]
      hi <- min(interval[i + 1], floor(interval[i]) + 1)
      for (j in 1:width) {
        fu <- as.data.frame(cbind(fu, as.numeric(stop/365.241 <
                                                   hi & stop/365.241 >= lo)))
        names(fu)[ncol(fu)] <- paste("fu", lo, "-", hi,
                                     sep = "")
        if (j == width) {
          pon <- c(pon, sum(fu[event == 1, (ncol(fu) -
                                              width + 1):ncol(fu)]))
          break()
        }
        else {
          lo <- hi
          hi <- min(interval[i + 1], floor(interval[i]) +
                      1 + j)
        }
      }
    }
    m <- ncol(data)
    data <- cbind(data, fu)
    rem <- m - nfk - 3
    lambda0 <- rep(exp(beta[rem + 1:(length(interval) - 1)]),
                   pon)
    fu <- fu[event == 1, , drop = FALSE]
    beta <- beta[1:rem]
  }
  if (int >= 2) {
    for (j in 2:int) {
      R <- R + matrix(fk * 365.241, ncol = ncol(R), byrow = TRUE,
                      nrow = n)
      xx <- exp.prep(R, 365.241, object$ratetable)
      lp <- cbind(lp, -log(xx)/scale)
    }
  }
  z <- as.matrix(data[, (4 + nfk):m])
  out <- resid.com(start, stop, event, z, beta, lp, lambda0,
                   fu, n, rem, int, type)
  out
}

resid.com <- function (start, stop, event, z, beta, lp, lambda0, fup, n, rem,
                       int, type)
{
  le <- exp(z %*% beta)
  olp <- if (int > 1)
    apply(lp[n:1, ], 2, cumsum)[n:1, ]
  else matrix(cumsum(lp[n:1])[n:1], ncol = 1)
  ole <- cumsum(le[n:1])[n:1]
  lp.st <- lp[order(start), , drop = FALSE]
  le.st <- le[order(start), , drop = FALSE]
  starter <- sort(start)
  starter1 <- c(starter[1], starter[-length(starter)])
  index <- c(TRUE, (starter != starter1)[-1])
  starter <- starter[index]
  val1 <- apply(matrix(starter, ncol = 1), 1, function(x, Y) sum(x >=
                                                                   Y), stop)
  val1 <- c(val1[1], diff(val1), length(stop) - val1[length(val1)])
  olp.st <- (apply(lp.st[n:1, , drop = FALSE], 2, cumsum)[n:1,
                                                          , drop = FALSE])[index, , drop = FALSE]
  olp.st <- apply(olp.st, 2, function(x) rep(c(x, 0), val1))
  olp <- olp - olp.st
  olp <- olp[event == 1, ]
  olp <- apply(fup * olp, 1, sum)
  ole.st <- cumsum(le.st[n:1])[n:1][index]
  ole.st <- rep(c(ole.st, 0), val1)
  ole <- ole - ole.st
  ole <- ole[event == 1] * lambda0
  s0 <- ole + olp
  sc <- NULL
  zb <- NULL
  kzb <- NULL
  f1 <- function(x) rep(mean(x), length(x))
  f2 <- function(x) apply(x, 2, f1)
  f3 <- function(x) apply(x, 1:2, f1)
  ties <- length(unique(stop[event == 1])) != length(stop[event ==
                                                            1])
  for (k in 1:rem) {
    zlp <- apply((z[, k] * lp)[n:1, , drop = FALSE], 2, cumsum)[n:1,
                                                                , drop = FALSE]
    zlp.st <- (apply((z[, k] * lp.st)[n:1, , drop = FALSE],
                     2, cumsum)[n:1, , drop = FALSE])[index, , drop = FALSE]
    zlp.st <- apply(zlp.st, 2, function(x) rep(c(x, 0), val1))
    zlp <- zlp - zlp.st
    zlp <- zlp[event == 1, , drop = FALSE]
    zlp <- apply(fup * zlp, 1, sum)
    zle <- cumsum((z[, k] * le)[n:1])[n:1]
    zle.st <- cumsum((z[, k] * le.st)[n:1])[n:1][index]
    zle.st <- rep(c(zle.st, 0), val1)
    zle <- zle - zle.st
    zle <- zle[event == 1]
    zle <- zle * lambda0
    s1 <- zle + zlp
    zb <- cbind(zb, s1/s0)
    kzb <- cbind(kzb, zle/s0)
  }
  s1ties <- cbind(zb, kzb)
  if (ties) {
    s1ties <- by(s1ties, stop[event == 1], f2)
    s1ties <- do.call("rbind", s1ties)
  }
  zb <- s1ties[, 1:rem, drop = FALSE]
  kzb <- s1ties[, -(1:rem), drop = FALSE]
  sc <- z[event == 1, , drop = FALSE] - zb
  row.names(sc) <- stop[event == 1]
  out.temp <- function(x) outer(x, x, FUN = "*")
  krez <- rez <- array(matrix(NA, ncol = rem, nrow = rem),
                       dim = c(rem, rem, sum(event == 1)))
  for (a in 1:rem) {
    for (b in a:rem) {
      zzlp <- apply((z[, a] * z[, b] * lp)[n:1, , drop = FALSE],
                    2, cumsum)[n:1, , drop = FALSE]
      zzlp.st <- (apply((z[, a] * z[, b] * lp.st)[n:1,
                                                  , drop = FALSE], 2, cumsum)[n:1, , drop = FALSE])[index,
                                                                                                    , drop = FALSE]
      zzlp.st <- apply(zzlp.st, 2, function(x) rep(c(x,
                                                     0), val1))
      zzlp <- zzlp - zzlp.st
      zzlp <- zzlp[event == 1, , drop = FALSE]
      zzlp <- apply(fup * zzlp, 1, sum)
      zzle <- cumsum((z[, a] * z[, b] * le)[n:1])[n:1]
      zzle.st <- cumsum((z[, a] * z[, b] * le.st)[n:1])[n:1][index]
      zzle.st <- rep(c(zzle.st, 0), val1)
      zzle <- zzle - zzle.st
      zzle <- zzle[event == 1]
      zzle <- zzle * lambda0
      s2 <- zzlp + zzle
      s20 <- s2/s0
      ks20 <- zzle/s0
      s2ties <- cbind(s20, ks20)
      if (ties) {
        s2ties <- by(s2ties, stop[event == 1], f2)
        s2ties <- do.call("rbind", s2ties)
      }
      rez[a, b, ] <- rez[b, a, ] <- s2ties[, 1]
      krez[a, b, ] <- krez[b, a, ] <- s2ties[, 2]
    }
  }
  juhu <- apply(zb, 1, out.temp)
  if (is.null(dim(juhu)))
    juhu1 <- array(data = matrix(juhu, ncol = a), dim = c(a,
                                                          a, length(zb[, 1])))
  else juhu1 <- array(data = apply(juhu, 2, matrix, ncol = a),
                      dim = c(a, a, length(zb[, 1])))
  varr <- rez - juhu1
  kjuhu <- apply(cbind(zb, kzb), 1, function(x) outer(x[1:rem],
                                                      x[-(1:rem)], FUN = "*"))
  if (is.null(dim(kjuhu)))
    kjuhu1 <- array(data = matrix(kjuhu, ncol = rem), dim = c(rem,
                                                              rem, length(zb[, 1])))
  else kjuhu1 <- array(data = apply(kjuhu, 2, matrix, ncol = rem),
                       dim = c(rem, rem, length(zb[, 1])))
  kvarr <- krez - kjuhu1
  for (i in 1:dim(varr)[1]) varr[i, i, which(varr[i, i, ] <
                                               0)] <- 0
  for (i in 1:dim(kvarr)[1]) kvarr[i, i, which(kvarr[i, i,
  ] < 0)] <- 0
  varr1 <- apply(varr, 1:2, sum)
  kvarr1 <- apply(kvarr, 1:2, sum)
  if (type == "schoenfeld")
    out <- list(res = sc, varr1 = varr1, varr = varr, kvarr = kvarr,
                kvarr1 = kvarr1)
  out
}






#' Test the Proportional Hazards Assumption for Relative Survival Regression
#' Models
#'
#' Test the proportional hazards assumption for relative survival models
#' (\code{rsadd}, \code{rsmul} or \code{rstrans}) by forming a Brownian Bridge.
#'
#'
#' @aliases rs.br plot.rs.br print.rs.br
#' @param fit the result of fitting a relative survival model, using the
#' \code{rsadd}, \code{rsmul} or \code{rstrans} function.
#' @param sc partial residuals calculated by the \code{resid} function. This is
#' used to save time if several tests are to be calculated on these residuals
#' and can otherwise be omitted.
#' @param rho a number controlling the weigths of residuals. The weights are
#' the number of individuals at risk at each event time to the power
#' \code{rho}. The default is \code{rho=0}, which sets all weigths to 1.
#' @param test a character string specifying the test to be performed on
#' Brownian bridge. Possible values are \code{"max"} (default), which tests the
#' maximum absolute value of the bridge, and \code{cvm}, which calculates the
#' Cramer Von Mises statistic.
#' @param global should a global Brownian bridge test be performed, in addition
#' to the per-variable tests
#' @return an object of class \code{rs.br}. This function would usually be
#' followed by both a print and a plot of the result. The plot gives a Brownian
#' bridge for each of the variables. The horizontal lines are the 95% and 99%
#' confidence intervals for the maximum absolute value of the Brownian bridge
#' @seealso \code{\link{rsadd}}, \code{rsmul}, \code{rstrans},
#' \code{\link{resid}}.
#' @references Goodness of fit: Stare J.,Pohar Perme M., Henderson R. (2005)
#' "Goodness of fit of relative survival models." Statistics in Medicine,
#' \bold{24}: 3911--3925.
#'
#' Package. Pohar M., Stare J. (2006) "Relative survival analysis in R."
#' Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' fit <- rsadd(Surv(time,cens)~sex,rmap=list(age=age*365.241),
#' 		ratetable=slopop,data=rdata,int=5)
#' rsbr <- rs.br(fit)
#' rsbr
#' plot(rsbr)
#'
rs.br <- function (fit, sc, rho = 0, test = "max", global = TRUE)
{
  test <- match.arg(test,c("max","cvm"))
  if (inherits(fit, "rsadd")) {
    if (missing(sc))
      sc <- resid(fit, "schoenfeld")
    sresid <- sc$res
    varr <- sc$varr
    sresid <- as.matrix(sresid)
  }
  else {
    coef <- fit$coef
    options(warn = -1)
    sc <- coxph.detail(fit)
    options(warn = 0)
    sresid <- sc$score
    varr <- sc$imat
    if (is.null(dim(varr)))
      varr <- array(varr, dim = c(1, 1, length(varr)))
    sresid <- as.matrix(sresid)
  }
  if (inherits(fit, "coxph")) {
    if(is.null(fit$data)){
      temp <- fit$y
      class(temp) <- "matrix"
      if(ncol(fit$y)==2)temp <- data.frame(rep(0,nrow(fit$y)),temp)
      if(is.null(fit$x))stop("The coxph model should be called with x=TRUE argument")
      fit$data <- data.frame(temp,fit$x)
      names(fit$data)[1:3] <- c("start","Y","stat")
    }
  }
  data <- fit$data[order(fit$data$Y), ]
  time <- data$Y[data$stat == 1]
  ties <- (length(unique(time)) != length(time))
  keep <- 1:(ncol(sresid))
  options(warn = -1)
  scaled <- NULL
  varnova <- NULL
  if (ncol(sresid) == 1) {
    varr <- varr[1, 1, ]
    scaled <- sresid/sqrt(varr)
  }
  else { for (i in 1:ncol(sresid)) varnova <- cbind(varnova,varr[i,i,])
  scaled <- sresid/sqrt(varnova)
  }

  options(warn = 0)
  nvar <- ncol(sresid)
  survfit <- getFromNamespace("survfit", "survival")
  temp <- survfit(fit$y~1, type = "kaplan-meier")
  n.risk <- temp$n.risk
  n.time <- temp$time
  if (temp$type == "right") {
    cji <- matrix(fit$y, ncol = 2)
    n.risk <- n.risk[match(cji[cji[, 2] == 1, 1], n.time)]
  }
  else {
    cji <- matrix(fit$y, ncol = 3)
    n.risk <- n.risk[match(cji[cji[, 3] == 1, 2], n.time)]
  }
  n.risk <- sort(n.risk, decreasing = TRUE)
  varnames <- names(fit$coef)[keep]
  u2 <- function(bb) {
    n <- length(bb)
    1/n * (sum(bb^2) - sum(bb)^2/n)
  }
  wc <- function(x, k = 1000) {
    a <- 1
    for (i in 1:k) a <- a + 2 * (-1)^i * exp(-2 * i^2 * pi^2 *
                                               x)
    a
  }
  brp <- function(x, n = 1000) {
    a <- 1
    for (i in 1:n) a <- a - 2 * (-1)^(i - 1) * exp(-2 * i^2 *
                                                     x^2)
    a
  }
  global <- as.numeric(global & ncol(sresid) > 1)
  table <- NULL
  bbt <- as.list(1:(nvar + global))
  for (i in 1:nvar) {
    if (nvar != 1)
      usable <- which(varr[i, i, ] > 1e-12)
    else usable <- which(varr > 1e-12)
    w <- (n.risk[usable])^rho
    w <- w/sum(w)
    if (nvar != 1) {
      sci <- scaled[usable, i]
    }
    else sci <- scaled[usable]
    if (ties) {
      if (inherits(fit, "rsadd")) {
        sci <- as.vector(by(sci, time[usable], function(x) sum(x)/sqrt(length(x))))
        w <- as.vector(by(w, time[usable], sum))
      }
      else {
        w <- w * as.vector(table(time))[usable]
        w <- w/sum(w)
      }
    }
    sci <- sci * sqrt(w)
    timescale <- cumsum(w)
    bm <- cumsum(sci)
    bb <- bm - timescale * bm[length(bm)]
    if (test == "max")
      table <- rbind(table, c(max(abs(bb)), 1 - brp(max(abs(bb)))))
    else if (test == "cvm")
      table <- rbind(table, c(u2(bb), 1 - wc(u2(bb))))
    bbt[[i]] <- cbind(timescale, bb)
  }
  if (inherits(fit, "rsadd")) {
    beta <- fit$coef[1:(length(fit$coef) - length(fit$int) +  1)]
  }
  else beta <- fit$coef
  if (global) {
    qform <- function(matrix, vector) t(vector) %*% matrix %*%
      vector
    diagonal <- apply(varr, 3, diag)
    sumdiag <- apply(diagonal, 2, sum)
    usable <- which(sumdiag > 1e-12)
    score <- t(beta) %*% t(sresid[usable, ])
    varr <- varr[, , usable]
    qf <- apply(varr, 3, qform, vector = beta)
    w <- (n.risk[usable])^rho
    w <- w/sum(w)
    sci <- score/(qf)^0.5
    if (ties) {
      if (inherits(fit, "rsadd")) {
        sci <- as.vector(by(t(sci), time[usable], function(x) sum(x)/sqrt(length(x))))
        w <- as.vector(by(w, time[usable], sum))
      }
      else {
        w <- w * as.vector(table(time))
        w <- w/sum(w)
      }
    }
    sci <- sci * sqrt(w)
    timescale <- cumsum(w)
    bm <- cumsum(sci)
    bb <- bm - timescale * bm[length(bm)]
    if (test == "max")
      table <- rbind(table, c(max(abs(bb)), 1 - brp(max(abs(bb)))))
    else if (test == "cvm")
      table <- rbind(table, c(u2(bb), 1 - wc(u2(bb))))
    bbt[[nvar + 1]] <- cbind(timescale, bb)
    varnames <- c(varnames, "GLOBAL")
  }
  dimnames(table) <- list(varnames, c(test, "p"))
  out <- list(table = table, bbt = bbt, rho = rho)
  class(out) <- "rs.br"
  out
}



#' Behaviour of Covariates in Time for Relative Survival Regression Models
#'
#' Calculates the scaled partial residuals of a relative survival model
#' (\code{rsadd}, \code{rsmul} or \code{rstrans})
#'
#'
#' @param fit the result of fitting an additive relative survival model, using
#' the \code{rsadd}, \code{rsmul} or \code{rstrans} function.
#'
#' In the case of multiplicative and transformation models the output is
#' identical to \code{cox.zph} function, except no test is performed.
#' @param sc partial residuals calculated by the \code{resid} function. This is
#' used to save time if several tests are to be calculated on these residuals
#' and can otherwise be omitted.
#' @param transform a character string specifying how the survival times should
#' be transformed. Possible values are \code{"km"}, \code{"rank"},
#' \code{"identity"} and \code{log}. The default is \code{"identity"}.
#' @param var.type a character string specifying the variance used to scale the
#' residuals. Possible values are \code{"each"}, which estimates the variance
#' for each residual separately, and \code{sum}(default), which assumes the
#' same variance for all the residuals.
#' @return an object of class \code{rs.zph}. This function would usually be
#' followed by a plot of the result. The plot gives an estimate of the
#' time-dependent coefficient \code{beta(t)}. If the proportional hazards
#' assumption is true, \code{beta(t)} will be a horizontal line.
#' @seealso \code{\link{rsadd}}, \code{rsmul}, \code{rstrans},
#' \code{\link{resid}}, \code{\link{cox.zph}}.
#' @references Goodness of fit: Stare J.,Pohar Perme M., Henderson R. (2005)
#' "Goodness of fit of relative survival models." Statistics in Medicine,
#' \bold{24}: 3911--3925.
#'
#' Package. Pohar M., Stare J. (2006) "Relative survival analysis in R."
#' Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' fit <- rsadd(Surv(time,cens)~sex,rmap=list(age=age*365.241),
#' 	ratetable=slopop,data=rdata,int=5)
#' rszph <- rs.zph(fit)
#' plot(rszph)
#'
rs.zph <- function (fit, sc, transform = "identity", var.type = "sum")
{
  if (inherits(fit, "rsadd")) {
    if (missing(sc))
      sc <- resid(fit, "schoenfeld")
    sresid <- sc$res
    varr <- sc$kvarr
    fvar <- solve(sc$kvarr1)
    sresid <- as.matrix(sresid)
  }
  else {
    coef <- fit$coef
    options(warn = -1)
    sc <- coxph.detail(fit)
    options(warn = 0)
    sresid <- as.matrix(resid(fit, "schoenfeld"))
    varr <- sc$imat
    fvar <- fit$var
  }
  data <- fit$data[order(fit$data$Y), ]
  time <- data$Y
  stat <- data$stat

  if (!inherits(fit, "rsadd")) {
    ties <- as.vector(table(time[stat==1]))
    if(is.null(dim(varr))) varr <- rep(varr/ties,ties)
    else{
      varr <- apply(varr,1:2,function(x)rep(x/ties,ties))
      varr <- aperm(varr,c(2,3,1))
    }
  }
  keep <- 1:(length(fit$coef) - length(fit$int) + 1)
  varnames <- names(fit$coef)[keep]
  nvar <- length(varnames)
  ndead <- length(sresid)/nvar
  if (inherits(fit, "rsadd"))
    times <- time[stat == 1]
  else times <- sc$time
  if (is.character(transform)) {
    tname <- transform
    ttimes <- switch(transform, identity = times, rank = rank(times),
                     log = log(times), km = {
                       fity <- Surv(time, stat)
                       temp <- survfit(fity~1)
                       t1 <- temp$surv[temp$n.event > 0]
                       t2 <- temp$n.event[temp$n.event > 0]
                       km <- rep(c(1, t1), c(t2, 0))
                       if (is.null(attr(sresid, "strata")))
                         1 - km
                       else (1 - km[sort.list(sort.list(times))])
                     }, stop("Unrecognized transform"))
  }
  else {
    tname <- deparse(substitute(transform))
    ttimes <- transform(times)
  }
  if (var.type == "each") {
    invV <- apply(varr, 3, function(x) try(solve(x), silent = TRUE))
    if (length(invV) == length(varr)){
      if(!is.numeric(invV)){
        usable <- rep(FALSE, dim(varr)[3])
        options(warn=-1)
        invV <- as.numeric(invV)
        usable[1:(min(which(is.na(invV)))-1)] <- TRUE
        invV <- invV[usable]
        sresid <- sresid[usable,,drop=FALSE]
        options(warn=0)
      }
      else usable <- rep(TRUE, dim(varr)[3])
    }
    else {
      usable <- unlist(lapply(invV, is.matrix))
      if (!any(usable))
        stop("All the matrices are singular")
      invV <- invV[usable]
      sresid <- sresid[usable, , drop = FALSE]
    }
    di1 <- dim(varr)[1]
    di3 <- sum(usable)
    u <- array(data = matrix(unlist(invV), ncol = di1), dim = c(di1,
                                                                di1, di3))
    uv <- cbind(matrix(u, ncol = di1, byrow = TRUE), as.vector(t(sresid)))
    uv <- array(as.vector(t(uv)), dim = c(di1 + 1, di1, di3))
    r2 <- t(apply(uv, 3, function(x) x[1:di1, ] %*% x[di1 +
                                                        1, ]))
    r2 <- matrix(r2, ncol = di1)
    whr2 <-  apply(r2<100,1,function(x)!any(x==FALSE))
    usable <- as.logical(usable*whr2)
    r2 <- r2[usable,,drop=FALSE]
    u <- u[,,usable]
    dimnames(r2) <- list(times[usable], varnames)
    temp <- list(x = ttimes[usable], y = r2 + outer(rep(1,
                                                        sum(usable)), fit$coef[keep]), var = u, call = call,
                 transform = tname)
  }
  else if (var.type == "sum") {
    xx <- ttimes - mean(ttimes)
    r2 <- t(fvar %*% t(sresid) * ndead)
    r2 <- as.matrix(r2)
    dimnames(r2) <- list(times, varnames)
    temp <- list(x = ttimes, y = r2 + outer(rep(1, ndead),
                                            fit$coef[keep]), var = fvar, transform = tname)
  }
  else stop("Unknown 'var.type'")
  class(temp) <- "rs.zph"
  temp
}



#' Graphical Inspection of Proportional Hazards Assumption in Relative Survival
#' Models
#'
#' Displays a graph of the scaled partial residuals, along with a smooth curve.
#'
#'
#' @param x result of the \code{rs.zph} function.
#' @param resid a logical value, if \code{TRUE} the residuals are included on
#' the plot, as well as the smooth fit.
#' @param df the degrees of freedom for the fitted natural spline, \code{df=2}
#' leads to a linear fit.
#' @param nsmo number of points used to plot the fitted spline.
#' @param var the set of variables for which plots are desired.  By default,
#' plots are produced in turn for each variable of a model.  Selection of a
#' single variable allows other features to be added to the plot, e.g., a
#' horizontal line at zero or a main title.
#' @param cex a numerical value giving the amount by which plotting text and
#' symbols should be scaled relative to the default.
#' @param add logical, if \code{TRUE} the plot is added to an existing plot
#' @param col a specification for the default plotting color.
#' @param lty the line type.
#' @param xlab x axis label.
#' @param ylab y axis label.
#' @param xscale units for x axis, default is 1, i.e. days.
#' @param ... Additional arguments passed to the \code{plot} function.
#' @seealso \code{\link{rs.zph}}, \code{\link{plot.cox.zph}}.
#' @references Goodness of fit: Stare J.,Pohar Perme M., Henderson R. (2005)
#' "Goodness of fit of relative survival models." Statistics in Medicine,
#' \bold{24}: 3911-3925.
#'
#' Package: Pohar M., Stare J. (2006) "Relative survival analysis in R."
#' Computer Methods and Programs in Biomedicine, \bold{81}: 272-278.
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741-1749, 2007.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' fit <- rsadd(Surv(time,cens)~sex+as.factor(agegr),rmap=list(age=age*365.241),
#'              ratetable=slopop,data=rdata,int=5)
#' rszph <- rs.zph(fit)
#' plot(rszph)
#'
plot.rs.zph <- function (x,resid = TRUE, df = 4, nsmo = 40, var, cex = 1,  add = FALSE, col = 1,
                         lty = 1, xlab, ylab, xscale = 1, ...)
{
  #require(splines)
  xx <- x$x
  if(x$transform=="identity")xx <- xx/xscale
  yy <- x$y
  d <- nrow(yy)
  df <- max(df)
  nvar <- ncol(yy)
  pred.x <- seq(from = min(xx), to = max(xx), length = nsmo)
  temp <- c(pred.x, xx)
  lmat <- splines::ns(temp, df = df, intercept = TRUE)
  pmat <- lmat[1:nsmo, ]
  xmat <- lmat[-(1:nsmo), ]
  qmat <- qr(xmat)
  if (missing(ylab))
    ylab <- paste("Beta(t) for", dimnames(yy)[[2]])
  if (missing(xlab))
    xlab <- "Time"
  if (missing(var))
    var <- 1:nvar
  else {
    if (is.character(var))
      var <- match(var, dimnames(yy)[[2]])
    if (any(is.na(var)) || max(var) > nvar || min(var) <
        1)
      stop("Invalid variable requested")
  }
  if (x$transform == "log") {
    xx <- exp(xx)
    pred.x <- exp(pred.x)
  }
  else if (x$transform != "identity") {
    xtime <- as.numeric(dimnames(yy)[[1]])/xscale
    apr1 <- approx(xx, xtime, seq(min(xx), max(xx), length = 17)[2 *
                                                                   (1:8)])
    temp <- signif(apr1$y, 2)
    apr2 <- approx(xtime, xx, temp)
    xaxisval <- apr2$y
    xaxislab <- rep("", 8)
    for (i in 1:8) xaxislab[i] <- format(temp[i])
  }
  for (i in var) {
    y <- yy[, i]
    yhat <- pmat %*% qr.coef(qmat, y)
    yr <- range(yhat, y)
    if (!add) {
      if (x$transform == "identity")
        plot(range(xx), yr, type = "n", xlab = xlab, ylab = ylab[i],...)
      else if (x$transform == "log")
        plot(range(xx), yr, type = "n", xlab = xlab, ylab = ylab[i],log = "x", ...)
      else {
        plot(range(xx), yr, type = "n", xlab = xlab, ylab = ylab[i],axes = FALSE, ...)
        axis(1, xaxisval, xaxislab)
        axis(2)
        box()
      }
    }
    if (resid)
      points(xx, y, cex = cex, col = col)
    lines(pred.x, yhat, col = col, lty = lty)
  }
}

plot.rs.br <- function (x, var, ylim = c(-2, 2), xlab, ylab, ...)
{
  bbt <- x$bbt
  par(ask = TRUE)
  if (missing(var))
    var <- 1:nrow(x$table)
  ychange <- FALSE
  if (missing(ylab))
    ylab <- paste("Brownian bridge for", row.names(x$table))
  else {
    if (length(ylab) == 1 & nrow(x$table) > 1)
      ylab <- rep(ylab, nrow(x$table))
  }
  if (missing(xlab))
    xlab <- "Time"
  for (i in var) {
    timescale <- bbt[[i]][, 1]
    bb <- bbt[[i]][, 2]
    plot(c(0, timescale), c(0, bb), type = "l", ylim = ylim,
         xlab = xlab, ylab = ylab[i], ...)
    abline(h = 1.36, col = 2)
    abline(h = 1.63, col = 2)
    abline(h = -1.36, col = 2)
    abline(h = -1.63, col = 2)
  }
  par(ask = FALSE)
}


Kernmatch <- function (t, tv, b, tD, nt4)
{
  kmat <- NULL
  for (it in 1:(length(nt4) - 1)) {
    kmat1 <- (outer(t[(nt4[it] + 1):nt4[it + 1]], tv, "-")/b[it])
    kmat1 <- kmat1^(kmat1 >= 0)
    kmat <- rbind(kmat, pmax(1 - kmat1^2, 0) * (1.5/b[it]))
  }
  kmat
}

kernerleftch <- function (td, b, nt4)
{
  n <- length(td)
  ttemp <- td[td >= b[1]]
  ntemp <- length(ttemp)
  if (ntemp == n)
    nt4 <- c(0, nt4[-1])
  else {
    nfirst <- n - ntemp
    nt4 <- c(0, 1:nfirst, nt4[-1])
    b <- c(td[1:nfirst], b)
  }
  krn <- Kernmatch(td, td, b, max(td), nt4)
  krn
}




#' Inverse transforming of time in Relative Survival
#'
#' This function can be used when predicting in Relative Survival using the
#' transformed time regression model (using \code{rstrans} function). It
#' inverses the time from Y to T in relative survival using the given
#' ratetable. The times Y can be produced with the \code{rstrans} function, in
#' which case, this is the reverse function. This function does the
#' transformation for one person at a time.
#'
#' Works only with ratetables that are split by age, sex and year. Transforming
#' can be computationally intensive, use lower and/or upper to guess the
#' interval of the result and thus speed up the function.
#'
#' @param y time in Y.
#' @param age age of the individual.  Must be in days.
#' @param sex sex of the individual. Must be coded in the same way as in the
#' \code{ratetable}.
#' @param year date of diagnosis. Must be in a date format
#' @param scale numeric value to scale the results. If \code{ratetable} is in
#' units/day, \code{scale = 365.241} causes the output to be reported in years.
#' @param ratetable a table of event rates, such as \code{survexp.us}.
#' @param lower the lower bound of interval where the result is expected. This
#' argument is optional, but, if given, can shorten the time the function needs
#' to calculate the result.
#' @param upper the upper bound of interval where the result is expected. See
#' \code{lower}
#' @return A list of values \item{T}{the original time} \item{Y}{the
#' transformed time}
#' @seealso \code{\link{rstrans}}
#' @references Package: Pohar M., Stare J. (2006) "Relative survival analysis
#' in R."  Computer Methods and Programs in Biomedicine, \bold{81}: 272-278.
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741-1749.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' invtime(y = 0.1, age = 23011, sex = 1, year = 9497, ratetable = slopop)
#'
invtime <- function (y = 0.1, age = 23011, sex = "male", year = 9497, scale = 1,
                     ratetable = relsurv::slopop, lower, upper)
{
  if (!is.numeric(age))
    stop("\"age\" must be numeric", call. = FALSE)
  if (!is.numeric(y))
    stop("\"y\" must be numeric", call. = FALSE)
  if (!is.numeric(scale))
    stop("\"scale\" must be numeric", call. = FALSE)
  temp <- data.frame(age = age, sex = I(sex), year = year)
  if (missing(lower)) {
    if (!missing(upper))
      stop("Argument \"lower\" is missing, with no default",
           call. = FALSE)
    nyears <- round((110 - age/365.241))
    tab <- data.frame(age = rep(age, nyears), sex = I(rep(sex,
                                                          nyears)), year = rep(year, nyears))
    vred <- 1 - survexp(c(0, 1:(nyears - 1)) * 365.241 ~ ratetable(age = age,
                                                                   sex = sex, year = year), ratetable = ratetable, data = tab,
                        cohort = FALSE)
    place <- sum(vred <= y)
    if (place == 0)
      lower <- 0
    else lower <- floor((place - 1) * 365.241 - place)
    upper <- ceiling(place * 365.241 + place)
  }
  else {
    if (missing(upper))
      stop("Argument \"upper\" is missing, with no default",
           call. = FALSE)
    if (!is.integer(lower))
      lower <- floor(lower)
    if (!is.integer(upper))
      upper <- ceiling(upper)
    if (upper <= lower)
      stop("'upper' must be higher than 'lower'", call. = FALSE)
  }
  lower <- max(0, lower)
  tab <- data.frame(age = rep(age, upper - lower + 1), sex = I(rep(sex,
                                                                   upper - lower + 1)), year = rep(year, upper - lower +
                                                                                                     1))
  vred <- 1 - survexp((lower:upper) ~ ratetable(age = age,
                                                sex = sex, year = year), ratetable = ratetable, data = tab,
                      cohort = FALSE)
  place <- sum(vred <= y)
  if (place == 0)
    warning(paste("The event happened on or before day",
                  lower), call. = FALSE)
  if (place == length(vred))
    warning(paste("The event happened on or after day", upper),
            call. = FALSE)
  t <- (place + lower - 1)/scale
  age <- round(age/365.241, 0.01)
  return(list(age, sex, year, Y = y, T = t))
}






#' Fit Andersen et al Multiplicative Regression Model for Relative Survival
#'
#' Fits the Andersen et al multiplicative regression model in relative
#' survival. An extension of the coxph function using relative survival.
#'
#' NOTE: The follow-up time must be specified in days. The \code{ratetable}
#' being used may have different variable names and formats than the user's
#' data set, this is dealt with by the \code{rmap} argument. For example, if
#' age is in years in the data set but in days in the \code{ratetable} object,
#' age=age*365.241 should be used. The calendar year can be in any date format
#' (date, Date and POSIXt are allowed), the date formats in the
#' \code{ratetable} and in the data may differ.
#'
#' @param formula a formula object, with the response as a \code{Surv} object
#' on the left of a \code{~} operator, and, if desired, terms separated by the
#' \code{+} operator on the right.
#'
#' NOTE: The follow-up time must be in days.
#' @param data a data.frame in which to interpret the variables named in the
#' \code{formula}.
#' @param ratetable a table of event rates, such as \code{slopop}.
#' @param int the number of follow-up years used for calculating survival(the
#' data are censored after this time-point). If missing, it is set the the
#' maximum observed follow-up time.
#' @param na.action a missing-data filter function, applied to the model.frame,
#' after any subset argument has been used.  Default is
#' \code{options()$na.action}.
#' @param init vector of initial values of the iteration.  Default initial
#' value is zero for all variables.
#' @param method the default method \code{mul} assumes hazard to be constant on
#' yearly intervals. Method \code{mul1} uses the ratetable to determine the
#' time points when hazard changes. The \code{mul1} method is therefore more
#' accurate, but at the same time can be more computationally intensive.
#' @param control a list of parameters for controlling the fitting process.
#' See the documentation for \code{coxph.control} for details.
#' @param rmap an optional list to be used if the variables are not organized
#' and named in the same way as in the \code{ratetable} object. See details
#' below.
#' @param ... Other arguments will be passed to \code{coxph.control}.
#' @return an object of class \code{coxph} with an additional item:
#' \item{basehaz}{Cumulative baseline hazard (population values are seen as
#' offset) at centered values of covariates.}
#' @seealso \code{\link{rsadd}}, \code{\link{rstrans}}.
#' @references Method: Andersen, P.K., Borch-Johnsen, K., Deckert, T., Green,
#' A., Hougaard, P., Keiding, N. and Kreiner, S. (1985) "A Cox regression model
#' for relative mortality and its application to diabetes mellitus survival
#' data.", Biometrics, \bold{41}: 921--932.
#'
#' Package. Pohar M., Stare J. (2006) "Relative survival analysis in R."
#' Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' #fit a multiplicative model
#' #note that the variable year is given in days since 01.01.1960 and that
#' #age must be multiplied by 365.241 in order to be expressed in days.
#' fit <- rsmul(Surv(time,cens)~sex+as.factor(agegr),rmap=list(age=age*365.241),
#'             ratetable=slopop,data=rdata)
#'
#'
#' #check the goodness of fit
#' rs.br(fit)
#'
#'
rsmul <- function (formula = formula(data), data = parent.frame(), ratetable = relsurv::slopop,
                   int, na.action, init, method = "mul", control,rmap, ...)
{
  #require(survival)

  if (!missing(rmap)) {
    rmap <- substitute(rmap)
  }
  rform <- rformulate(formula,data, ratetable, na.action,rmap,int)


  U <- rform$data
  if (missing(int))
    int <- ceiling(max(rform$Y/365.241))
  if(length(int)!=1)int <- max(int)
  fk <- (attributes(rform$ratetable)$factor != 1)
  nfk <- length(fk)
  if (method == "mul") {
    U <- survsplit(U, cut = (1:int) * 365.241, end = "Y",
                   event = "stat", start = "start", episode = "epi")
    fk <- (attributes(rform$ratetable)$factor != 1)
    nfk <- length(fk)
    U[, 4:(nfk + 3)] <- U[, 4:(nfk + 3)] + 365.241 * (U$epi) %*%
      t(fk)
    nsk <- dim(U)[1]
    xx <- exp.prep(U[, 4:(nfk + 3),drop=FALSE], 365.241, rform$ratetable)
    lambda <- -log(xx)/365.241
  }
  else if (method == "mul1") {
    U$id <- 1:dim(U)[1]
    my.fun <- function(x, attcut, nfk, fk) {
      intr <- NULL
      for (i in 1:nfk) {
        if (fk[i]) {
          n1 <- max(findInterval(as.numeric(x[3 + i]) +
                                   as.numeric(x[1]), attcut[[i]]) + 1, 2)
          n2 <- findInterval(as.numeric(x[3 + i]) + as.numeric(x[2]),
                             attcut[[i]])
          if (n2 > n1 & length(attcut[[i]] > 1)) {
            if (n2 > length(attcut[[i]]))
              n2 <- length(attcut[[i]])
            intr <- c(intr, as.numeric(attcut[[i]][n1:n2]) -
                        as.numeric(x[3 + i]))
          }
        }
      }
      intr <- sort(unique(c(intr, as.numeric(x[2]))))
      intr
    }
    attcut <- attributes(rform$ratetable)$cutpoints
    intr <- apply(U[, 1:(3 + nfk)], 1, my.fun, attcut, nfk,
                  fk)
    dolg <- unlist(lapply(intr, length))
    newdata <- lapply(U, rep, dolg)
    stoptime <- unlist(intr)
    starttime <- c(-1, stoptime[-length(stoptime)])
    first <- newdata$id != c(-1, newdata$id[-length(newdata$id)])
    starttime[first] <- newdata$start[first]
    last <- newdata$id != c(newdata$id[-1], -1)
    event <- rep(0, length(newdata$id))
    event[last] <- newdata$stat[last]
    U <- do.call("data.frame", newdata)
    U$start <- starttime
    U$Y <- stoptime
    U$stat <- event
    U[, 4:(nfk + 3)] <- U[, 4:(nfk + 3)] + (U$start) %*%
      t(fk)
    nsk <- dim(U)[1]
    xx <- exp.prep(U[, 4:(nfk + 3),drop=FALSE], 1, rform$ratetable)
    lambda <- -log(xx)/1
  }
  else stop("'method' must be one of 'mul' or 'mul1'")
  U$lambda <- log(lambda)
  if (rform$m == 0)
    fit <- coxph(Surv(start, Y, stat) ~ 1 + offset(lambda),
                 data = U, init = init, control = control, x = TRUE,
                 ...)
  else {
    xmat <- as.matrix(U[, (3 + nfk + 1):(ncol(U) - 2)])
    fit <- coxph(Surv(start, Y, stat) ~ xmat + offset(lambda),
                 data = U, init = init, control = control, x = TRUE,
                 ...)
    names(fit[[1]]) <- names(U)[(3 + nfk + 1):(ncol(U) -
                                                 2)]
  }
  class(fit) <- c("rsmul",class(fit))
  fit$basehaz <- basehaz(fit)			#NEW 2.05
  fit$data <- rform$data
  fit$call <- match.call()
  fit$int <- int
  if (length(rform$na.action))
    fit$na.action <- rform$na.action
  fit
}



#' Fit Cox Proportional Hazards Model in Transformed Time
#'
#' The function transforms each person's time to his/her probability of dying
#' at that time according to the ratetable. It then fits the Cox proportional
#' hazards model with the transformed times as a response. It can also be used
#' for calculatin the transformed times (no covariates are needed in the
#' formula for that purpose).
#'
#' NOTE: The follow-up time must be specified in days. The \code{ratetable}
#' being used may have different variable names and formats than the user's
#' data set, this is dealt with by the \code{rmap} argument. For example, if
#' age is in years in the data set but in days in the \code{ratetable} object,
#' age=age*365.241 should be used. The calendar year can be in any date format
#' (date, Date and POSIXt are allowed), the date formats in the
#' \code{ratetable} and in the data may differ.  A side product of this
#' function are the transformed times - stored in teh \code{y} object of the
#' output. To get these times, covariates are of course irrelevant.
#'
#' @param formula a formula object, with the response as a \code{Surv} object
#' on the left of a \code{~} operator, and, if desired, terms separated by the
#' \code{+} operator on the right.
#'
#' NOTE: The follow-up time must be in days.
#' @param data a data.frame in which to interpret the variables named in the
#' \code{formula}.
#' @param ratetable a table of event rates, such as \code{slopop}.
#' @param int the number of follow-up years used for calculating survival(the
#' rest is censored). If missing, it is set the the maximum observed follow-up
#' time.
#' @param na.action a missing-data filter function, applied to the model.frame,
#' after any subset argument has been used.  Default is
#' \code{options()$na.action}.
#' @param init vector of initial values of the iteration.  Default initial
#' value is zero for all variables.
#' @param control a list of parameters for controlling the fitting process.
#' See the documentation for \code{coxph.control} for details.
#' @param rmap an optional list to be used if the variables are not organized
#' and named in the same way as in the \code{ratetable} object. See details
#' below.
#' @param ... other arguments will be passed to \code{coxph.control}.
#' @return an object of class \code{coxph}. See \code{coxph.object} and
#' \code{coxph.detail} for details.  \item{y}{ an object of class \code{Surv}
#' containing the transformed times (these times do not depend on covariates).
#' }
#' @seealso \code{\link{rsmul}}, \code{\link{invtime}}, \code{\link{rsadd}},
#' \code{\link{survexp}}.
#' @references Method: Stare J., Henderson R., Pohar M. (2005) "An individual
#' measure for relative survival." Journal of the Royal Statistical Society:
#' Series C, \bold{54} 115--126.
#'
#' Package. Pohar M., Stare J. (2006) "Relative survival analysis in R."
#' Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#'
#' #fit a Cox model using the transformed times
#' #note that the variable year is given in days since 01.01.1960 and that
#' #age must be multiplied by 365.241 in order to be expressed in days.
#' fit <- rstrans(Surv(time,cens)~sex+as.factor(agegr),rmap=list(age=age*365.241,
#'         sex=sex,year=year),ratetable=slopop,data=rdata)
#'
#'
#' #check the goodness of fit
#' rs.br(fit)
#'
rstrans <- function (formula = formula(data), data = parent.frame(), ratetable = relsurv::slopop,
                     int, na.action, init, control,rmap, ...)
{
  if (!missing(rmap)) {
    rmap <- substitute(rmap)
  }
  rform <- rformulate(formula, data, ratetable, na.action, rmap, int)


  if (missing(int))
    int <- ceiling(max(rform$Y/365.241))
  fk <- (attributes(rform$ratetable)$factor != 1)
  nfk <- length(fk)
  if (rform$type == "counting") {
    start <- 1 - exp.prep(rform$R, rform$start, rform$ratetable)
  }
  else start <- rep(0, rform$n)
  stop <- 1 - exp.prep(rform$R, rform$Y, rform$ratetable)
  if(any(stop==0&rform$Y!=0))stop[stop==0&rform$Y!=0] <- .Machine$double.eps
  if(length(int)!=1)int <- max(int)
  data <- rform$data
  stat <- rform$status
  if (rform$m == 0) {
    if (rform$type == "counting")
      fit <- coxph(Surv(start, stop, stat) ~ 1,
                   init = init, control = control, x = TRUE, ...)
    else fit <- coxph(Surv(stop, stat) ~ 1,
                      init = init, control = control, x = TRUE, ...)
  }
  else {
    xmat <- as.matrix(data[, (4 + nfk):ncol(data)])
    fit <- coxph(Surv(start, stop, stat) ~ xmat,
                 init = init, control = control, x = TRUE, ...)
    names(fit[[1]]) <- names(rform$X)
  }
  fit$call <- match.call()
  if (length(rform$na.action))
    fit$na.action <- rform$na.action
  data$start <- start
  data$Y <- stop
  fit$data <- data
  fit$int <- int
  return(fit)
}


#' Reorganize Data into a Ratetable Object
#'
#' The function assists in reorganizing certain types of data into a ratetable
#' object.
#'
#' This function only applies for ratetables that are organized by age, sex and
#' year.
#'
#' @param men a matrix containing the yearly (conditional) probabilities of one
#' year survival for men. Rows represent age (increasing 1 year per
#' line,starting with 0), the columns represent cohort years (the limits are in
#' \code{yearlim}, the increase is in \code{int.length}.
#' @param women a matrix containing the yearly (conditional) probabilities of
#' one year survival for women.
#' @param yearlim the first and last cohort year given in the tables.
#' @param int.length the length of intervals in which cohort years are given.
#' @return An object of class \code{ratetable}.
#' @seealso \code{\link{ratetable}}.
#' @references Package. Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' men <- cbind(exp(-365.241*exp(-14.5+.08*(0:100))),exp(-365*exp(-14.7+.085*(0:100))))
#' women <- cbind(exp(-365.241*exp(-15.5+.085*(0:100))),exp(-365*exp(-15.7+.09*(0:100))))
#' table <- transrate(men,women,yearlim=c(1980,1990),int.length=10)
#'
transrate <- function (men, women, yearlim, int.length = 1)
{
  if (any(dim(men) != dim(women)))
    stop("The men and women matrices must be of the same size. \n In case of missing values at the end carry the last value forward")
  if ((yearlim[2] - yearlim[1])/int.length + 1 != dim(men)[2])
    stop("'yearlim' cannot be divided into intervals of equal length")
  if (!is.matrix(men) | !is.matrix(women))
    stop("input tables must be of class matrix")
  dimi <- dim(men)
  temp <- array(c(men, women), dim = c(dimi, 2))
  temp <- -log(temp)/365.241
  temp <- aperm(temp, c(1, 3, 2))
  cp <- as.date(apply(matrix(yearlim[1] + int.length * (0:(dimi[2] -
                                                             1)), ncol = 1), 1, function(x) {
                                                               paste("1jan", x, sep = "")
                                                             }))
  attributes(temp) <- list(dim = c(dimi[1], 2, dimi[2]), dimnames = list(age=as.character(0:(dimi[1] -
                                                                                               1)), sex=c("male", "female"), year=as.character(yearlim[1] + int.length *
                                                                                                                                                 (0:(dimi[2] - 1)))), dimid = c("age", "sex", "year"),
                           factor = c(0, 1, 0),type=c(2,1,3), cutpoints = list((0:(dimi[1] - 1)) *
                                                                                 (365.241), NULL, cp), class = "ratetable")
  attributes(temp)$summary <- function (R)
  {
    x <- c(format(round(min(R[, 1])/365.241, 1)), format(round(max(R[,
                                                                     1])/365.241, 1)), sum(R[, 2] == 1), sum(R[, 2] == 2))
    x2 <- as.character(as.Date(c(min(R[, 3]), max(R[, 3])), origin=as.Date('1970-01-01')))
    paste("  age ranges from", x[1], "to", x[2], "years\n", " male:",
          x[3], " female:", x[4], "\n", " date of entry from",
          x2[1], "to", x2[2], "\n")
  }
  temp
}



#' Reorganize Data obtained from Human Life-Table Database into a Ratetable
#' Object
#'
#' The function assists in reorganizing the .txt files obtained from Human
#' Life-Table Database (http://www.lifetable.de -> Data by Country) into a
#' ratetable object.
#'
#' This function works with any table organised in the format provided by the
#' Human Life-Table Database, but currently only works with TypeLT 1 (i.e. age
#' intervals of length 1). The age must always start with value 0, but can end
#' at different values (when that happens, the last value is carried forward).
#' The rates between the cutpoints are taken to be constant.
#'
#' @param file a vector of file names which the data are to be read from. Must
#' be in .tex format and in the same format as the files in Human Life-Table
#' Database.
#' @param cut.year a vector of cutpoints for years. Must be specified when the
#' year spans in the files are not consecutive.
#' @param race a vector of race names for the input files.
#' @return An object of class \code{ratetable}.
#' @seealso \code{\link{ratetable}}, \code{\link{transrate.hmd}},
#' \code{\link{joinrate}}, \code{\link{transrate}}.
#' @references Package. Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' \dontrun{
#' finpop <- transrate.hld(c("FIN_1981-85.txt","FIN_1986-90.txt","FIN_1991-95.txt"))
#' }
#' \dontrun{
#' nzpop <- transrate.hld(c("NZL_1980-82_Non-maori.txt","NZL_1985-87_Non-maori.txt",
#' 	 "NZL_1980-82_Maori.txt","NZL_1985-87_Maori.txt"),
#' 	 cut.year=c(1980,1985),race=rep(c("nonmaori","maori"),each=2))
#' }
#'
transrate.hld <- function(file, cut.year,race){
  nfiles <- length(file)
  data <- NULL
  for(it in 1:nfiles){
    tdata <- read.table(file[it],sep=",",header=TRUE)
    if(!any(tdata$TypeLT==1)) stop("Currently only TypeLT 1 is implemented")
    names(tdata) <- gsub(".","",names(tdata),fixed=TRUE)
    tdata <- tdata[,c("Country","Year1","Year2","TypeLT","Sex","Age","AgeInt","qx")]
    tdata <- tdata[tdata$TypeLT==1,]		#NEW - prej sem gledala tudi AgeInt, izkaze se, da ni treba. pri q(x) bi bilo vseeno tudi, ce bi gledala TypeLT=3.
    tdata <- tdata[!is.na(tdata$AgeInt),]		#NEW - vrzem ven zadnji interval, ki gre v neskoncnost in vsi umrejo (inf hazard)
    if(!missing(race))tdata$race <- rep(race[it],nrow(tdata))
    data <- rbind(data,tdata)
  }
  if(length(unique(data$Country))>1)warning("The data belongs to different countries")
  data <- data[order(data$Year1,data$Age),]
  data$qx <- as.character(data$qx)
  options(warn = -1)
  data$qx[data$qx=="."] <- NA
  data$qx <- as.numeric(data$qx)
  options(warn = 0)
  if(missing(cut.year)){
    y1 <-  unique(data$Year1)
    y2 <-  unique(data$Year2)
    if(any(apply(cbind(y1[-1],y2[-length(y2)]),1,diff)!=-1))warning("Data is not given for all the cut.year between the minimum and the maximum, use argument 'cut.year'")
  }
  else
    y1 <- cut.year
  if(length(y1)!=length(unique(data$Year1)))stop("Length 'cut.year' must match the number of unique values of Year1")
  cp <- as.date(apply(matrix(y1,ncol=1),1,function(x){paste("1jan",x,sep="")}))
  dn2 <- as.character(y1)
  amax <- max(data$Age)
  a.fun <- function(data,amax){
    mdata <- data[data$Sex==1,]
    wdata <- data[data$Sex==2,]
    men <-NULL
    women <- NULL
    k <- sum(mdata$Age==0)
    mind <- c(which(mdata$Age[-nrow(mdata)] != mdata$Age[-1]-1),nrow(mdata))
    wind <- c(which(wdata$Age[-nrow(wdata)] != wdata$Age[-1]-1),nrow(wdata))
    mst <- wst <- 1
    for(it in 1:k){
      qx <- mdata[mst:mind[it],]$qx
      lqx <- length(qx)
      if(lqx!=amax+1){
        nmiss <- amax + 1 - lqx
        qx <- c(qx,rep(qx[lqx],nmiss))
      }
      naqx <- max(which(!is.na(qx)))
      if(naqx!=amax+1) qx[(naqx+1):(amax+1)] <- qx[naqx]
      men <- cbind(men,qx)
      mst <- mind[it]+1
      qx <- wdata[wst:wind[it],]$qx
      lqx <- length(qx)
      if(lqx!=amax+1){
        nmiss <- amax + 1 - lqx
        qx <- c(qx,rep(qx[lqx],nmiss))
      }
      naqx <- max(which(!is.na(qx)))
      if(naqx!=amax+1) qx[(naqx+1):(amax+1)] <- qx[naqx]
      women <- cbind(women,qx)
      wst <- wind[it]+1
    }
    men<- -log(1-men)/365.241
    women<- -log(1-women)/365.241
    dims <- c(dim(men),2)
    array(c(men,women),dim=dims)
  }
  if(missing(race)){
    out <- a.fun(data,amax)
    dims <- dim(out)
    attributes(out)<-list(
      dim=dims,
      dimnames=list(as.character(0:amax),as.character(y1),c("male","female")),
      dimid=c("age","year","sex"),
      factor=c(0,0,1),type=c(2,3,1),
      cutpoints=list((0:amax)*(365.241),cp,NULL),
      class="ratetable"
    )


  }
  else{
    race.val <- unique(race)
    if(length(race)!=length(file))stop("Length of 'race' must match the number of files")
    for(it in 1:length(race.val)){
      if(it==1){
        out <- a.fun(data[data$race==race.val[it],],amax)
        dims <- dim(out)
        out <- array(out,dim=c(dims,1))
      }
      else{
        out1 <- array(a.fun(data[data$race==race.val[it],],amax),dim=c(dims,1))
        out <- array(c(out,out1),dim=c(dims,it))
      }
    }
    attributes(out)<-list(
      dim=c(dims,it),
      dimnames=list(age=as.character(0:amax),year=as.character(y1),sex=c("male","female"),race=race.val),
      dimid=c("age","year","sex","race"),
      factor=c(0,0,1,1),type=c(2,3,1,1),
      cutpoints=list((0:amax)*(365.241),cp,NULL,NULL),
      class="ratetable"
    )
  }
  attributes(out)$summary <- function (R)
  {
    x <- c(format(round(min(R[, 1])/365.241, 1)), format(round(max(R[,
                                                                     1])/365.241, 1)), sum(R[, 3] == 1), sum(R[, 3] == 2))
    x2 <- as.character(as.Date(c(min(R[, 2]), max(R[, 2])), origin=as.Date('1970-01-01')))
    paste("  age ranges from", x[1], "to", x[2], "years\n", " male:",
          x[3], " female:", x[4], "\n", " date of entry from",
          x2[1], "to", x2[2], "\n")
  }
  out
}



#' Reorganize Data obtained from Human Mortality Database into a Ratetable
#' Object
#'
#' The function assists in reorganizing the .txt files obtained from Human
#' Mortality Database (http://www.mortality.org) into a ratetable object.
#'
#' This function works automatically with tables organised in the format
#' provided by the Human Mortality Database. Download Life Tables for Males and
#' Females separately from the column named 1x1 (period life tables, organized
#' by date of death, yearly cutpoints for age as well as calendar year).
#'
#' If you wish to provide the data in the required format by yourself, note
#' that the only two columns needed are calendar year (Year) and probability of
#' death (qx). Death probabilities must be calculated up to age 110 (in yearly
#' intervals).
#'
#' @param male a .txt file, containing the data on males.
#' @param female a .txt file, containing the data on females.
#' @return An object of class \code{ratetable}.
#' @seealso \code{\link{ratetable}}, \code{\link{transrate.hld}},
#' \code{\link{joinrate}}, \code{\link{transrate}}.
#' @references Package. Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#' @keywords survival
#' @examples
#'
#' \dontrun{
#' auspop <- transrate.hmd("mltper_1x1.txt","fltper_1x1.txt")
#' }
#'
transrate.hmd <- function(male,female){
  nfiles <- 2
  men <- try(read.table(male,sep="",header=TRUE),silent=TRUE)
  if(inherits(men, "try-error")){ men <- read.table(male,sep="",header=TRUE,skip=1)}
  men <- men[,c("Year","Age","qx")]
  y1 <- sort(unique(men$Year))
  ndata <- nrow(men)/111
  if(round(ndata)!=ndata)stop("Each year must contain ages from 0 to 110")
  men <- matrix(men$qx, ncol=ndata)
  men <- matrix(as.numeric(men),ncol=ndata)
  women <- try(read.table(female,sep="",header=TRUE),silent=TRUE)
  if(inherits(women, "try-error")) {women <- read.table(female,sep="",header=TRUE,skip=1)}
  women <- women[,"qx"]
  if(length(women)!=length(men))stop("Number of rows in the table must be equal for both sexes")
  women <- matrix(women, ncol=ndata)
  women <- matrix(as.numeric(women),ncol=ndata)
  cp <- as.date(apply(matrix(y1,ncol=1),1,function(x){paste("1jan",x,sep="")}))
  dn2 <- as.character(y1)
  tfun <- function(vec){
    ind <- which(vec == 1 | is.na(vec))
    if(length(ind)>0)vec[min(ind):length(vec)] <- 0.999
    vec
  }
  men <- apply(men,2,tfun)
  women <- apply(women,2,tfun)
  men<- -log(1-men)/365.241
  women<- -log(1-women)/365.241
  nr <- nrow(men)-1
  dims <- c(dim(men),2)
  out <- array(c(men,women),dim=dims)
  attributes(out)<-list(
    dim=dims,
    dimnames=list(age=as.character(0:nr),year=as.character(y1),sex=c("male","female")),
    dimid=c("age","year","sex"),
    factor=c(0,0,1),type=c(2,3,1),
    cutpoints=list((0:nr)*(365.241),cp,NULL),
    class="ratetable"
  )
  attributes(out)$summary <- function (R)
  {
    x <- c(format(round(min(R[, 1])/365.241, 1)), format(round(max(R[,
                                                                     1])/365.241, 1)), sum(R[, 3] == 1), sum(R[, 3] == 2))
    x2 <- as.character(as.Date(c(min(R[, 2]), max(R[, 2])), origin=as.Date('1970-01-01')))
    paste("  age ranges from", x[1], "to", x[2], "years\n", " male:",
          x[3], " female:", x[4], "\n", " date of entry from",
          x2[1], "to", x2[2], "\n")
  }
  out
}






#' Join ratetables
#'
#' The function joins two or more objects organized as \code{ratetable} by
#' adding a new dimension.
#'
#' This function joins two or more \code{ratetable} objects by adding a new
#' dimension. The cutpoints of all the rate tables are compared and only the
#' common intervals kept. If the intervals defined by the cutpoints are not of
#' the same length, a warning message is displayed.  Each rate table must have
#' 3 dimensions, i.e. age, sex and year (the order is not important).
#'
#' @param tables a list of ratetables. If names are given, they are included as
#' \code{dimnames}.
#' @param dim.name the name of the added dimension.
#' @return An object of class \code{ratetable}.
#' @seealso \code{\link{ratetable}}, \code{\link{transrate.hld}},
#' \code{\link{transrate.hmd}}, \code{\link{transrate}}.
#' @references Package: Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272-278.
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741-1749.
#' @keywords survival
#' @examples
#'
#' #newpop <- joinrate(list(Arizona=survexp.az,Florida=survexp.fl,
#' #                   Minnesota=survexp.mn),dim.name="state")
#'
joinrate <- function(tables,dim.name="country"){
  nfiles <- length(tables)
  if(is.null(names(tables))) names(tables) <- paste("D",1:nfiles,sep="")
  if(any(!unlist(lapply(tables,is.ratetable))))stop("Tables must be in ratetable format")
  if(length(attributes(tables[[1]])$dim)!=3)stop("Currently implemented only for ratetables with 3 dimensions")

  if(is.null(attr(tables[[1]],"dimid")))attr(tables[[1]],"dimid") <- names((attr(tables[[1]],"dimnames")))

  for(it in 2:nfiles){
    if(is.null(attr(tables[[it]],"dimid")))attr(tables[[it]],"dimid") <- names((attr(tables[[it]],"dimnames")))
    if(length(attributes(tables[[it]])$dimid)!=3)stop("Each ratetable must have 3 dimensions: age, year and sex")
    mc <- match(attributes(tables[[it]])$dimid,attributes(tables[[1]])$dimid,nomatch=0)
    if(any(mc)==0) stop("Each ratetable must have 3 dimensions: age, year and sex")
    if(any(mc!=1:3)){
      atts <- attributes(tables[[it]])
      tables[[it]] <- aperm(tables[[it]],mc)
      atts$dimid <- atts$dimid[mc]
      atts$dimnames <- atts$dimnames[mc]
      atts$cutpoints <- atts$cutpoints[mc]
      atts$factor <- atts$factor[mc]
      atts$type <- atts$type[mc]
      atts$dim <- atts$dim[mc]
      attributes(tables[[it]]) <- atts
    }
  }

  list.eq <- function(l1,l2){
    n <- length(l1)
    rez <- rep(TRUE,n)
    for(it in 1:n){
      if(length(l1[[it]])!=length(l2[[it]]))rez[it] <- FALSE
      else if(any(l1[[it]]!=l2[[it]]))rez[it] <- FALSE
    }
    rez
  }


  equal <- rep(TRUE,3)
  for(it in 2:nfiles){
    equal <- equal*list.eq(attributes(tables[[1]])$cutpoints,attributes(tables[[it]])$cutpoints)
  }


  kir <-  which(!equal)

  newat <- attributes(tables[[1]])
  imena <- list(d1=NULL,d2=NULL,d3=NULL)

  for(jt in kir){
    listy <- NULL
    for(it in 1:nfiles){
      listy <- c(listy,attributes(tables[[it]])$cutpoints[[jt]])
    }
    imena[[jt]] <- names(table(listy)[table(listy) == nfiles])
    if(!length(imena[[jt]]))stop(paste("There are no common cutpoints for dimension", attributes(tables[[1]])$dimid[jt]))
  }


  for(it in 1:nfiles){
    keep <- lapply(dim(tables[[it]]),function(x)1:x)
    for(jt in kir){
      meci <- which(match(attributes(tables[[it]])$cutpoints[[jt]],imena[[jt]],nomatch=0)!=0)

      if(it==1){
        newat$dimnames[[jt]] <- attributes(tables[[it]])$dimnames[[jt]][meci]
        newat$dim[[jt]] <- length(imena[[jt]])
        newat$cutpoints[[jt]] <- attributes(tables[[it]])$cutpoints[[jt]][meci]
      }
      if(length(meci)>1){if(max(diff(meci)!=1))warning(paste("The cutpoints for ",attributes(tables[[1]])$dimid[jt] ," are not equally spaced",sep=""))}
      keep[[jt]] <- meci
    }
    tables[[it]] <- tables[[it]][keep[[1]],keep[[2]],keep[[3]]]
  }
  dims <- newat$dim
  out <- array(tables[[1]],dim=c(dims,1))
  for(it in 2:nfiles){
    out1 <- array(tables[[it]],dim=c(dims,1))
    out <- array(c(out,out1),dim=c(dims,it))
  }
  mc <- 1:4
  if(any(newat$factor>1)){
    wh <- which(newat$factor>1)
    mc <- c(mc[-wh],wh)
    out <- aperm(out,mc)
  }
  newat$dim <- c(dims,nfiles)[mc]
  newat$dimid <- c(newat$dimid,dim.name)[mc]
  newat$cutpoints <- list(newat$cutpoints[[1]],newat$cutpoints[[2]],newat$cutpoints[[3]],NULL)[mc]
  newat$factor <- c(newat$factor,1)[mc]
  newat$type <- c(newat$type,1)[mc]
  newat$dimnames <- list(newat$dimnames[[1]],newat$dimnames[[2]],newat$dimnames[[3]],names(tables))[mc]
  names(newat$dimnames) <- newat$dimid
  attributes(out) <- newat
  out
}



mlfit <- function (b, p, x, offset, d, h, ds, y, maxiter, tol)
{
  for (nit in 1:maxiter) {
    b0 <- b
    fd <- matrix(0, p, 1)
    sd <- matrix(0, p, p)
    if (nit == 1) {
      ebx <- exp(x %*% b) * exp(offset)
      l0 <- sum(d * log(h + ebx) - ds - y * ebx)
    }
    for (it in 1:p) {
      fd[it, 1] <- sum((d/(h + ebx) - y) * x[, it] * ebx)
      for (jt in 1:p) sd[it, jt] = sum((d/(h + ebx) - d *
                                          ebx/(h + ebx)^2 - y) * x[, it] * x[, jt] * ebx)
    }
    b <- b - solve(sd) %*% fd
    ebx <- exp(x %*% b) * exp(offset)
    l <- sum(d * log(h + ebx) - ds - y * ebx)
    bd <- abs(b - b0)
    if (max(bd) < tol)
      break()
  }
  out <- list(b = b, sd = sd, nit = nit, loglik = c(l0, l))
  out
}

print.rs.br <- function (x, digits = max(options()$digits - 4, 3), ...)
{
  invisible(print(x$table, digits = digits))
  if (x$rho != 0)
    invisible(cat("Weighted Brownian bridge with rho=", x$rho,
                  "\n"))
}

print.rsadd <- function (x, digits = max(3, getOption("digits") - 3), ...)
{
  cat("\nCall: ", paste(deparse(x$call), sep = "\n", collapse = "\n"),
      "\n\n", sep = "", "\n")
  if (length(coef(x))) {
    cat("Coefficients")
    cat(":\n")
    print.default(format(x$coefficients, digits = digits),
                  print.gap = 2, quote = FALSE)
  }
  else cat("No coefficients\n\n")
  if(x$method=="EM")
    cat("\n", "Expected number of disease specific deaths: ",format(round(sum(x$Nie),2))," = ",format(round(100*sum(x$Nie)/sum(x$data$stat),1)),"% \n" ,sep="")
  if(x$method=="EM"|x$method=="max.lik"){
    chi <- 2*max((x$loglik[2]-x$loglik[1]),0)
    if(x$method=="EM")df <- length(x$coef)
    else df <- length(x$coef)-length(x$int)+1
    if(df>0){
      p.val <- 1- pchisq(chi,df)
      if(x$method=="max.lik")cat("\n")
      cat("Likelihood ratio test=",format(round(chi,2)),", on ",df," df, p=",format(p.val),"\n",sep="")
    }
    else cat("\n")
  }
  cat("n=",nrow(x$data),sep="")
  if(length(x$na.action))cat("  (",length(x$na.action)," observations deleted due to missing)",sep="")
  cat("\n")
  if (length(x$warnme))
    cat("\n", x$warnme, "\n\n")
  else cat("\n")
  invisible(x)
}

summary.rsadd <- function (object, correlation = FALSE, symbolic.cor = FALSE,
                           ...)
{
  if (inherits(object, "glm")) {
    p <- object$rank
    if (p > 0) {
      p1 <- 1:p
      Qr <- object$qr
      aliased <- is.na(coef(object))
      coef.p <- object$coefficients[Qr$pivot[p1]]
      covmat <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
      dimnames(covmat) <- list(names(coef.p), names(coef.p))
      var.cf <- diag(covmat)
      s.err <- sqrt(var.cf)
      tvalue <- coef.p/s.err
      dn <- c("Estimate", "Std. Error")
      pvalue <- 2 * pnorm(-abs(tvalue))
      coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
      dimnames(coef.table) <- list(names(coef.p), c(dn,
                                                    "z value", "Pr(>|z|)"))
      df.f <- NCOL(Qr$qr)
    }
    else {
      coef.table <- matrix(, 0, 4)
      dimnames(coef.table) <- list(NULL, c("Estimate",
                                           "Std. Error", "t value", "Pr(>|t|)"))
      covmat.unscaled <- covmat <- matrix(, 0, 0)
      aliased <- is.na(coef(object))
      df.f <- length(aliased)
    }
    ans <- c(object[c("call", "terms", "family", "iter",
                      "warnme")], list(coefficients = coef.table, var = covmat,
                                       aliased = aliased))
    if (correlation && p > 0) {
      dd <- s.err
      ans$correlation <- covmat/outer(dd, dd)
      ans$symbolic.cor <- symbolic.cor
    }
    class(ans) <- "summary.rsadd"
  }
  else if (inherits(object, "rsadd")) {
    aliased <- is.na(coef(object))
    coef.p <- object$coef
    var.cf <- diag(object$var)
    s.err <- sqrt(var.cf)
    tvalue <- coef.p/s.err
    dn <- c("Estimate", "Std. Error")
    pvalue <- 2 * pnorm(-abs(tvalue))
    coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
    dimnames(coef.table) <- list(names(coef.p), c(dn, "z value",
                                                  "Pr(>|z|)"))
    ans <- c(object[c("call", "terms", "iter", "var")], list(coefficients = coef.table,
                                                             aliased = aliased))
    if (correlation && sum(aliased) != length(aliased)) {
      dd <- s.err
      ans$correlation <- object$var/outer(dd, dd)
      ans$symbolic.cor <- symbolic.cor
    }
    class(ans) <- "summary.rsadd"
  }
  else ans <- object
  return(ans)
}

print.summary.rsadd <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor,
                                 signif.stars = getOption("show.signif.stars"), ...)
{
  cat("\nCall:\n")
  cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),
      "\n\n", sep = "")
  if (length(x$aliased) == 0) {
    cat("\nNo Coefficients\n")
  }
  else {
    cat("\nCoefficients:\n")
    coefs <- x$coefficients
    if (!is.null(aliased <- x$aliased) && any(aliased)) {
      cn <- names(aliased)
      coefs <- matrix(NA, length(aliased), 4, dimnames = list(cn,
                                                              colnames(coefs)))
      coefs[!aliased, ] <- x$coefficients
    }
    printCoefmat(coefs, digits = digits, signif.stars = signif.stars,
                 na.print = "NA", ...)
  }
  if (length(x$warnme))
    cat("\n", x$warnme, "\n")
  correl <- x$correlation
  if (!is.null(correl)) {
    p <- NCOL(correl)
    if (p > 1) {
      cat("\nCorrelation of Coefficients:\n")
      if (is.logical(symbolic.cor) && symbolic.cor) {
        print(symnum(correl, abbr.colnames = NULL))
      }
      else {
        correl <- format(round(correl, 2), nsmall = 2,
                         digits = digits)
        correl[!lower.tri(correl)] <- ""
        print(correl[-1, -p, drop = FALSE], quote = FALSE)
      }
    }
  }
  cat("\n")
  invisible(x)
}



#' Excess hazard function smoothing
#'
#' An Epanechnikov kernel function based smoother for smoothing the baseline
#' excess hazard calculated by the \code{rsadd} function with the \code{EM}
#' method.
#'
#' The function performs Epanechnikov kernel smoothing. The follow up time is
#' divided (according to percentiles of event times) into several intervals
#' (number of intervals defined by \code{n.bwin}) in which the width is
#' calculated as a factor of the maximum span between event times.  Boundary
#' effects are also taken into account on both sides.
#'
#' @param fit Fit from the additive relative survival model using the \code{EM}
#' method.
#' @param bwin The relative width of the smoothing window (default is 1).
#' @param times The times at which the smoother is to be evaluated. If missing,
#' it is evaluated at all event times.
#' @param n.bwin Number of times that the window width may change.
#' @param left If \code{FALSE} (default) smoothing is performed symmetrically,
#' if \code{TRUE} only leftside neighbours are considered.
#' @return A list with two components: \item{lambda}{the smoothed excess
#' baseline hazard function} \item{times}{the times at which the smoothed
#' excess baseline hazard is evaluated.}
#' @seealso \code{\link{rsadd}},
#' @references Package. Pohar M., Stare J. (2006) "Relative survival analysis
#' in R." Computer Methods and Programs in Biomedicine, \bold{81}: 272--278
#'
#' Relative survival: Pohar, M., Stare, J. (2007) "Making relative survival
#' analysis relatively easy." Computers in biology and medicine, \bold{37}:
#' 1741--1749.
#'
#' EM algorithm: Pohar Perme M., Henderson R., Stare, J. (2009) "An approach to
#' estimation in relative survival regression." Biostatistics, \bold{10}:
#' 136--146.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' #fit an additive model with the EM method
#' fit <- rsadd(Surv(time,cens)~sex+age,rmap=list(age=age*365.241),
#' 		ratetable=slopop,data=rdata,int=5,method="EM")
#' sm <- epa(fit)
#' plot(sm$times,sm$lambda)
#'
epa <- function(fit,bwin,times,n.bwin=16,left=FALSE){
  #bwin ... width of the window, relative to the default (1)
  #fit ... EM fit
  #times... times at which the smoothed plot is calculated
  #n.bwin ... number of different windows
  #left ... only predictable smoothing
  utd <- fit$times
  if(missing(times))times <- seq(1,max(utd),length=100)
  if(max(times)>max(utd)){
    warning("Cannot extrapolate beyond max event time")
    times <- pmax(times,max(utd))
  }
  nutd <- length(utd)
  nt4 <- c(1,ceiling(nutd*(1:n.bwin)/n.bwin))
  if(missing(bwin))bwin <- rep(length(fit$times)/100,n.bwin)
  else bwin <- rep(bwin*length(fit$times)/100,n.bwin)
  for(it in 1:n.bwin){
    bwin[it] <- bwin[it]*max(diff(utd[nt4[it]:nt4[it+1]]))
  }
  while(utd[nt4[2]]<bwin[1]){		# ce je bwin velik, skrajsamo nt4
    nt4 <- nt4[-2]
    if(length(nt4)==1)break
  }
  #the smoothing matrix
  if(left) krn <- kernerleftch(utd,bwin,nt4)
  else krn <- kern(times,utd,bwin,nt4)
  lams <- pmax(krn%*%fit$lam0.ns,0)
  list(lambda=lams,times=times)				#	, weights=c(fit$times[1],diff(fit$times)))
}

Kern <- function (t, tv, b, tD, nt4)
{
  Rb <- max(tv)					#Right border
  kmat <- NULL
  tvs <- tv
  tv <- tv[-1]
  kt <- function(q,t)12*(t+1)/(1+q)^4*( (1-2*q)*t + (3*q^2-2*q+1)/2  )
  totcajti <- NULL
  for (it in 1:(length(nt4) - 1)) {
    cajti <- t[t>tvs[nt4[it]] & t<=tvs[nt4[it + 1]]]
    if(length(cajti)){
      q <- min( cajti/b[it],1,(Rb-cajti)/b[it])
      if(q<1 & length(cajti)>1){
        jc <- 1
        while(jc <=length(cajti)){
          qd <- pmin( cajti[jc:length(cajti)]/b[it],1,(Rb-cajti[jc:length(cajti)])/b[it])
          q <- qd[1]
          if(q==1){
            casi <- cajti[jc:length(cajti)][qd==1]
            q <- 1
            jc <- sum(qd==1)+jc
          }
          else{
            casi <- cajti[jc]
            jc <- jc+1
          }
          kmat1 <- outer(casi, tv, "-")/b[it]		#z - to je ok
          if(q<1){
            if(casi>b[it]) kmt1 <- -kmat1
            vr <- kt(q,kmat1)*(kmat1>=-1 & kmat1 <= q)
          }
          else vr <-  pmax((1 - kmat1^2) * .75,0)
          kmat <- rbind(kmat, vr/b[it])
          totcajti <- c(totcajti,casi)
        }
      }
      else{
        kmat1 <- outer(cajti, tv, "-")/b[it]		#z - to je ok
        q <- min( cajti/b[it],1)
        if(q<1)vr <- kt(q,kmat1)*(kmat1>=-1 & kmat1 <= q)
        else vr <-  pmax((1 - kmat1^2) * .75,0)
        kmat <- rbind(kmat, vr/b[it])
        totcajti <- c(totcajti,cajti)
      }#else
    }#if

  }#for
  kmat
}

kern <- function (times,td, b, nt4)
{
  n <- length(td)
  ttemp <- td[td >= b[1]]
  ntemp <- length(ttemp)
  if (ntemp == n)
    nt4 <- c(0, nt4[-1])
  td <- c(0,td)
  nt4 <- c(1,nt4+1)
  b <- c(b[1],b)
  krn <- Kern(times, td, b, max(td), nt4)
  krn
}

exp.prep <- function (x, y,ratetable,status,times,fast=FALSE,ys,prec,cmp=F,netweiDM=FALSE) {			#function that prepares the data for C function call

  #x= matrix of demographic covariates - each individual has one line
  #y= follow-up time for each individual (same length as nrow(x)!)
  #ratetable= rate table used for calculation
  #status= status for each individual (same length as nrow(x)!), not needed if we only need Spi, status needed for rs.surv
  #times= times at which we wish to evaluate the quantities, not needed if we only need Spi, times needed for rs.surv
  #fast=for mpp method only
  #netweiDM=should new netwei script be used

  x <- as.matrix(x)
  if (ncol(x) != length(dim(ratetable)))
    stop("x matrix does not match the rate table")
  atts <- attributes(ratetable)

  cuts <- atts$cutpoints

  if (is.null(atts$type)) {
    rfac <- atts$factor
    us.special <- (rfac > 1)
  }
  else {
    rfac <- 1 * (atts$type == 1)
    us.special <- (atts$type == 4)
  }
  if (length(rfac) != ncol(x))
    stop("Wrong length for rfac")


  if (any(us.special)) {
    if (sum(us.special) > 1)
      stop("Two columns marked for special handling as a US rate table")
    cols <- match(c("age", "year"), atts$dimid)
    if (any(is.na(cols)))
      stop("Ratetable does not have expected shape")
    if (exists("as.Date")) {
      bdate <- as.Date("1960/1/1") + (x[, cols[2]] - x[,
                                                       cols[1]])
      byear <- format(bdate, "%Y")
      offset <- as.numeric(bdate - as.Date(paste(byear,
                                                 "01/01", sep = "/")))
    }
    else if (exists("date.mdy")) {
      bdate <- as.date(x[, cols[2]] - x[, cols[1]])
      byear <- date.mdy(bdate)$year
      offset <- bdate - mdy.date(1, 1, byear)
    }
    else stop("Can't find an appropriate date class\n")
    x[, cols[2]] <- x[, cols[2]] - offset
    if (any(rfac > 1)) {
      temp <- which(us.special)
      nyear <- length(cuts[[temp]])
      nint <- rfac[temp]
      cuts[[temp]] <- round(approx(nint * (1:nyear), cuts[[temp]],
                                   nint:(nint * nyear))$y - 1e-04)
    }
  }

  if(!missing(status)){		#the function was called from rs.surv
    if(length(status)!=nrow(x))    stop("Wrong length for status")

    if(missing(times))    times <- sort(unique(y))

    if (any(times < 0))
      stop("Negative time point requested")
    ntime <- length(times)
    if(missing(ys)) ys <- rep(0,length(y))
    #    times2 <- times
    #    times2[1] <- preci
    if(cmp)   temp <- .Call("cmpfast",  as.integer(rfac), 		#fast=pohar-perme or ederer2 - data from pop. tables only while under follow-up
                            as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                            x, y, ys,as.integer(status), times,PACKAGE="relsurv")
    else if(fast&!missing(prec))    temp <- .Call("netfastpinter2",  as.integer(rfac), 		#fast=pohar-perme or ederer2 - data from pop. tables only while under follow-up
                                                  as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                                                  x, y, ys,as.integer(status), times,prec,PACKAGE="relsurv")
    else if(fast&missing(prec))    temp <- .Call("netfastpinter",  as.integer(rfac), 		#fast=pohar-perme or ederer2 - data from pop. tables only while under follow-up
                                                 as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                                                 x, y, ys,as.integer(status), times,PACKAGE="relsurv")
    else if(netweiDM==TRUE)    temp <- .Call("netweiDM",  as.integer(rfac),
                                             as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                                             x, y, ys,as.integer(status), times,PACKAGE="relsurv")
    else    temp <- .Call("netwei",  as.integer(rfac),
                          as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                          x, y, as.integer(status), times,PACKAGE="relsurv")
  }
  else{				#only expected survival at time y is needed for each individual
    if(length(y)==1)y <- rep(y,nrow(x))
    if(length(y)!=nrow(x)) stop("Wrong length for status")
    temp <- .Call("expc",  as.integer(rfac),
                  as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                  x, y,PACKAGE="relsurv")
    temp  <- temp$surv
  }
  temp
}



#' Compute a Relative Survival Curve
#'
#' Computes an estimate of the relative survival curve using the Ederer I,
#' Ederer II method, Pohar-Perme method or the Hakulinen method
#'
#' NOTE: The follow-up time must be specified in days. The \code{ratetable}
#' being used may have different variable names and formats than the user's
#' data set, this is dealt with by the \code{rmap} argument. For example, if
#' age is in years in the data set but in days in the \code{ratetable} object,
#' age=age*365.241 should be used. The calendar year can be in any date format
#' (date, Date and POSIXt are allowed), the date formats in the
#' \code{ratetable} and in the data may differ.
#'
#' The potential censoring times needed for the calculation of the expected
#' survival by the Hakulinen method are calculated automatically. The times of
#' censoring are left as they are, the times of events are replaced with
#' \code{fin.date - year}.
#'
#' The calculation of the Pohar-Perme estimate is more time consuming since
#' more data are needed from the population tables.  The old version of the
#' function, now named \code{rs.survo} can be used as a faster version for the
#' Hakulinen and Ederer II estimate.
#'
#' Numerical integration is required for Pohar-Perme estimate. The integration
#' precision is set with argument \code{precision}, which defaults to daily
#' intervals, a default that should give enough precision for any practical
#' purpose.
#'
#' Note that even though the estimate is always calculated using numerical
#' integration, only the values at event and censoring times are reported.
#' Hence, the function \code{plot} draws a step function in between and the
#' function \code{summary} reports the value at the last event or censoring
#' time before the specified time. If the output of the estimated values at
#' other points is required, this should be specified with argument
#' \code{add.times}.
#'
#' @param formula a formula object, with the response as a \code{Surv} object
#' on the left of a \code{~} operator, and, if desired, terms separated by the
#' \code{+} operator on the right. If no strata are used, \code{~1} should be
#' specified.
#'
#' NOTE: The follow-up time must be in days.
#' @param data a data.frame in which to interpret the variables named in the
#' \code{formula}.
#' @param ratetable a table of event rates, organized as a \code{ratetable}
#' object, such as \code{slopop}.
#' @param na.action a missing-data filter function, applied to the model.frame,
#' after any subset argument has been used.  Default is
#' \code{options()$na.action}.
#' @param fin.date the date of the study ending, used for calculating the
#' potential follow-up times in the Hakulinen method. If missing, it is
#' calculated as \code{max(year+time)}.
#' @param method the method for calculating the relative survival. The options
#' are \code{pohar-perme}(default), \code{ederer1}, \code{ederer2} and
#' \code{hakulinen}.
#' @param conf.type one of \code{plain}, \code{log} (the default), or
#' \code{log-log}. The first option causes the standard intervals curve +- k
#' *se(curve), where k is determined from conf.int. The log option calculates
#' intervals based on the cumulative hazard or log(survival). The last option
#' bases intervals on the log hazard or log(-log(survival)).
#' @param conf.int the level for a two-sided confidence interval on the
#' survival curve(s). Default is 0.95.
#' @param type defines how survival estimates are to be calculated given the
#' hazards.  The default (\code{kaplan-meier}) calculates the product integral,
#' whereas the option \code{fleming-harrington} exponentiates the negative
#' cumulative hazard. Analogous to the usage in \code{survfit}.
#' @param add.times specific times at which the curve should be evaluated.
#' @param precision Precision for numerical integration. Default is 1, which
#' means that daily intervals are taken, the value may be decreased to get a
#' higher precision or increased to achieve a faster calculation. The
#' calculation intervals always include at least all times of event and
#' censoring as border points.
#' @param rmap an optional list to be used if the variables are not organized
#' and named in the same way as in the \code{ratetable} object. See details
#' below.
#' @return a \code{survfit} object; see the help on \code{survfit.object} for
#' details.  The \code{survfit} methods are used for \code{print},
#' \code{summary}, \code{plot}, \code{lines}, and \code{points}.
#' @seealso \code{survfit}, \code{survexp}
#' @references Package: Pohar Perme, M., Pavlic, K. (2018) "Nonparametric
#' Relative Survival Analysis with the R Package relsurv". Journal of
#' Statistical Software. 87(8), 1-27, doi: "10.18637/jss.v087.i08" Theory:
#' Pohar Perme, M., Esteve, J., Rachet, B. (2016) "Analysing Population-Based
#' Cancer Survival - Settling the Controversies." BMC Cancer, 16 (933), 1-8.
#' doi:10.1186/s12885-016-2967-9. Theory: Pohar Perme, M., Stare, J., Esteve,
#' J. (2012) "On Estimation in Relative Survival", Biometrics, 68(1), 113-120.
#' doi:10.1111/j.1541-0420.2011.01640.x.
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' #calculate the relative survival curve
#' #note that the variable year must be given in a date format and that
#' #age must be multiplied by 365.241 in order to be expressed in days.
#' rs.surv(Surv(time,cens)~sex,rmap=list(age=age*365.241), ratetable=slopop,data=rdata)
#'
rs.surv <- function (formula = formula(data), data = parent.frame(),ratetable = relsurv::slopop,
                     na.action, fin.date, method = "pohar-perme", conf.type = "log",
                     conf.int = 0.95,type="kaplan-meier",add.times,precision=1,rmap)

  #formula: for example Surv(time,cens)~sex
  #data: the observed data set
  #ratetable: the population mortality tables
  #conf.type: confidence interval calculation (plain, log or log-log)
  #conf.int: confidence interval
{

  call <- match.call()
  if (!missing(rmap)) {
    rmap <- substitute(rmap)
  }
  rform <- rformulate(formula,data, ratetable, na.action,rmap)
  data <- rform$data								#the data set
  type <- match.arg(type, c("kaplan-meier", "fleming-harrington"))		#method of hazard -> survival scale transformation
  type <- match(type, c("kaplan-meier", "fleming-harrington"))
  method <- match.arg(method,c("pohar-perme", "ederer2", "hakulinen","ederer1"))	#method of relative surv. curve estimation
  method <- match(method,c("pohar-perme", "ederer2", "hakulinen","ederer1"))
  conf.type <- match.arg(conf.type,c("plain","log","log-log"))		#conf. interval type

  if (method == 3) {						#need potential follow-up time for Hak. method
    R <- rform$R
    coll <- match("year", attributes(ratetable)$dimid)
    year <- R[, coll]							#calendar year in the data
    if (missing(fin.date))
      fin.date <- max(rform$Y + year)					#final date for everybody set to the last day observed
    Y2 <- rform$Y							#change into potential follow-up time
    if (length(fin.date) == 1) 						#if final date equal for everyone
      Y2[rform$status == 1] <- fin.date - year[rform$status == 1]#set pot.time for those that died (equal to censoring time for others)
    else if (length(fin.date) == nrow(rform$R))
      Y2[rform$status == 1] <- fin.date[rform$status ==
                                          1] - year[rform$status == 1]
    else stop("fin.date must be either one value or a vector of the same length as the data")
    status2 <- rep(0, nrow(rform$X))						#stat2=0 for everyone
  }
  p <- rform$m								#number of covariates
  if (p > 0) 									#if covariates
    data$Xs <- strata(rform$X[, ,drop=FALSE ])				#make strata according to covariates
  else data$Xs <- rep(1, nrow(data))						#if no covariates, just put 1

  se.fac <- sqrt(qchisq(conf.int, 1))						#factor needed for confidence interval
  out <- NULL
  out$n <- table(data$Xs)							#table of strata
  out$time <- out$n.risk <- out$n.event <- out$n.censor <- out$surv <- out$std.err <- out$strata <-  NULL
  #out$index <- out$strata0 <- NULL
  # out$index = indices of the original times from the data among the times used for calculations
  # out$strata0 = the same as out$strata but only on the original times from the data
  for (kt in 1:length(out$n)) {						#for each stratum
    inx <- which(data$Xs == names(out$n)[kt])				#individuals within this stratum
    tis <- sort(unique(rform$Y[inx])) #unique times

    #if (method == 1 & all.times == TRUE) tis <- sort(union(rform$Y[inx],as.numeric(1:max(floor(rform$Y[inx])))))	#1-day long intervals used - to take into the account the continuity of the pop. part
    if (method == 1 & !missing(add.times)){
      #tis <- sort(union(rform$Y[inx],as.numeric(1:max(floor(rform$Y[inx])))))	#1-day long intervals used - to take into the account the continuity of the pop. part
      add.times <- pmin(as.numeric(add.times),max(rform$Y[inx]))
      tis <- sort(union(rform$Y[inx],as.numeric(add.times)))	#1-day long intervals used - to take into the account the continuity of the pop. part
    }
    if(method==3)tis <- sort(unique(pmin(max(tis),c(tis,Y2[inx]))))				#add potential times in case of Hakulinen
    #out$index <- c(out$index, which(tis %in% rform$Y[inx])+length(out$time))

    temp <- exp.prep(rform$R[inx,,drop=FALSE],rform$Y[inx],rform$ratetable,rform$status[inx],times=tis,fast=(method<3),prec=precision)	#calculate the values for each interval of time

    out$time <- c(out$time, tis)						#add times
    out$n.risk <- c(out$n.risk, temp$yi)					#add number at risk for each time
    out$n.event <- c(out$n.event, temp$dni)					#add number of events for each time
    out$n.censor <- c(out$n.censor,  c(-diff(temp$yi),temp$yi[length(temp$yi)]) - temp$dni) 	#add number of censored for each time

    if(method==1){ 								#pohar perme method
      #approximate1 <- (temp$yidlisi/temp$yisi +temp$yidlisitt/temp$yisitt)/2
      #approximate <- (temp$yidlisiw/temp$yisi +temp$yidlisiw/temp$yisitt)/2		#approximation for integration
      approximate <- temp$yidlisiw
      #haz <- temp$dnisi/temp$yisi - temp$yidlisi/temp$yisi		#cumulative hazard increment on each interval
      haz <- temp$dnisi/temp$yisi - approximate			#cumulative hazard increment on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dnisisq/(temp$yisi)^2)))  #standard error on each interval
    }
    else if(method==2){							#ederer2 method
      haz <- temp$dni/temp$yi - temp$yidli/temp$yi			#cumulative hazard increment on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dni/(temp$yi)^2)))  #standard error on each interval
    }
    else if(method==3){							#Hakulinen method
      temp2 <- exp.prep(rform$R[inx,,drop=FALSE],Y2[inx],ratetable,status2[inx],times=tis)	#calculate the values for each interval of time
      popsur <- exp(-cumsum(temp2$yisidli/temp2$yisis))			#population survival
      haz <- temp$dni/temp$yi						#observed hazard on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dni/(temp$yi)^2)))  #standard error on each interval
    }
    else if(method==4){							#Ederer I
      popsur <- temp$sis/length(inx)					#population survival
      haz <- temp$dni/temp$yi						#observed hazard on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dni/(temp$yi)^2)))  #standard error on each interval
    }
    if(type==2)survtemp <- exp(-cumsum(haz))
    else survtemp <-  cumprod(1-haz)
    if(method>2){
      survtemp <- survtemp/popsur
    }
    out$surv <- c(out$surv,survtemp)
    out$strata <- c(out$strata, length(tis))				#number of times in this strata
    #out$strata0 <- c(out$strata0, length(unique(rform$Y[inx])))
  }
  if (conf.type == "plain") {
    out$lower <- as.vector(out$surv - out$std.err * se.fac * 		#surv + fac*se
                             out$surv)
    out$upper <- as.vector(out$surv + out$std.err * se.fac *
                             out$surv)
  }
  else if (conf.type == "log") {						#on log scale and back
    out$lower <- exp(as.vector(log(out$surv) - out$std.err *
                                 se.fac))
    out$upper <- exp(as.vector(log(out$surv) + out$std.err *
                                 se.fac))
  }
  else if (conf.type == "log-log") {						#on log-log scale and back
    out$lower <- exp(-exp(as.vector(log(-log(out$surv)) -
                                      out$std.err * se.fac/log(out$surv))))
    out$upper <- exp(-exp(as.vector(log(-log(out$surv)) +
                                      out$std.err * se.fac/log(out$surv))))
  }
  names(out$strata) <-  names(out$n)
  #names(out$strata0) <- names(out$n)
  if (p == 0){
    out$strata <-  NULL						#if no covariates
    #out$strata0 <- NULL
  }
  #if (method != 1) out$index <- out$strata0 <- NULL # if method != pohar-perme
  out$n <- as.vector(out$n)
  out$conf.type <- conf.type
  out$conf.int <- conf.int
  out$method <- method
  out$call <- call
  out$type <- "right"
  class(out) <- c("survfit", "rs.surv")
  out
}



#' Net Expected Sample Size Is Estimated
#'
#' Calculates how the sample size decreases in time due to population mortality
#'
#' The function calculates the sample size we can expect at a certain time
#' point if the patients die only due to population causes (population survival
#' * initial sample size in a certain category), i.e. the number of individuals
#' that remains at risk at given timepoints after the individuals who die due
#' to population causes are removed. The result should be used as a guideline
#' for the sensible length of follow-up interval when calculating the net
#' survival.
#'
#' The first column of the output reports the number of individuals at time 0.
#' The last column of the output reports the conditional expected (population)
#' survival time for each subgroup.
#'
#' @param formula a formula object, same as in \code{rs.surv}. The right-hand
#' side of the formula object includes the variable that defines the subgroups
#' (a variable of type \code{factor}) by which the expected sample size is to
#' be calculated.
#' @param data a data.frame in which to interpret the variables named in the
#' \code{formula}.
#' @param ratetable a table of event rates, organized as a \code{ratetable}
#' object, such as \code{slopop}.
#' @param times Times at which the calculation should be evaluated - in years!
#' @param rmap an optional list to be used if the variables are not organized
#' and named in the same way as in the \code{ratetable} object. See details of
#' the \code{rs.surv} function.
#' @return A list of values.
#' @seealso \code{rs.surv}
#' @references Pohar Perme, M., Pavlic, K. (2018) "Nonparametric Relative
#' Survival Analysis with the R Package relsurv". Journal of Statistical
#' Software. 87(8), 1-27, doi: "10.18637/jss.v087.i08"
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' rdata$agegr <-cut(rdata$age,seq(40,95,by=5))
#' nessie(Surv(time,cens)~agegr,rmap=list(age=age*365.241),
#' 	ratetable=slopop,data=rdata,times=c(1,3,5,10,15))
#'
nessie <- function (formula = formula(data), data = parent.frame(), ratetable = relsurv::slopop,times,rmap)

  #formula: for example Surv(time,cens)~sex
  #data: the observed data set
  #ratetable: the population mortality tables
  #times: the times at which to report NESS, if no default, then all unique times

{

  call <- match.call()
  if (!missing(rmap)) {
    rmap <- substitute(rmap)
  }
  na.action <- NA		#set the object just to be able to execute the rformulate call
  rform <- rformulate(formula, data, ratetable,na.action, rmap)			#get the data ready

  templab <- attr(rform$Terms,"term.labels")
  if(!is.null(attr(rform$Terms,"specials")$ratetable))templab <- templab[-length(templab)]	#delete the last term in the formula if the ratetable argument is there
  nameslist <- vector("list",length(templab))
  for(it in 1:length(nameslist)){
    valuetab <- table(data[,match(templab[it],names(data))])
    nameslist[[it]] <- paste(templab[it],names(valuetab),sep="")
  }
  names(nameslist) <- templab


  data <- rform$data								#the data set


  p <- rform$m								#number of covariates
  if (p > 0) 	{	#if covariates

    data$Xs <- my.strata(rform$X[,,drop=F],nameslist=nameslist)				#make strata according to covariates
    #data$Xs <- factor(data$Xs,levels=nameslist)				#order them in the same way as namelist
  }
  else data$Xs <- rep(1, nrow(data))						#if no covariates, just put 1

  if(!missing(times)) tis <- times
  else tis <- unique(sort(floor(rform$Y/365.241)))			#unique years of follow-up
  tis <- unique(c(0,tis))
  tisd <- tis*365.241


  out <- NULL
  out$n <- table(data$Xs)							#table of strata
  out$sp <- out$strata <- NULL
  # for (kt in 1:length(out$n)) {						#for each stratum
  for (kt in order(names(table(data$Xs)))) {						#for each stratum
    inx <- which(data$Xs == names(out$n)[kt])				#individuals within this stratum

    temp <- exp.prep(rform$R[inx,,drop=FALSE],rform$Y[inx],rform$ratetable,rform$status[inx],times=tisd,fast=FALSE)	#calculate the values for each interval of time

    out$time <- c(out$time, tisd)						#add times
    out$sp <- c(out$sp, temp$sis)						#add expected number of individuals alive
    out$strata <- c(out$strata, length(tis))				#number of times in this strata

    temp <- exp.prep(rform$R[inx,,drop=FALSE],rform$Y[inx],rform$ratetable,rform$status[inx],times=(seq(0,100,by=.5)*365.241)[-1],fast=FALSE)	#calculate the values for each interval of time
    out$povp <- c(out$povp,mean(temp$sit/365.241))
  }

  names(out$strata) <- names(out$n)[order(names(table(data$Xs)))]
  if (p == 0) out$strata <- NULL						#if no covariates

  mata <- matrix(out$sp,ncol=length(tis),byrow=TRUE)
  mata <- data.frame(mata)
  mata <- cbind(mata,out$povp)
  row.names(mata) <- names(out$n)[order(names(table(data$Xs)))]
  names(mata) <- c(tis,"c.exp.surv")


  cat("\n")
  print(round(mata,1))
  cat("\n")

  out$mata <- mata
  out$n <- as.vector(out$n)
  class(out) <- "nessie"
  invisible(out)
}







rs.period <- function (formula = formula(data), data = parent.frame(), ratetable = relsurv::slopop,
                       na.action, fin.date, method = "pohar-perme", conf.type = "log",
                       conf.int = 0.95,type="kaplan-meier",winst,winfin,diag.date,rmap)

  #formula: for example Surv(time,cens)~sex
  #data: the observed data set
  #ratetable: the population mortality tables
  #conf.type: confidence interval calculation (plain, log or log-log)
  #conf.int: confidence interval
  #winst: start of the period window (inclusive)
  #winfin: end of the period window (inclusive)

{

  call <- match.call()
  if (!missing(rmap)) {
    rmap <- substitute(rmap)
  }
  rform <- rformulate(formula, data, ratetable, na.action,rmap)			#get the data ready
  data <- rform$data								#the data set
  type <- match.arg(type, c("kaplan-meier", "fleming-harrington"))		#method of hazard -> survival scale transformation
  type <- match(type, c("kaplan-meier", "fleming-harrington"))
  method <- match.arg(method,c("pohar-perme", "ederer2", "hakulinen","ederer1"))	#method of relative surv. curve estimation
  method <- match(method,c("pohar-perme", "ederer2", "hakulinen","ederer1"))
  conf.type <- match.arg(conf.type,c("plain","log","log-log"))		#conf. interval type

  #machinations needed for period survival:
  R <- rform$R
  coll <- match("year", attributes(ratetable)$dimid)
  year <- R[, coll]							#calendar year in the data

  ys <- as.numeric(winst - year)
  yf <- as.numeric(winfin - year)

  relv <- which(ys <= rform$Y & yf>0)					#relevant individuals -> live up to the period window and were diagnosed before window end
  centhem <- which(yf < rform$Y)					#censor these - their event happens outside of the period window

  rform$status[centhem] <- 0
  rform$Y[centhem] <- yf[centhem]

  rform$Y <- rform$Y[relv]
  rform$X <- rform$X[relv,,drop=F]
  rform$R <- rform$R[relv,,drop=F]
  rform$status <- rform$status[relv]
  data <- data[relv,,drop=F]
  ys <- ys[relv]
  yf <- yf[relv]
  year <- year[relv]

  if (method == 3) {								#need potential follow-up time for Hak. method
    if (missing(fin.date))
      fin.date <- max(rform$Y + year)					#final date for everybody set to the last day observed
    Y2 <- rform$Y							#change into potential follow-up time
    if (length(fin.date) == 1) 						#if final date equal for everyone
      Y2[rform$status == 1] <- fin.date - year[rform$status == 1]#set pot.time for those that died (equal to censoring time for others)
    else if (length(fin.date[relv]) == nrow(rform$R)) 	{
      fin.date <- fin.date[relv]
      Y2[rform$status == 1] <- fin.date[rform$status ==
                                          1] - year[rform$status == 1]
    }
    else stop("fin.date must be either one value of a vector of the same length as the data")
    status2 <- rep(0, nrow(rform$X))						#stat2=0 for everyone
  }
  p <- rform$m								#number of covariates
  if (p > 0) 									#if covariates
    data$Xs <- strata(rform$X[, ,drop=FALSE ])				#make strata according to covariates
  else data$Xs <- rep(1, nrow(data))						#if no covariates, just put 1

  se.fac <- sqrt(qchisq(conf.int, 1))						#factor needed for confidence interval
  out <- NULL
  out$n <- table(data$Xs)							#table of strata
  out$time <- out$n.risk <- out$n.event <- out$n.censor <- out$surv <- out$std.err <- out$strata <- NULL
  for (kt in 1:length(out$n)) {						#for each stratum
    inx <- which(data$Xs == names(out$n)[kt])				#individuals within this stratum

    tis <- sort(unique(rform$Y[inx]))					#unique times
    if(method==3)tis <- sort(unique(pmin(max(tis),c(tis,Y2[inx]))))				#add potential times in case of Hakulinen

    ys <- pmax(ys,0)
    #tis <- sort(unique(c(tis,ys[ys>0]-1,ys[ys>0])))
    tis <- sort(unique(c(tis,ys[ys>0])))
    tis <- sort(unique(c(tis,tis-1,tis+1)))					#the day after exiting, the day before entering
    tis <- tis[-length(tis)]							#exclude the largest since it is beyond observation time (1 day later)

    temp <- exp.prep(rform$R[inx,,drop=FALSE],rform$Y[inx],rform$ratetable,rform$status[inx],times=tis,fast=(method<3),ys=ys)	#calculate the values for each interval of time

    out$time <- c(out$time, tis)						#add times
    out$n.risk <- c(out$n.risk, temp$yi)					#add number at risk for each time
    out$n.event <- c(out$n.event, temp$dni)					#add number of events for each time
    out$n.censor <- c(out$n.censor,  c(-diff(temp$yi),temp$yi[length(temp$yi)]) - temp$dni) 	#add number of censored for each time

    if(method==1){ 								#pohar perme method
      haz <- temp$dnisi/temp$yisi - temp$yidlisi/temp$yisi			#cumulative hazard increment on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dnisisq/(temp$yisi)^2)))  #standard error on each interval
    }
    else if(method==2){							#ederer2 method
      haz <- temp$dni/temp$yi - temp$yidli/temp$yi			#cumulative hazard increment on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dni/(temp$yi)^2)))  #standard error on each interval
    }
    else if(method==3){							#Hakulinen method
      temp2 <- exp.prep(rform$R[inx,,drop=FALSE],Y2[inx],rform$ratetable,status2[inx],times=tis,ys=ys)	#calculate the values for each interval of time
      popsur <- exp(-cumsum(temp2$yisidli/temp2$yisis))			#population survival
      haz <- temp$dni/temp$yi						#observed hazard on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dni/(temp$yi)^2)))  #standard error on each interval
    }
    else if(method==4){							#Ederer I
      popsur <- temp$sis/length(inx)					#population survival
      haz <- temp$dni/temp$yi						#observed hazard on each interval
      out$std.err <- c(out$std.err, sqrt(cumsum(temp$dni/(temp$yi)^2)))  #standard error on each interval
    }
    if(type==2)survtemp <- exp(-cumsum(haz))
    else survtemp <-  cumprod(1-haz)
    if(method>2){
      survtemp <- survtemp/popsur
    }
    out$surv <- c(out$surv,survtemp)
    out$strata <- c(out$strata, length(tis))				#number of times in this strata
  }
  if (conf.type == "plain") {
    out$lower <- as.vector(out$surv - out$std.err * se.fac * 		#surv + fac*se
                             out$surv)
    out$upper <- as.vector(out$surv + out$std.err * se.fac *
                             out$surv)
  }
  else if (conf.type == "log") {						#on log scale and back
    out$lower <- exp(as.vector(log(out$surv) - out$std.err *
                                 se.fac))
    out$upper <- exp(as.vector(log(out$surv) + out$std.err *
                                 se.fac))
  }
  else if (conf.type == "log-log") {						#on log-log scale and back
    out$lower <- exp(-exp(as.vector(log(-log(out$surv)) -
                                      out$std.err * se.fac/log(out$surv))))
    out$upper <- exp(-exp(as.vector(log(-log(out$surv)) +
                                      out$std.err * se.fac/log(out$surv))))
  }
  names(out$strata) <- names(out$n)
  if (p == 0) out$strata <- NULL						#if no covariates
  out$n <- as.vector(out$n)
  out$conf.type <- conf.type
  out$conf.int <- conf.int
  out$method <- method
  out$call <- call
  out$type <- "right"
  class(out) <- c("survfit", "rs.surv")
  out
}



#' expprep2 function
#'
#' Helper calculation function using C code. Saved also as exp.prep (unexported
#' function).
#'
#' Helper function used in rs.surv and other relsurv functions.
#'
#' @param x matrix of demographic covariates - each individual has one line
#' @param y follow-up time for each individual (same length as nrow(x))
#' @param ratetable rate table used for calculation
#' @param status status for each individual (same length as nrow(x)!), not
#' needed if we only need Spi, status needed for rs.surv
#' @param times times at which we wish to evaluate the quantities, not needed
#' if we only need Spi, times needed for rs.surv
#' @param fast for mpp method only
#' @param ys entry times (if empty, individuals are followed from time 0)
#' @param prec deprecated
#' @param cmp should cmpfast.C be used
#' @param netweiDM should new netwei script be used
#' @return List containing the calculated hazards and probabilities using the
#' population mortality tables.
#' @seealso rs.surv
#' @keywords survival
#' @export expprep2
expprep2 <- function (x, y,ratetable,status,times,fast=FALSE,ys,prec,cmp=F,netweiDM=FALSE) {			#function that prepares the data for C function call

  #x= matrix of demographic covariates - each individual has one line
  #y= follow-up time for each individual (same length as nrow(x)!)
  #ratetable= rate table used for calculation
  #status= status for each individual (same length as nrow(x)!), not needed if we only need Spi, status needed for rs.surv
  #times= times at which we wish to evaluate the quantities, not needed if we only need Spi, times needed for rs.surv
  #fast=for mpp method only
  #netweiDM=should new netwei script be used

  x <- as.matrix(x)
  if (ncol(x) != length(dim(ratetable)))
    stop("x matrix does not match the rate table")
  atts <- attributes(ratetable)

  cuts <- atts$cutpoints

  if (is.null(atts$type)) {
    rfac <- atts$factor
    us.special <- (rfac > 1)
  }
  else {
    rfac <- 1 * (atts$type == 1)
    us.special <- (atts$type == 4)
  }
  if (length(rfac) != ncol(x))
    stop("Wrong length for rfac")


  if (any(us.special)) {
    if (sum(us.special) > 1)
      stop("Two columns marked for special handling as a US rate table")
    cols <- match(c("age", "year"), atts$dimid)
    if (any(is.na(cols)))
      stop("Ratetable does not have expected shape")
    if (exists("as.Date")) {
      bdate <- as.Date("1960/1/1") + (x[, cols[2]] - x[,
                                                       cols[1]])
      byear <- format(bdate, "%Y")
      offset <- as.numeric(bdate - as.Date(paste(byear,
                                                 "01/01", sep = "/")))
    }
    else if (exists("date.mdy")) {
      bdate <- as.date(x[, cols[2]] - x[, cols[1]])
      byear <- date.mdy(bdate)$year
      offset <- bdate - mdy.date(1, 1, byear)
    }
    else stop("Can't find an appropriate date class\n")
    x[, cols[2]] <- x[, cols[2]] - offset
    if (any(rfac > 1)) {
      temp <- which(us.special)
      nyear <- length(cuts[[temp]])
      nint <- rfac[temp]
      cuts[[temp]] <- round(approx(nint * (1:nyear), cuts[[temp]],
                                   nint:(nint * nyear))$y - 1e-04)
    }
  }

  if(!missing(status)){		#the function was called from rs.surv
    if(length(status)!=nrow(x))    stop("Wrong length for status")

    if(missing(times))    times <- sort(unique(y))

    if (any(times < 0))
      stop("Negative time point requested")
    ntime <- length(times)
    if(missing(ys)) ys <- rep(0,length(y))
    #    times2 <- times
    #    times2[1] <- preci
    if(cmp)   temp <- .Call("cmpfast",  as.integer(rfac), 		#fast=pohar-perme or ederer2 - data from pop. tables only while under follow-up
                            as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                            x, y, ys,as.integer(status), times,PACKAGE="relsurv")
    else if(fast&!missing(prec))    temp <- .Call("netfastpinter2",  as.integer(rfac), 		#fast=pohar-perme or ederer2 - data from pop. tables only while under follow-up
                                                  as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                                                  x, y, ys,as.integer(status), times,prec,PACKAGE="relsurv")
    else if(fast&missing(prec))    temp <- .Call("netfastpinter",  as.integer(rfac), 		#fast=pohar-perme or ederer2 - data from pop. tables only while under follow-up
                                                 as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                                                 x, y, ys,as.integer(status), times,PACKAGE="relsurv")
    else if(netweiDM==TRUE)    temp <- .Call("netweiDM",  as.integer(rfac),
                                             as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                                             x, y, ys,as.integer(status), times,PACKAGE="relsurv")
    else    temp <- .Call("netwei",  as.integer(rfac),
                          as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                          x, y, as.integer(status), times,PACKAGE="relsurv")
  }
  else{				#only expected survival at time y is needed for each individual
    if(length(y)==1)y <- rep(y,nrow(x))
    if(length(y)!=nrow(x)) stop("Wrong length for status")
    temp <- .Call("expc",  as.integer(rfac),
                  as.integer(atts$dim), as.double(unlist(cuts)), ratetable,
                  x, y,PACKAGE="relsurv")
    temp  <- temp$surv
  }
  temp
}

Try the relsurv package in your browser

Any scripts or data that you put into this service are public.

relsurv documentation built on Dec. 28, 2022, 2:25 a.m.