Nothing
joinpoint.conditional <- function(fit.uncond, start.intervals, end.intervals, njp=NULL) {
check_fit.uncond(fit.uncond)
check_intervals(start.intervals, end.intervals)
check_njp(njp, fit.uncond)
ret <- jp.cond.main(fit.uncond, start.intervals, end.intervals, njp)
ret
}
jp.cond.main <- function(fit, start.ints, end.ints, njp) {
# First save vectors
interval <- fit$interval
year <- fit$year
number.alive <- fit$number.alive
number.event <- fit$number.event
number.loss <- fit$number.loss
# Get the correct fit
if (length(njp)) fit <- fit$FitList[[njp+1]]
# Put vectors in fitted object
fit$interval <- interval
fit$year <- year
fit$number.alive <- number.alive
fit$number.event <- number.event
fit$number.loss <- number.loss
int.col <- fit$interval
year.col <- fit$year
df <- fit$fullpredicted
intervals <- getDataVec(df, int.col)
uints <- unique(intervals)
tmp <- (start.ints %in% uints) & (end.ints %in% uints)
start.ints <- start.ints[tmp]
end.ints <- end.ints[tmp]
if (!length(start.ints)) stop("ERROR: intervals are not valid")
# Get unique intervals
tmp <- unique(cbind(start.ints, end.ints))
start.ints <- tmp[, 1, drop=TRUE]
end.ints <- tmp[, 2, drop=TRUE]
# Data should already be ordered
df <- orderDataByIntYear(df, year.col, int.col)
# Set missing alive, dead, loss to previous
df <- jp.cond.setMiss(df, fit)
# New column in output for starting interval
fit$interval.start <- "Start.interval"
# Compute conditional estimates and standard errors for each start/end in all years
intervals <- getDataVec(df, int.col)
years <- getDataVec(df, year.col)
uyears <- unique(years)
nyears <- length(uyears)
ret <- NULL
for (i in 1:nyears) {
tmp <- years %in% uyears[i]
x <- jp.cond.getEstSE.1yr(df[tmp, , drop=FALSE], start.ints, end.ints, fit)
if (!length(ret)) {
ret <- x
} else {
ret <- rbind(ret, x)
}
}
ret <- jp.cond.setReturn(ret, start.ints, end.ints, fit)
ret
}
jp.cond.setReturn <- function(ret, startvec, endvec, obj) {
n <- nrow(ret)
st.int.col <- obj$interval.start
avec <- ret[, st.int.col, drop=TRUE]
runvec <- rep(Inf, n)
ustart <- sort(unique(startvec))
for (i in 1:length(ustart)) {
tmp <- avec %in% ustart[i]
if (any(tmp)) runvec[tmp] <- i
}
ord <- order(runvec)
ret <- ret[ord, , drop=FALSE]
ret
}
jp.cond.getEstSE.1yr <- function(x, startvec, endvec, obj) {
ustartvec <- unique(startvec)
nstart <- length(ustartvec)
int.col <- obj$interval
st.int.col <- obj$interval.start
intervals <- getDataVec(x, int.col)
rownames(x) <- as.character(intervals)
x0 <- x
ret <- NULL
# Compute conditional probs for all possible columns in x
#vv <- c("pred_int", "pred_cum",
# "Expected_Survival_Interval", "Expected_Survival_Cum",
# "Observed_Survival_Interval", "Observed_Survival_Cum",
# "Relative_Survival_Interval", "Relative_Survival_Cum")
#tmp <- vv %in% colnames(x)
#vv <- vv[tmp]
estv1 <- "pred_int"
estv2 <- "pred_cum"
#estv3 <- "Expected_Survival_Interval"
for (i in 1:nstart) {
x <- x0
t0 <- ustartvec[i]
tmp <- startvec %in% t0
m <- sum(tmp)
if (!m) next
end2 <- endvec[tmp]
tmp <- end2 %in% intervals
end2 <- end2[tmp]
m <- length(end2)
if (!m) next
t1 <- max(end2)
# Compute estimates
tmp <- (intervals >= t0) & (intervals <= t1)
if (!any(tmp)) stop("INTERNAL CODING ERROR 1")
x <- x[tmp, , drop=FALSE]
ct0 <- as.character(t0)
beta_int0 <- x[ct0, estv1, drop=TRUE]
beta_cum0 <- x[ct0, estv2, drop=TRUE]
#beta_exp0 <- x[ct0, estv3, drop=TRUE]
x[, estv1] <- jp.checkProbVec(x[, estv1]/beta_int0)
x[, estv2] <- jp.checkProbVec(x[, estv2]/beta_cum0)
#x[, estv3] <- jp.checkProbVec(x[, estv3]/beta_exp0)
# Compute standard errors
x <- cond_se(x, obj)
# Subset by the intervals we want
tmp <- getDataVec(x, int.col) > t0
tmp[is.na(tmp)] <- FALSE
x <- x[tmp, , drop=FALSE]
if (!nrow(x)) stop("INTERNAL CODING ERROR 2")
# Order by interval, year is constant
tmp <- order(getDataVec(x, int.col))
x <- x[tmp, , drop=FALSE]
# Add starting interval
x[, st.int.col] <- t0
if (!length(ret)) {
ret <- x
} else {
ret <- rbind(ret, x)
}
}
ret
}
jp.checkProbVec <- function(vec) {
tmp <- vec > 1
tmp[is.na(tmp)] <- FALSE
if (any(tmp)) vec[tmp] <- 1
vec
}
jp.cond.setMiss <- function(df, obj) {
# df must be sorted by year and interval
cola <- obj$number.alive
cold <- obj$number.event
coll <- obj$number.loss
alive <- getDataVec(df, cola)
died <- getDataVec(df, cold)
lost <- getDataVec(df, coll)
intervals <- getDataVec(df, obj$interval)
years <- getDataVec(df, obj$year)
# See if there is missing data
tmp1 <- !is.finite(alive)
tmp2 <- !is.finite(died)
tmp3 <- !is.finite(lost)
tmp <- tmp1 | tmp2 | tmp3
if (!any(tmp)) return(df)
# Start walking through the data
uyears <- unique(years)
nr <- nrow(df)
vec <- 1:nr
imp.rows <- vec[tmp]
if (imp.rows[1] < 2) return(df) # Need data for at least the first row
for (row in imp.rows) {
flag1 <- tmp1[row]
flag2 <- tmp2[row]
flag3 <- tmp3[row]
year <- years[row]
int <- intervals[row]
row0 <- row - 1
year0 <- years[row0]
int0 <- intervals[row0]
row.imp <- 0
# First check if the previous row (row0) is the same year as current row
if (year0 == year) {
# Use this row for imputed values. Since data is sorted, it will be previous interval.
row.imp <- row0
} else {
# Use row from year0
tmp0 <- years == year0
# Get the ideal interval from previous year
use.int <- year - year0 + int
tmp <- tmp0 & (intervals <= use.int)
if (!any(tmp)) next
rows <- vec[tmp]
row.imp <- max(rows)
}
if (row.imp) {
if (flag1) alive[row] <- alive[row.imp]
if (flag2) died[row] <- died[row.imp]
if (flag3) lost[row] <- lost[row.imp]
}
}
df[, cola] <- alive
df[, cold] <- died
df[, coll] <- lost
df
}
jp.set.miss.to.last <- function(vec) {
# vec is ordered
tmp <- !is.finite(vec)
if (!any(tmp) || all(tmp)) return(vec)
# Check for consecutive indices
inds <- 1:length(vec)
ok <- inds[!tmp]
if (all(ok == 1:length(ok))) {
ind <- max(ok)
vec[tmp] <- vec[ind]
} else {
ivec <- inds[tmp]
ivec <- ivec[ivec > 1]
if (!length(ivec)) return(vec)
for (i in ivec) vec[i] <- vec[i-1]
}
vec
}
joinpoint.cond <- function(data, subset, start.interval, end.interval=NULL,
year="Year", interval="Interval", number.event="Died",
number.alive="Alive_at_Start", number.loss="Lost_to_Followup",
expected.rate="Expected_Survival_Interval", model.form=NULL,
maxnum.jp=0, proj.year.num=5, op=list(), delLastIntvl=FALSE,
add.data.cols="_ALL_") {
# Check for errors
observedrelsurv <- NULL
check_dataframe(data)
data.cols <- colnames(data)
check_subset(subset, nrow(data))
check_integer(start.interval, "start.interval", min=1)
check_dataVar(data, year, "year")
check_dataVar(data, interval, "interval")
check_dataVar(data, number.event, "number.event")
check_dataVar(data, number.alive, "number.alive")
check_dataVar(data, number.loss, "number.loss")
check_dataVar(data, expected.rate, "expected.rate", allow.miss=1)
check_formula(model.form, data.cols)
check_integer(maxnum.jp, "maxnum.jp", valid=0:10)
check_integer(proj.year.num, "proj.year.num", valid=0:30)
check_logical(delLastIntvl, "delLastIntvl")
op <- check_op(op)
end.interval <- check_end.interval(end.interval, start.interval, data, interval)
add.data.cols <- check_add.data.cols(add.data.cols)
# Get the correct subset of data
data <- getDataForJoinpoint(data, NULL, subset, start.interval, year, op)
objlist <- list(year=year, interval=interval, number.event=number.event,
number.alive=number.alive, number.loss=number.loss, expected.rate=expected.rate,
observedrelsurv=observedrelsurv, model.form=model.form, maxnum.jp=maxnum.jp,
proj.year.num=proj.year.num, start.interval=start.interval, end.interval=end.interval,
op=op, delLastIntvl=delLastIntvl, add.data.cols=add.data.cols)
checkDataObjects(data, objlist)
ret <- joinpoint.cond_main(data, objlist)
ret
}
joinpoint.cond_main <- function(data, objlist) {
DEBUG <- objlist$op$DEBUG
if (DEBUG) cat("Begin: joinpoint.cond_main\n")
data <- applyStartToData(data, objlist)
# Data is now set up for original joinpoint function
ret <- joinpoint(data, subset=NULL, year=objlist$year, interval=objlist$interval,
number.event=objlist$number.event, number.alive=objlist$number.alive,
number.loss=objlist$number.loss, expected.rate=objlist$expected.rate,
observedrelsurv=objlist[["observedrelsurv", exact=TRUE]],
model.form=objlist[["model.form", exact=TRUE]], maxnum.jp=objlist$maxnum.jp,
proj.year.num=objlist$proj.year.num, op=objlist$op, delLastIntvl=objlist$delLastIntvl,
add.data.cols=objlist[["add.data.cols", exact=TRUE]])
# Update full and predicted matrices for best fit and for each fit in FitList
ret <- computeCondSurv.fit(ret, objlist)
FitList <- ret[["FitList", exact=TRUE]]
n <- length(FitList)
if (n) {
for (i in 1:n) {
fit <- FitList[[i]]
FitList[[i]] <- computeCondSurv.fit(fit, objlist)
}
ret$FitList <- FitList
}
# Add interval number to ret to be able to shift intervals later. See email
# on 2024-08-28 and 2024-10-01 related to error with aapc.multiints.
# For relax.prop function, start.interval is set to int in objlist
int <- objlist$start.interval
ret$shift.interval <- int
lst <- ret[["FitList", exact=TRUE]]
n <- length(lst)
if (n && is.list(lst)) {
for (i in 1:n) lst[[i]]$shift.interval <- int
ret$FitList <- lst
}
if (DEBUG) cat("End: jointpoint.cond_main\n")
ret
}
updateIntervalVec <- function(ret, objlist) {
if (!is.list(ret)) return(ret)
add <- objlist[["start.interval", exact=TRUE]]
if (!length(add)) stop("ERROR 1")
vec <- ret[["Interval", exact=TRUE]]
if (is.numeric(vec)) ret$Interval <- vec + add
FitList <- ret[["FitList", exact=TRUE]]
if (!is.list(FitList)) return(ret)
n <- length(FitList)
if (!n) return(ret)
for (i in 1:n) {
tmp <- FitList[[i]]
if (!is.list(tmp)) next
vec <- tmp[["Interval", exact=TRUE]]
if (is.numeric(vec)) {
tmp$Interval <- vec + add
FitList[[i]] <- tmp
}
}
ret$FitList <- FitList
ret
}
computeCondSurv.fit <- function(cox_fit, objlist) {
# Previously, we removed rows of data with interval <= start, so now
# interval 1 is from start - <start+1 years
DEBUG <- objlist$op$DEBUG
if (DEBUG) cat("Begin: computeCondSurv.fit\n")
int.var <- objlist$interval
year.var <- objlist$year
start <- objlist$start.interval
end <- objlist$end.interval
pred <- cox_fit$predicted
full <- cox_fit$fullpredicted
# Add start to interval
pred[, int.var] <- pred[, int.var] + start
full[, int.var] <- full[, int.var] + start
start.cond <- start + 1
# Subset data by start to end
tmp <- pred[, int.var] %in% (start):(end)
pred <- pred[tmp, , drop=FALSE]
if (!nrow(pred)) stop("ERROR 1")
tmp <- full[, int.var] %in% (start):(end)
full <- full[tmp, , drop=FALSE]
if (!nrow(full)) stop("ERROR 2")
# Get row ids for predicted and predictedfull
pred.ids <- paste(pred[, year.var], pred[, int.var], sep=":")
full.ids <- paste(full[, year.var], full[, int.var], sep=":")
# Subset
full <- as.data.frame(full, stringsAsFactors=FALSE, check.names=FALSE)
ints <- full[, int.var]
full[, "label"] <- paste0("P(T>", ints, "|T>", ints-1, ")")
tmp <- full.ids %in% pred.ids
pred <- full[tmp, , drop=FALSE]
cox_fit$predicted <- pred
cox_fit$fullpredicted <- full
if (DEBUG) cat("End: computeCondSurv.fit\n")
cox_fit
}
cond_se <- function(df, objlist) {
# Data should already be ordered by year and interval
alive <- df[, objlist$number.alive, drop=TRUE]
died <- df[, objlist$number.event, drop=TRUE]
lost <- df[, objlist$number.loss, drop=TRUE]
years <- df[, objlist$year, drop=TRUE]
pred_int <- df[, "pred_int", drop=TRUE]
pred_cum <- df[, "pred_cum", drop=TRUE]
uyears <- unique(years)
nyears <- length(uyears)
se.int <- rep(NA, nrow(df))
se.cum <- se.int
for (i in 1:nyears) {
tmp <- years %in% uyears[i]
d2 <- died[tmp]
a2 <- alive[tmp]
l2 <- lost[tmp]
se.int[tmp] <- cond_se_1year.int(pred_int[tmp], d2, a2, l2)
se.cum[tmp] <- cond_se_1year(pred_cum[tmp], d2, a2, l2)
}
df[, "pred_int_se"] <- se.int
df[, "pred_cum_se"] <- se.cum
df
}
cond_se_1year <- function(cond.est, died, alive, lost) {
effss <- alive - lost/2
mhat <- died/effss
oneMmhat <- 1 - mhat
surv <- cumprod(oneMmhat)
probDie <- 1 - surv
vec <- probDie/(effss - died)
csumvec <- cumsum(vec)
se <- cond.est*sqrt(csumvec)
se
}
cond_se_1year.int <- function(cond.est, died, alive, lost) {
effss <- alive - lost/2
vec <- died/(effss*(effss - died))
csumvec <- cumsum(vec)
se <- cond.est*sqrt(csumvec)
se
}
computeCondSurv.fit0 <- function(cox_fit, objlist) {
# Want to compute, for example, P(T>10 | T > 5) = S(10)/S(5)
# Previously, we removed rows of data with interval <= start, so now
# interval 1 is from start - <start+1 years
DEBUG <- objlist$op$DEBUG
if (DEBUG) cat("Begin: computeCondSurv.fit\n")
int.var <- objlist$interval
year.var <- objlist$year
start <- objlist$start.interval
end <- objlist$end.interval
pred <- cox_fit$predicted
full <- cox_fit$fullpredicted
Interval <- cox_fit[["Interval", exact=TRUE]]
iZ0 <- cox_fit[["iZ0", exact=TRUE]]
# Add start to interval
pred[, int.var] <- pred[, int.var] + start
full[, int.var] <- full[, int.var] + start
start.cond <- start + 1
# Subset data by start+1 to end
tmp <- pred[, int.var] %in% (start+1):(end)
pred <- pred[tmp, , drop=FALSE]
if (!nrow(pred)) stop("ERROR 1")
tmp <- full[, int.var] %in% (start+1):(end)
full <- full[tmp, , drop=FALSE]
if (!nrow(full)) stop("ERROR 2")
# Get row ids for predicted and predictedfull
pred.ids <- paste(pred[, year.var], pred[, int.var], sep=":")
full.ids <- paste(full[, year.var], full[, int.var], sep=":")
# pred and full are ordered by year and interval
# loop over each row of full and compute covar matrix for each year
nr <- nrow(full)
year0 <- -1
int0 <- -1
cond.int <- rep(NA, nr)
cond.int.se <- rep(NA, nr)
cond.cum <- rep(NA, nr)
cond.cum.se <- rep(NA, nr)
for (i in 1:nr) {
condSurv.int <- NA
condSurv.int.se <- NA
condSurv.cum <- NA
condSurv.cum.se <- NA
year <- full[i, year.var]
int <- full[i, int.var]
S_predInt <- full[i, "pred_int"]
S_predCum <- full[i, "pred_cum"]
if (year != year0) {
# Compute covar matrix for S(t_j)
tmp <- calcCovPred(cox_fit, year, Interval, iZ0)
cov.int <- tmp$cov.int
cov.cum <- tmp$cov.cum
# Initialize
S0_predInt <- NA
S0_predCum <- NA
}
if (int == start.cond) {
# save survival prob P(T > start)
S0_predInt <- S_predInt
S0_predCum <- S_predCum
} else {
condSurv.int <- S_predInt/S0_predInt
condSurv.cum <- S_predCum/S0_predCum
# Get subset of covariance matrix
cols <- c(start.cond, int) - start
if (!all(cols %in% 1:ncol(cov.int))) stop("ERROR with column numbers")
# Compute variance by delta method
condSurv.int.se <- sqrt(calcCondSurvVariance(S_predInt, S0_predInt, cov.int[cols, cols]))
condSurv.cum.se <- sqrt(calcCondSurvVariance(S_predCum, S0_predCum, cov.cum[cols, cols]))
}
cond.int[i] <- condSurv.int
cond.int.se[i] <- condSurv.int.se
cond.cum[i] <- condSurv.cum
cond.cum.se[i] <- condSurv.cum.se
year0 <- year
int0 <- int
}
# cond surv should be less than 1
tmp <- cond.int > 1
tmp[is.na(tmp)] <- FALSE
if (any(tmp)) cond.int[tmp] <- 1
tmp <- cond.cum > 1
tmp[is.na(tmp)] <- FALSE
if (any(tmp)) cond.cum[tmp] <- 1
full[, "pred_int"] <- cond.int
full[, "pred_int_se"] <- cond.int.se
full[, "pred_cum"] <- cond.cum
full[, "pred_cum_se"] <- cond.cum.se
# Subset
tmp <- full[, int.var] != start.cond
full <- full[tmp, , drop=FALSE]
full.ids <- full.ids[tmp]
full <- as.data.frame(full, stringsAsFactors=FALSE, check.names=FALSE)
ints <- full[, int.var]
full[, "label"] <- paste0("P(T>", ints-1, "|T>", start, ")")
full[, int.var] <- full[, int.var] - 1
tmp <- full.ids %in% pred.ids
pred <- full[tmp, , drop=FALSE]
cox_fit$predicted <- pred
cox_fit$fullpredicted <- full
if (DEBUG) cat("End: computeCondSurv.fit\n")
cox_fit
}
getOption <- function(op, nm, retIfMiss=NULL) {
if (!is.list(op)) return(retIfMiss)
if (nm %in% names(op)) {
ret <- op[[nm, exact=TRUE]]
} else {
ret <- retIfMiss
}
ret
}
calcCondSurvVariance <- function(S_jplusk, S_j, cov) {
# Multivariate delta method to compute variance of S(t_{j+k} | x)/S(t_j | x)
gradient <- c(1/S_j, -S_jplusk/(S_j*S_j))
dim(gradient) <- c(2, 1)
ret <- t(gradient) %*% cov %*% gradient
dim(ret) <- NULL
ret
}
calcJacobPred <- function(cox_fit, year, Interval, iZ0) {
ret <- Predict(cox_fit, year, 0, Interval, cox_fit$jp, years=year, intervals=NULL,
beta_input=NULL, Z0=iZ0, gamma_input=NULL, ret.jacob=TRUE)
if (any(is.na(ret$jacob.int))) stop("ERROR 1 with Jacobian")
if (any(is.na(ret$jacob.cum))) stop("ERROR 2 with Jacobian")
ret
}
calcCovPred <- function(cox_fit, year, Interval, iZ0) {
# Compute covariance matrix for predicted survival probs for each interval
# Use deleta method: J %*% VCOV * t(J)
tmp <- calcJacobPred(cox_fit, year, Interval, iZ0)
jacob.int <- tmp$jacob.int
jacob.cum <- tmp$jacob.cum
cov <- cox_fit$covariance
ret.int <- jacob.int %*% cov %*% t(jacob.int)
ret.cum <- jacob.cum %*% cov %*% t(jacob.cum)
list(cov.int=ret.int, cov.cum=ret.cum)
}
checkDataObjects <- function(data, objlist) {
DEBUG <- objlist$op$DEBUG
if (DEBUG) cat("Begin: checkDataObjects\n")
start <- objlist[["start.interval", exact=TRUE]]
fup.var <- objlist$interval
vec <- getDataVec(data, fup.var)
maxint <- max(vec, na.rm=TRUE)
if (length(start) && (start >= maxint)) stop(paste0("ERROR: the value of start.interval cannot exceed ", maxint-1))
# Check for repeated year of diagnosis and follow up year
yr.var <- objlist$year
tmp <- duplicated(data[, c(yr.var, fup.var), drop=FALSE])
if (any(tmp)) {
#print(data[tmp, c(yr.var, fup.var), drop=FALSE])
stop("ERROR: data has repeated year/interval pairs")
}
# Check that for each year, intervals are from 1, 2, ....
ints <- getDataVec(data, fup.var)
yrs <- getDataVec(data, yr.var)
uyrs <- unique(yrs)
nyrs <- length(uyrs)
for (i in 1:nyrs) {
tmp <- yrs %in% uyrs[i]
uint <- unique(ints[tmp])
maxint <- max(uint, na.rm=TRUE)
if (!all(uint %in% 1:maxint)) {
stop(paste0("ERROR: incorrect intervals for year = ", uyrs[i]))
}
}
if (DEBUG) cat("End: checkDataObjects\n")
NULL
}
orderDataByIntYear <- function(data, yr.var, int.var) {
data <- data[order(getDataVec(data, int.var)), , drop=FALSE]
data <- data[order(getDataVec(data, yr.var)), , drop=FALSE]
data
}
applyStartToData <- function(data, objlist) {
start <- objlist$start.interval
fup.var <- objlist$interval
yr.var <- objlist$year
rate.var <- objlist$expected.rate
vec <- getDataVec(data, fup.var)
maxint <- max(vec, na.rm=TRUE)
if (start >= maxint) stop(paste0("ERROR: the value of start.interval cannot exceed ", maxint-1))
vec <- vec - start
data[, fup.var] <- vec
tmp <- vec >= 1
if (any(is.na(tmp))) stop("ERROR 1")
data <- data[tmp, , drop=FALSE]
nr <- nrow(data)
if (nr < 1) stop("ERROR: all rows of data removed after applying start.interval")
# Cumulative survival, compute year by year
# Order by year and interval
data <- orderDataByIntYear(data, yr.var, fup.var)
#years <- getDataVec(data, yr.var)
#uyears <- sort(unique(years))
#nyears <- length(uyears)
#cumvec <- rep(NA, nrow(data))
#for (i in 1:nyears) {
# tmp <- years == uyears[i]
# vec <- getDataVec(data[tmp, , drop=FALSE], rate.var)
# cumvec[tmp] <- cumprod(vec)
#}
data
}
subsetDataObj <- function(data, objlist) {
nms <- c("year", "interval", "number.event", "number.alive", "number.loss",
"expected.rate", "observedrelsurv")
nr0 <- nrow(data)
prnt <- objlist$op$print
keep <- rep(TRUE, nr0)
vars <- NULL
for (v in nms) {
var <- objlist[[v, exact=TRUE]]
if (is.null(var)) next
vec <- as.numeric(getDataVec(data, var))
tmp <- is.finite(vec)
keep <- keep & tmp
if (prnt) {
m <- sum(!tmp)
if (m) message(paste0(m, " rows removed due to missing values in ", var))
}
data[, var] <- vec
vars <- c(vars, var)
}
if (!all(keep)) data <- data[keep, , drop=FALSE]
if (nrow(data) < 1) stop("ERROR: no rows left in data after removing missing values")
form <- objlist[["model.form", exact=TRUE]]
allvars <- NULL
if (!is.null(form)) allvars <- all.vars(form)
allvars <- unique(c(allvars, vars))
data <- data[, allvars, drop=FALSE]
data
}
cond_modifyData <- function(data, obj.list) {
start <- obj.list$start.year.num
nms <- c("expected.rate", "observedrelsurv")
int.var <- obj.list$interval
vec <- getDataVec(data, int.var)
tmp <- vec <= start
tmp[is.na(tmp)] <- FALSE
if (any(tmp)) {
data <- data[tmp, , drop=FALSE]
}
data[, int.var] <- vec - start
ord <- order(data[, int.var])
data <- data[ord, , drop=FALSE]
ord <- order(data[, obj.list$year])
data <- data[ord, , drop=FALSE]
for (nm in nms) {
var <- obj.list[[nm, exact=TRUE]]
if (!length(var)) next
vec <- getDataVec(data, int.var)
data[, var] <- cumprod(vec)
}
max.year <- obj.list$max.year
tmp <- getDataVec(data, obj.list$year) <= max.year
tmp[is.na(tmp)] <- FALSE
data <- data[tmp, , drop=FALSE]
list(data=data, obj.list=obj.list)
}
getDataVec <- function(data, col) {
unlist(data[, col, drop=TRUE])
}
isString <- function(x) {
(length(x) == 1) && is.character(x)
}
isNumber <- function(x) {
(length(x) == 1) && is.numeric(x)
}
getDicFile <- function(data) {
ret <- paste0(data, ".dic")
if (!file.exists(ret)) ret <- paste0(ret, ".gz")
if (!file.exists(ret)) stop("ERROR")
ret
}
getDataFile <- function(data) {
ret <- paste0(data, ".txt")
if (!file.exists(ret)) ret <- paste0(ret, ".gz")
if (!file.exists(ret)) stop("ERROR")
ret
}
getDataCols <- function(data) {
if (is.data.frame(data)) {
ret <- colnames(data)
} else {
dicf <- getDicFile(data)
ret <- dic.getColnames(dicf)
}
ret
}
getDataNrows <- function(data) {
if (is.data.frame(data)) {
ret <- nrow(data)
} else {
datf <- getDataFile(data)
ret <- length(scan(datf, what="char", sep="\n", quiet=TRUE))
}
ret
}
subsetDataForJoinpoint <- function(data, subset, DEBUG) {
if (is.null(subset)) return(data)
if (is.character(subset)){
data <- try(subset(data, eval(parse(text=subset))), silent=TRUE)
if ("try-error" %in% class(data)) {
stop(paste0("ERROR applying subset=", subset, " to data"))
}
if (nrow(data) < 1) {
stop(paste0("ERROR: no rows of data remain after applying subset=", subset))
}
if (DEBUG) cat(paste0("After applying subset, nrow(data) = ", nrow(data), "\n"))
} else if (is.logical(subset)) {
data <- data[subset, , drop=FALSE]
}
data
}
getDataForJoinpoint <- function(data, dic.file, subset, start, year.var, op) {
DEBUG <- op$DEBUG
if (DEBUG) {
cat("Begin: getDataForJoinpoint\n")
cat(paste0("nrow(data) = ", nrow(data), "\n"))
}
# Apply subset
data <- subsetDataForJoinpoint(data, subset, DEBUG)
if (start <= 0) return(data)
# Get information about the data
#tmp <- cond.getFileInfo(dic.file)
#fup.year <- tmp$fup.year
#nint <- tmp$nint
#diag.mat <- tmp$diag.year.mat
# Check for option to get minimum number of follow up years
nfup.ints <- op[["nFupInts", exact=TRUE]]
if (is.null(nfup.ints)) nfup.ints <- 0
if (nfup.ints) {
stop("ERROR: this option is not implemented yet")
fup.year <- NULL
diag.mat <- NULL
# Determine the last year for diagnosis
max.year <- fup.year - nfup.ints
if (DEBUG) cat(paste0("Cut year of diagnosis at ", max.year, "\n"))
# first column of diag.mat is code, second column is year
tmp <- diag.mat[, 2] <= max.year
if (all(tmp)) return(data) # no need to subset
max.code <- max(diag.mat[tmp, 1])
if (DEBUG) cat(paste0("max.year=", max.year, ", max.code=", max.code, "\n"))
if (!is.finite(max.code)) stop("ERROR in dictionary file: year")
# Subset the data by max.year (or max.code). Try the code first
vec <- getDataVec(data, year.var)
tmp <- vec <= max.code
if (!any(tmp)) tmp <- vec <= max.year
if (!all(tmp)) data <- data[tmp, , drop=FALSE]
if (!nrow(data)) {
stop(paste0("ERROR: all rows of data have been removed.\nCheck the data and the value for start.interval."))
}
}
if (DEBUG) cat("End: getDataForJoinpoint\n")
data
}
#####################################################################
####################### Dictionary file #############################
#####################################################################
cond.getFileInfo <- function(dic.file) {
# dic.file can also be a list for the relaxed method
if (!isString(dic.file)) return(dic.file)
x <- dic.scanfile(dic.file)
fup.year <- dic.getFollowUpYear(x)
nint <- dic.getNumberOfInts(x)
delim <- dic.getDelimiter(x)
diagYears <- dic.getDiagYears(x)
list(fup.year=fup.year, nint=nint, delim=delim, diag.year.mat=diagYears)
}
cond.modify.dic.file <- function(dic.file, out, start.time=5) {
x <- dic.scanfile(dic.file)
fup.year <- dic.getFollowUpYear(x)
nint <- dic.getNumberOfInts(x)
delim <- dic.getDelimiter(x)
ret <- x
ret <- dic.changeNumOfInt(x, nint, nint - start.time)
ret <- dic.changeYearDiag(ret, fup.year - start.time)
ret <- dic.changeIntervals(ret, nint - start.time)
if (!is.null(out)) write(ret, file=out, ncolumns=1)
list(fup.year=fup.year, nint=nint, delim=delim)
}
dic.getColnames <- function(dic.file) {
x <- dic.scanfile(dic.file)
x <- dic.getChunk(x, chunk="[Life Page Variables]")
x <- parseDelimVec(x, "=", ncol=2, numeric=0)
ret <- x[, 2]
ret
}
dic.changeIntervals <- function(x, max.int) {
rng <- dic.getChunkRange(x, chunk="[Format=Interval]")
vec <- rng[1]:rng[2]
mat <- parseDelimVec(x[vec], "=", ncol=2, numeric=0)
tmp <- as.numeric(mat[, 1]) > max.int
tmp[is.na(tmp)] <- FALSE
if (!any(tmp)) return(x)
rem <- vec[tmp]
x <- x[-rem]
x
}
dic.changeYearDiag <- function(x, max.year) {
rng <- dic.getChunkRange(x, chunk="[Format=Year of diagnosis")
vec <- rng[1]:rng[2]
mat <- parseDelimVec(x[vec], "=", ncol=2, numeric=1)
tmp <- mat[, 2] > max.year
tmp[is.na(tmp)] <- FALSE
if (!any(tmp)) return(x)
rem <- vec[tmp]
x <- x[-rem]
x
}
dic.changeNumOfInt <- function(x, from, to) {
str1 <- paste0("NumberOfIntervals=", from)
str2 <- paste0("NumberOfIntervals=", to)
ii <- grep(str1, x, fixed=TRUE)
len <- length(ii)
if (!len) stop("ERROR 1")
if (len > 1) stop("ERROR 2")
x[ii] <- str2
x
}
dic.getNumberOfInts <- function(x) {
x2 <- dic.getChunk(x, chunk="[Session Options]")
ii <- dic.getLineNumber(x2, "NumberOfIntervals")
ret <- dic.getLineValue(x2[ii], sep="=")
ret <- as.numeric(ret)
if (!is.finite(ret)) stop("ERROR 1")
ret
}
dic.getDelimiter <- function(x) {
x2 <- dic.getChunk(x, chunk="[Export Options]")
ii <- dic.getLineNumber(x2, "Field delimiter")
ret <- dic.getLineValue(x2[ii], sep="=")
ret <- tolower(ret)
if (ret == "tab") {
ret <- "\t"
} else if (ret == "comma") {
ret <- ","
}
ret
}
dic.changeLineValue <- function(x, chunk, line, val.from, val.to) {
nx <- length(x)
tmp <- dic.getChunkRange(x, chunk=chunk)
i1 <- tmp[1]
i2 <- tmp[2]
lines <- dic.getLineNumbers(x, line)
tmp <- (lines >= i1) & (lines <= i2)
lines <- lines[tmp]
if (length(lines) != 1) stop("ERROR 1")
x[lines] <- gsub(val.from, val.to, x[lines], fixed=TRUE)
x
}
dic.getIntervals <- function(x) {
x2 <- dic.getChunk(x, chunk="[Format=Interval]")
x2 <- parseDelimVec(x2, "=", ncol=2, numeric=0)
x2 <- x2[, 2]
x2 <- gsub("yr", "", x2, fixed=TRUE)
x2 <- gsub("<", "", x2, fixed=TRUE)
x2 <- removeWhiteSpace(x2)
tmp <- grepl("-", x2[1], fixed=TRUE)
if (!tmp) x2[1] <- paste0("0-", x2[1])
mat <- parseDelimVec(x2, "0", ncol=2, numeric=1)
colnames(mat) <- c("start", "end")
mat
}
dic.getMaxInterval <- function(x) {
mat <- dic.getIntervals(x)
ret <- max(mat[, 2], na.rm=TRUE)
ret
}
dic.getDiagYears <- function(x) {
x2 <- try(dic.getChunk(x, chunk="[Format=Year of"), silent=TRUE)
if ("try-error" %in% class(x2)) {
x2 <- dic.getChunk(x, chunk="[Format=Diagnosis Year]")
}
x2 <- parseDelimVec(x2, "=", ncol=2, numeric=1)
x2
}
dic.getMaxDiagYear <- function(x) {
mat <- dic.getDiagYears(x)
ret <- max(mat[, 2], na.rm=TRUE)
ret
}
dic.getFollowUpYear <- function(x) {
x2 <- dic.getChunk(x, chunk="[Session Options]")
ii <- try(dic.getLineNumber(x2, "LostToFollowupDate"), silent=FALSE)
if ("try-error" %in% class(ii)) ii <- try(dic.getLineNumber(x2, "StudyCutoffDate"), silent=FALSE)
if ("try-error" %in% class(ii)) {
stop("ERROR: dictionary file is missing LostToFollowupDate and StudyCutoffDate")
}
str <- dic.getLineValue(x2[ii], sep="=")
vec <- getVecFromStr(str, delimiter="/")
len <- length(vec)
if (len > 1) vec <- vec[len]
ret <- as.numeric(vec)
if (!is.finite(ret) || !(ret %in% 1900:2050)) {
#print(vec)
stop("ERROR 2")
}
ret
}
dic.getLineValue <- function(line, sep="=") {
vec <- getVecFromStr(line, delimiter=sep)
if (length(vec) != 2) {
#print(line)
stop(paste0("ERROR parsing line"))
}
ret <- vec[2]
ret
}
dic.getLineNumber <- function(x, line) {
xl <- tolower(x)
ll <- tolower(line)
i <- grep(ll, xl, fixed=TRUE)
if (length(i) != 1) stop("ERROR 1")
if (!is.finite(i)) stop("ERROR 2")
i
}
dic.getLineNumbers <- function(x, line) {
xl <- tolower(x)
ll <- tolower(line)
ret <- grep(ll, xl, fixed=TRUE)
if (!length(ret)) stop("ERROR 1")
ret
}
dic.getChunkRange <- function(x, chunk="[Session Options]") {
# Chunks should begin with "["
xl <- tolower(x)
cl <- tolower(chunk)
nx <- length(x)
i1 <- dic.getLineNumber(xl, cl) + 1
i2 <- grep("[", xl, fixed=TRUE)
m <- length(i2)
if (!m) {
stop("ERROR 3")
} else if (m > 1) {
i2 <- i2 - 1
tmp <- i2 > i1
i2 <- i2[tmp]
len <- length(i2)
if (len >= 1) {
i2 <- i2[1]
} else if (!len) {
i2 <- nx
}
} else {
stop("ERROR 4")
}
if (i2 <= i1) stop("ERROR 5")
vec <- 1:nx
tmp <- (vec %in% i1:i2) & (nchar(x) > 0)
vec <- vec[tmp]
len <- length(vec)
if (len < 2) stop("ERROR 6")
i1 <- vec[1]
i2 <- vec[len]
ret <- c(i1, i2)
ret
}
dic.getChunk <- function(x, chunk="[Session Options]") {
range <- dic.getChunkRange(x, chunk=chunk)
ret <- x[range[1]:range[2]]
ret
}
dic.scanfile <- function(f) {
x <- scan(f, what="character", quiet=TRUE, sep="\n", blank.lines.skip=FALSE)
x <- removeWhiteSpace(x)
x
}
#####################################################################
####################### Utility #####################################
#####################################################################
default.list <- function(inList, names, default, error=NULL,
checkList=NULL) {
# inList List
# names Vector of names of items in inList
# default List of default values to assign if a name is not found
# The order of default must be the same as in names.
# error Vector of TRUE/FALSE if it is an error not to have the
# name in the list.
# The default is NULL
# checkList List of valid values for each name.
# Use NA to skip a list element.
# The default is NULL
n1 <- length(names)
n2 <- length(default)
if (n1 != n2) stop("ERROR: in calling default.list")
if (is.null(error)) {
error <- rep(0, times=n1)
} else if (n1 != length(error)) {
stop("ERROR: in calling default.list")
}
if (!is.null(checkList)) {
if (n1 != length(checkList)) stop("ERROR: in calling default.list")
checkFlag <- 1
} else {
checkFlag <- 0
}
if (is.null(inList)) inList <- list()
listNames <- names(inList)
for (i in 1:n1) {
if (!(names[i] %in% listNames)) {
if (!error[i]) {
inList[[names[i]]] <- default[[i]]
} else {
temp <- paste("ERROR: the name ", names[i], " was not found", sep="")
stop(temp)
}
} else if (checkFlag) {
temp <- checkList[[i]]
if (!all(is.na(temp))) {
if (!all(inList[[names[i]]] %in% checkList[[i]])) {
temp <- paste("ERROR: the name '", names[i],
"' has an invalid value", sep="")
stop(temp)
}
}
}
}
inList
}
removeWhiteSpace <- function(str, leading=1, trailing=1) {
if ((leading) && (trailing)) {
ret <- gsub("^\\s+|\\s+$", "", str, perl=TRUE)
} else if (leading) {
ret <- gsub("^\\s+", "", str, perl=TRUE)
} else if (trailing) {
ret <- gsub("\\s+$", "", str, perl=TRUE)
} else {
ret <- str
}
ret
}
getVecFromStr <- function(string, delimiter="|") {
# string String to break apart. No default
# delimiter Delimiter used in string. The default is "|".
strsplit(string, delimiter, fixed=TRUE)[[1]]
}
# Function to break up character vector
parseDelimVec0 <- function(vec, sep, ncol, numeric=0) {
mat <- unlist(strsplit(vec, sep, fixed=TRUE))
if (length(mat) != length(vec)*ncol) {
stop("ERROR: check ncol or if some elements of the vector are missing delimiters")
}
if (numeric) mat <- as.numeric(mat)
mat <- matrix(mat, byrow=TRUE, ncol=ncol)
return(mat)
mat
} # END: parseDelimVec0
parseDelimVec <- function(vec, sep, numeric=0, ncol=0) {
# Determine if sep is white space
lensep <- nchar(removeWhiteSpace(sep))
if (ncol) {
# Try efficient way
ret <- try(parseDelimVec0(vec, sep, ncol, numeric=numeric), silent=TRUE)
if (!("try-error" %in% class(ret))) return(ret)
}
if (!lensep) {
add <- "[?{"
addFlag <- 1
} else {
add <- " "
addFlag <- 0
}
add1Flag <- 0
N <- length(vec)
if (lensep) vec <- removeWhiteSpace(vec)
len <- nchar(vec)
if (lensep) {
miss <- !grepl(sep, vec, fixed=TRUE)
} else {
if (length(unique(len)) > 1) stop("ERROR: all elements of vec must have the same number of chars when nchar(sep)=0")
miss <- vec %in% ""
}
nmiss <- sum(miss)
if (nmiss == N) stop("ERROR: delimiter not found in vector")
# Is some elements contain data but not delimiter, then throw error
temp <- miss & (len > 0)
if (any(temp)) {
#print(vec[temp][1])
stop("ERROR: some vector elements are not missing but have no delimiter")
}
# Check str to add
if (addFlag) {
temp <- grepl(add, vec, fixed=TRUE)
if (any(temp)) stop("ERROR: this function cannot be used with your data")
}
if (lensep) {
temp <- substr(vec, 1, lensep) == sep
if (any(temp)) {
vec[temp] <- paste(add, vec[temp], sep="")
add1Flag <- 1
}
}
# Get the number of columns
str <- vec[!miss][1]
temp <- getVecFromStr(str, delimiter=sep)
ncol <- length(temp)
l0 <- nchar(str)
if (substr(str, l0-lensep+1, l0) == sep) ncol <- ncol + 1
if (ncol < 2) stop("ERROR: in function")
# Add to the end
if (lensep) vec <- paste(vec, add, sep="")
# For the empty ones
if (nmiss) {
str <- rep(paste(add, sep, add, sep=""), ncol-1)
if (length(str) > 1) str <- paste(str, collapse="", sep="")
vec[miss] <- str
}
mat <- unlist(strsplit(vec, sep, fixed=TRUE))
if (length(mat) != N*ncol) {
#print(paste("length(mat)=", length(mat), ", N=", N, ", ncol=", ncol, sep=""))
stop("Check that each non-missing element of vec has the correct number of delimiters")
}
mat <- matrix(mat, byrow=TRUE, ncol=ncol)
if (nmiss) mat[miss, ] <- ""
if (add1Flag) mat[, 1] <- gsub(add, "", mat[, 1], fixed=TRUE)
mat[, ncol] <- gsub(add, "", mat[, ncol], fixed=TRUE)
mat <- removeWhiteSpace(mat)
if (numeric) mat <- matrix(as.numeric(mat), byrow=FALSE, ncol=ncol)
mat
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.