distributed: Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network

################################################################################
### Test data analysis functions
##
## Created on: 2016-06-17
## Author: Kazuki Yoshida
################################################################################

library(testthat)
library(survey)
library(gnm)
library(dplyr)


###
context("### Test 04.AnalyzeData.R")

set.seed(20160505)


###
### Create multi-site data used for testing
################################################################################

## Sample size
n       <- 10^3
## Randomize treatment
alpha0  <- 0
alphaX  <- rep(0,7)
## Randomize binary outcome
beta0   <- 0
betaX   <- rep(0,7)
betaA   <- 0
betaXA  <- rep(0,7)
## Mean event time of 1
lambda   <- 1
lambda_c <- 1
Tmax     <- 1
survParams <- c(lambda, lambda_c, Tmax)
## Duplicates
K <- 3
## Generate 3-center list differing in sizes only
drn <- GenerateDistResNet(lstN          = as.list(seq_len(K) * n),
                          lstAssignCovariates = rep(list(AssignCovariatesNormBinDefault), K),
                          lstAlphas     = rep(list(c(alpha0, alphaX)), K),
                          lstBetas      = rep(list(c(beta0, betaX, betaA, betaXA)), K),
                          lstSurvParams = rep(list(survParams), K))
## Prepare helper variables
drnReady <- RequestSiteDataPreparation(drn)
drnReady


###
### Tests for meta-analysis on site-specific regression results
################################################################################

test_that("site-specific regression results are meta-analyzed ok", {

    ## Obtain site specific results
    resReg <- RequestSiteRegression(drnReady)
    ## Inverse variance weighting
    ## https://en.wikipedia.org/wiki/Inverse-variance_weighting
    ans <- resReg %>%
        dplyr::group_by(outcome, method) %>%
        dplyr::summarize(coef = sum(coef * (1/var)) / sum(1/var),
                         var  = 1                   / sum(1/var)) %>%
        as.data.frame

    ## Using split
    ans2 <- resReg %>%
        ## Group by outcome and methods
        split(f = resReg[c("outcome","method")], drop = TRUE) %>%
        lapply(function(df) {
            ## Obtain meta-analyzed results
            res <- AnalyzeSiteRegressionHelper(coef = df$coef,
                                               var  = df$var)
            ## Add labels
            data.frame(outcome = df$outcome[1],
                       method  = df$method[1],
                       coef    = res[1],
                       var     = res[2],
                       stringsAsFactors = FALSE)
        }) %>%
        do.call(what = rbind)
    rownames(ans2) <- NULL
    ans2 <- arrange(ans2, outcome, method)

    ## Check equivalence of dplyr and split method
    expect_equal(ans,
                 ans2)

    ## Add data type
    ans <- cbind(data = rep("meta", nrow(ans)),
                 ans,
                 stringsAsFactors = FALSE)
    ##
    ## Site-specific estimated coefficient augmentation
    ## Spread after dropping variance column
    ans_sites <- tidyr::spread(resReg[,-(which(names(resReg) == "var"))],
                               key = site, value = coef)
    names(ans_sites)[-c(1,2)] <- paste0("coef_site", names(ans_sites)[-c(1,2)])
    ## Augment with site-specific results
    ans_aug <- merge(x = ans,
                     y = ans_sites,
                     by = c("outcome","method"),
                     all.x = TRUE,  all.y = FALSE)
    ##
    ## Site-specific estimated variance augmentation
    ## Spread after dropping var column
    ans_sites2 <- tidyr::spread(resReg[,-(which(names(resReg) == "coef"))],
                                key = site, value = var)
    names(ans_sites2)[-c(1,2)] <- paste0("var_site", names(ans_sites2)[-c(1,2)])
    ## Augment with site-specific results
    ans_aug2 <- merge(x = ans_aug,
                      y = ans_sites2,
                      by = c("outcome","method"),
                      all.x = TRUE,  all.y = FALSE)
    ##
    ## Expectations
    expect_equal(AnalyzeSiteRegression(resReg),
                 ans_aug2)

    ## NA handling by exclusion
    ## df with NA coef/var
    df <- structure(list(
        site = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
                 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
                 4L, 4L, 4L, 4L, 4L, 4L),
        outcome = c("binary", "binary", "binary",
                    "binary", "binary", "binary", "survival", "survival", "survival",
                    "survival", "survival", "survival", "binary", "binary", "binary",
                    "binary", "binary", "binary", "survival", "survival", "survival",
                    "survival", "survival", "survival", "binary", "binary", "binary",
                    "binary", "binary", "binary", "survival", "survival", "survival",
                    "survival", "survival", "survival", "binary", "binary", "binary",
                    "binary", "binary", "binary", "survival", "survival", "survival",
                    "survival", "survival", "survival"),
        method = c("ePsMatch", "eDrsBMatch",
                   "ePsStrata", "eDrsBStrata", "ePsSIptw", "ePsMw", "ePsMatch",
                   "eDrsSMatch", "ePsStrata", "eDrsSStrata", "ePsSIptw", "ePsMw",
                   "ePsMatch", "eDrsBMatch", "ePsStrata", "eDrsBStrata", "ePsSIptw",
                   "ePsMw", "ePsMatch", "eDrsSMatch", "ePsStrata", "eDrsSStrata",
                   "ePsSIptw", "ePsMw", "ePsMatch", "eDrsBMatch", "ePsStrata", "eDrsBStrata",
                   "ePsSIptw", "ePsMw", "ePsMatch", "eDrsSMatch", "ePsStrata", "eDrsSStrata",
                   "ePsSIptw", "ePsMw", "ePsMatch", "eDrsBMatch", "ePsStrata", "eDrsBStrata",
                   "ePsSIptw", "ePsMw", "ePsMatch", "eDrsSMatch", "ePsStrata", "eDrsSStrata",
                   "ePsSIptw", "ePsMw"),
        coef = c(-0.0206670211235778, -0.0359256282253753,
                 NA, NA, -0.0324363322777292, -0.0322850842183134, 0.0220560126279101,
                 0.0140049071631618, 0.0167938810788718, 0.0160771752521929, 0.016114372965551,
                 0.0159248054508719, -0.0740122570647937, -0.063164687950672,
                 -0.068026816460471, -0.0681755066950125, -0.0672431267731625,
                 -0.067948177256375, -0.0496525275658786, -0.029953971037688,
                 -0.0330645364343326, -0.0322730605798652, -0.0320033946690058,
                 -0.0315832553432548, 0.0401524896479436, 0.0193015251437017,
                 0.0187398115189868, 0.0183815771183701, 0.0202039288419552, 0.0174674110215562,
                 0.0252265993210464, 0.0166996695087391, 0.0165600159403483, 0.0184443875492624,
                 0.0195358022819071, 0.0217928042991456, -0.0953583925039326,
                 -0.176367486516478, -0.094863780177608, -0.107194261821463, -0.0985059412500863,
                 -0.0933366797533911, -0.0424521823636991, 0.0489053579419892,
                 0.0143387529288193, 0.00386208995080197, 0.00266573694146372,
                 0.000229282078056276),
        var = c(0.000510305479697766, 0.000509610557964662,
                NA, NA, 0.000487305303929011, 0.000487358220388539, 0.000295417046743524,
                0.000295626364462884, 0.00028269790404288, 0.000282732841717295,
                0.000282573934940181, 0.00028259346361012, 0.00274181200078064,
                0.00274673400490608, 0.00247879963494072, 0.00247929757943473,
                0.00247976002267762, 0.00248242918113736, 0.00158551271054998,
                0.00157515971152444, 0.0014214415587498, 0.0014232274146712,
                0.00141395224389523, 0.00141874739338038, 0.00297447268100481,
                0.00296951460328587, 0.00255383536180751, 0.00255569479401723,
                0.00255374619543534, 0.00255587972823593, 0.00166406892071795,
                0.00167095012028587, 0.00144101004948219, 0.00144156168479951,
                0.00143753253512358, 0.00143917897723988, 0.0127198156204243,
                0.0126155486600761, 0.0105985149548199, 0.0105368635094055, 0.0105968213384742,
                0.0106326171090129, 0.00747048341237477, 0.007173658929895, 0.00618275814683362,
                0.00613120550805594, 0.00613153196387081, 0.00614683006386217
                )), .Names = c("site", "outcome", "method", "coef", "var"),
        row.names = c(NA, -48L), class = "data.frame")

    ## df without NA's
    df_complete_rows <- df[!is.na(df$coef) & !is.na(df$var), ]

    expect_equal(AnalyzeSiteRegression(df),
                 AnalyzeSiteRegression(df_complete_rows))
})


test_that("meta-analysis handle NAs gracefully and all methods still return results (which may be NA)", {

    ## Test helper function
    ## Complete
    coefs1 <- c(0.18473589, 0.09948962, 0.06935778)
    vars1  <- c(0.017619107, 0.008654851, 0.005549735)
    expect_equal(AnalyzeSiteRegressionHelper(coefs1, vars1),
                 c(sum(coefs1 * (1/vars1)) / sum(1/vars1),
                   1                       / sum(1/vars1)))
    ## Broken
    coefs2 <- c(0.18473589, NA, 0.06935778)
    vars2  <- c(0.017619107, 0.008654851, NA)
    expect_equal(AnalyzeSiteRegressionHelper(coefs2, vars2),
                 c(coefs2[1], vars2[1]))
    ## Empty
    coefs3 <- c(NA, NA, NA)
    vars3  <- c(NA, NA, NA)
    expect_equal(AnalyzeSiteRegressionHelper(coefs3, vars3),
                 c(NA, NA))


    ## Obtain normal site-specific regression results
    resReg <- RequestSiteRegression(drnReady)

    ## Introduce NAs to binary outcomes for examination
    ## Drop ePsMatch result from site 1 (keep sites 2 and 3)
    resReg[resReg$outcome == "binary" & resReg$site == 1 & resReg$method == "ePsMatch",
           c("coef","var")] <- NA
    ## Drop ePsStrataB coef from site 1 and variance from site 2
    resReg[resReg$outcome == "binary" & resReg$site == 1 & resReg$method == "ePsStrataB",
           "coef"] <- NA
    resReg[resReg$outcome == "binary" & resReg$site == 2 & resReg$method == "ePsStrataB",
           "var"] <- NA
    ## Drop eDrsBMatch results from all sites
    resReg[resReg$outcome == "binary" & resReg$method == "eDrsBMatch",
           c("coef","var")] <- NA

    ## Perform meta-analysis
    res <- AnalyzeSiteRegression(resReg)

    ## Expectations
    ## Site 1 results are NA for ePsMatch
    expect_equal(as.numeric(res[res$outcome == "binary" & res$method == "ePsMatch",
                                c("coef_site1","var_site1")]),
                 as.numeric(c(NA,NA)))
    ## Sites 2 and 3 are ok
    expect_equal(as.numeric(res[res$outcome == "binary" & res$method == "ePsMatch",
                                c("coef_site2","var_site2")]),
                 as.numeric(resReg[resReg$outcome == "binary" & resReg$site == 2 & resReg$method == "ePsMatch",
                                   c("coef","var")]))
    expect_equal(as.numeric(res[res$outcome == "binary" & res$method == "ePsMatch",
                                c("coef_site3","var_site3")]),
                 as.numeric(resReg[resReg$outcome == "binary" & resReg$site == 3 & resReg$method == "ePsMatch",
                                   c("coef","var")]))
    ## Summary results match meta-analysis of sites 2 and 3 excluding invalid site 1
    ans <- resReg %>%
        dplyr::filter(resReg$outcome == "binary",
                      resReg$site %in% c(2,3),
                      resReg$method == "ePsMatch") %>%
        dplyr::group_by(outcome, method) %>%
        dplyr::summarize(coef = sum(coef * (1/var)) / sum(1/var),
                         var  = 1                   / sum(1/var)) %>%
        as.data.frame
        expect_equal(as.numeric(res[res$outcome == "binary" & res$method == "ePsMatch",
                                    c("coef","var")]),
                     as.numeric(ans[,c("coef","var")]))


    ## Summary results match site 3 (only valid site) for ePsStrataB
    expect_equal(as.numeric(res[res$outcome == "binary" & res$method == "ePsStrataB",
                                c("coef","var")]),
                 as.numeric(resReg[resReg$outcome == "binary" & resReg$site == 3 & resReg$method == "ePsStrataB",
                                   c("coef","var")]))


    ## Even without any valid site-specific regression results, results must be returned with NA's
    expect_equal(as.numeric(res[res$outcome == "binary" & res$method == "eDrsBMatch",
                                c("coef","var")]),
                 as.numeric(c(NA,NA)))

})


###
### Tests for regression on site-specific summary data
################################################################################

test_that("summary-to-IPD expander for binary works correctly", {

    ## Data frame of summary data aggregated across sites
    df <- data.frame(site = c(1,1),
                     A = c(0,1),
                     events = c(2,3),
                     denom = c(5, 10))

    ans <- data.frame(site = rep(1, 15),
                      A = rep(c(0,1), c(5,10)),
                      Y = rep(c(1,0,1,0), c(2,3,3,7)))

    ## Expectations
    expect_equal(ExpandToIpd(df),
                 ans)

    ## Using simulated data
    resSummary <- RequestSiteSummary(drnReady)

    ## IPD for binary outco
    binIpd <- ExpandToIpd(subset(resSummary, outcome == "binary"))

    ## Total number check
    ## Matched
    expect_equal(length(subset(binIpd, method == "ePsMatch")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "ePsMatch")$denom))
    expect_equal(sum(subset(binIpd, method == "ePsMatch")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "ePsMatch")$events))
    expect_equal(length(subset(binIpd, method == "eDrsBMatch")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "eDrsBMatch")$denom))
    expect_equal(sum(subset(binIpd, method == "eDrsBMatch")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "eDrsBMatch")$events))
    ## Stratified
    expect_equal(length(subset(binIpd, method == "ePsStrata")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "ePsStrata")$denom))
    expect_equal(sum(subset(binIpd, method == "ePsStrata")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "ePsStrata")$events))
    expect_equal(length(subset(binIpd, method == "eDrsBStrata")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "eDrsBStrata")$denom))
    expect_equal(sum(subset(binIpd, method == "eDrsBStrata")$Y),
                 sum(subset(resSummary, outcome == "binary" & method == "eDrsBStrata")$events))


    ## Test for degenerate data (should expand to degenerate data)
    df_corrupt <- data.frame(site = c(3L, 3L, 3L, 3L, 3L),
                             outcome = c("binary", "binary", "binary", "binary", "binary"),
                             method = c("unadj", "unadj", "ePsMatch", "ePsMatch", "eDrsBMatch"),
                             strata = as.numeric(c(NA, NA, NA, NA, NA)),
                             A = c(0L, 1L, 0L, 1L, NA),
                             events = c(0, 2, 0, 0, NA), denom = c(9667, 10333, 5829, 5829, NA),
                             stringsAsFactors = FALSE)
    df_corrupt[5,]
    expect_equal(ExpandToIpd(df_corrupt[5,]),
                 data.frame(site = 3,
                            outcome = "binary",
                            method = "eDrsBMatch",
                            strata = as.numeric(NA),
                            A = as.numeric(NA),
                            Y = as.numeric(NA),
                            stringsAsFactors = FALSE))

})

test_that("summary data regression is performed correctly", {

    resSummary <- RequestSiteSummary(drnReady)

    ## IPD for binary outco
    binIpd <- ExpandToIpd(subset(resSummary, outcome == "binary"))

    ## Fit models
    ## Unadjusted
    bin_unadjE       <- clogit(formula = Y ~ A + strata(site),
                               data = subset(binIpd, method == "unadj"),
                               method = c("exact", "approximate", "efron", "breslow")[3])
    ## Matched
    bin_ePsMatchE    <- clogit(formula = Y ~ A + strata(site),
                               data = subset(binIpd, method == "ePsMatch"),
                               method = c("exact", "approximate", "efron", "breslow")[3])
    bin_eDrsBMatchE  <- clogit(formula = Y ~ A + strata(site),
                               data = subset(binIpd, method == "eDrsBMatch"),
                               method = c("exact", "approximate", "efron", "breslow")[3])
    ## Stratified
    bin_ePsStrataE   <- clogit(formula = Y ~ A + strata(site, strata),
                               data = subset(binIpd, method == "ePsStrata"),
                               method = c("exact", "approximate", "efron", "breslow")[3])
    bin_eDrsBStrataE <- clogit(formula = Y ~ A + strata(site, strata),
                               data = subset(binIpd, method == "eDrsBStrata"),
                               method = c("exact", "approximate", "efron", "breslow")[3])
    ## Breslow
    ## Unadjusted
    bin_unadjB       <- clogit(formula = Y ~ A + strata(site),
                               data = subset(binIpd, method == "unadj"),
                               method = c("exact", "approximate", "efron", "breslow")[4])
    ## Matched
    bin_ePsMatchB    <- clogit(formula = Y ~ A + strata(site),
                               data = subset(binIpd, method == "ePsMatch"),
                               method = c("exact", "approximate", "efron", "breslow")[4])
    bin_eDrsBMatchB  <- clogit(formula = Y ~ A + strata(site),
                               data = subset(binIpd, method == "eDrsBMatch"),
                               method = c("exact", "approximate", "efron", "breslow")[4])
    ## Stratified
    bin_ePsStrataB   <- clogit(formula = Y ~ A + strata(site, strata),
                               data = subset(binIpd, method == "ePsStrata"),
                               method = c("exact", "approximate", "efron", "breslow")[4])
    bin_eDrsBStrataB <- clogit(formula = Y ~ A + strata(site, strata),
                               data = subset(binIpd, method == "eDrsBStrata"),
                               method = c("exact", "approximate", "efron", "breslow")[4])

    ## Combined
    ans_bin <- rbind(CoefVar(bin_unadjE, "A"),
                     CoefVar(bin_ePsMatchE, "A"),
                     CoefVar(bin_eDrsBMatchE, "A"),
                     CoefVar(bin_ePsStrataE, "A"),
                     CoefVar(bin_eDrsBStrataE, "A"),
                     ## Breslow
                     CoefVar(bin_unadjB, "A"),
                     CoefVar(bin_ePsMatchB, "A"),
                     CoefVar(bin_eDrsBMatchB, "A"),
                     CoefVar(bin_ePsStrataB, "A"),
                     CoefVar(bin_eDrsBStrataB, "A"))

    ## Rate data (survival data)
    resSurv <- subset(resSummary, outcome == "survival")

    ## Conditional Poisson with gnm (generalized non-linear model)
    ## https://cran.r-project.org/web/packages/gnm/index.html
    ## https://cran.r-project.org/web/packages/gnm/vignettes/gnmOverview.pdf
    ## http://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-14-122
    ## Matched without intercept estimation
    surv_unadj       <- gnm(formula = events ~ A,
                            family = poisson(link = "log"),
                            data = subset(resSurv, method == "unadj"),
                            eliminate = factor(site),
                            offset = log(denom))
    surv_ePsMatch    <- gnm(formula = events ~ A,
                            family = poisson(link = "log"),
                            data = subset(resSurv, method == "ePsMatch"),
                            eliminate = factor(site),
                            offset = log(denom))
    surv_eDrsSMatch  <- gnm(formula = events ~ A,
                            family = poisson(link = "log"),
                            data = subset(resSurv, method == "eDrsSMatch"),
                            eliminate = factor(site),
                            offset = log(denom))
    ## Stratified without intercept estimation
    surv_ePsStrata   <- gnm(formula = events ~ A,
                            family = poisson(link = "log"),
                            data = subset(resSurv, method == "ePsStrata"),
                            eliminate = interaction(site, strata),
                            offset = log(denom))
    surv_eDrsSStrata <- gnm(formula = events ~ A,
                            family = poisson(link = "log"),
                            data = subset(resSurv, method == "eDrsSStrata"),
                            eliminate = interaction(site, strata),
                            offset = log(denom))
    ## Combined
    ans_surv <- rbind(CoefVar(surv_unadj, "A"),
                      CoefVar(surv_ePsMatch, "A"),
                      CoefVar(surv_eDrsSMatch, "A"),
                      CoefVar(surv_ePsStrata, "A"),
                      CoefVar(surv_eDrsSStrata, "A"))

    ## Unadjusted with intercept estimation
    surv2_unadj       <- glm(formula = events ~ A + factor(site) + offset(log(denom)),
                             family  = poisson(link = "log"),
                             data    = subset(resSurv, method == "unadj"))
    ## Matched with intercept estimation
    surv2_ePsMatch    <- glm(formula = events ~ A + factor(site) + offset(log(denom)),
                             family  = poisson(link = "log"),
                             data    = subset(resSurv, method == "ePsMatch"))
    surv2_eDrsSMatch  <- glm(formula = events ~ A + factor(site) + offset(log(denom)),
                             family  = poisson(link = "log"),
                             data    = subset(resSurv, method == "eDrsSMatch"))
    ## Statified with intercept estimation
    surv2_ePsStrata   <- glm(formula = events ~ A + interaction(site, strata) + offset(log(denom)),
                             family  = poisson(link = "log"),
                             data    = subset(resSurv, method == "ePsStrata"))
    surv2_eDrsSStrata <- glm(formula = events ~ A + interaction(site, strata) + offset(log(denom)),
                             family  = poisson(link = "log"),
                             data    = subset(resSurv, method == "eDrsSStrata"))

    ## Check equality of point estimates
    expect_equal(coef(surv_unadj)["A"],
                 coef(surv2_unadj)["A"])
    expect_equal(coef(surv_ePsMatch)["A"],
                 coef(surv2_ePsMatch)["A"])
    expect_equal(coef(surv_eDrsSMatch)["A"],
                 coef(surv2_eDrsSMatch)["A"])
    expect_equal(coef(surv_ePsStrata)["A"],
                 coef(surv2_ePsStrata)["A"])
    expect_equal(coef(surv_eDrsSStrata)["A"],
                 coef(surv2_eDrsSStrata)["A"])
    ## Check equality of models
    expect_equal(deviance(surv_unadj)["A"],
                 deviance(surv2_unadj)["A"])
    expect_equal(deviance(surv_ePsMatch),
                 deviance(surv2_ePsMatch))
    expect_equal(deviance(surv_eDrsSMatch),
                 deviance(surv2_eDrsSMatch))
    expect_equal(deviance(surv_ePsStrata),
                 deviance(surv2_ePsStrata))
    expect_equal(deviance(surv_eDrsSStrata),
                 deviance(surv2_eDrsSStrata))

    ## Target object
    ans <- rbind(ans_bin,
                 ans_surv)
    ans0 <- data.frame(data = "summary",
                       outcome = c(rep("binary", nrow(ans_bin)),
                                   rep("survival", nrow(ans_surv))),
                       method = c(paste0(c("unadj",
                                           "ePsMatch", "eDrsBMatch",
                                           "ePsStrata", "eDrsBStrata"),
                                         "E"),
                                  ## Breslow
                                  paste0(c("unadj",
                                           "ePsMatch", "eDrsBMatch",
                                           "ePsStrata", "eDrsBStrata"),
                                         "B"),
                                  ## Survival
                                  "unadj",
                                  "ePsMatch", "eDrsSMatch",
                                  "ePsStrata", "eDrsSStrata"),
                       stringsAsFactors = FALSE)
    ans <- cbind(ans0, ans)

    ## Expectations
    expect_equal(AnalyzeSiteSummary(resSummary),
                 ans)

})


###
### Tests for case-centered conditional logistic regression
################################################################################

## Model specification
## SAS: https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_logistic_sect010.htm
##  exposed_events / total_events =  / offset = logodds_exposure
## R: https://stat.ethz.ch/pipermail/r-help/2008-August/170545.html
##  cbind(exposed_events, unexposed_events) ~ 1 + offset(logodds_exposure)

resRisk <- RequestSiteRisksets(drnReady)


test_that("weighted-risk-set-data-to-IPD expanders function correctly", {

    ## Extract simplest one
    df0 <- subset(resRisk, outcome == "binary" & method == "ePsSIptw" & site == 1)
    ## Expand
    out0 <- RisksetsToIpd(df0)
    ## Appropriate columns exist
    expect_true(all(c("site","outcome","method","strata",
                      "start_time", "stop_time", "event", "A", "W") %in% names(out0)))
    ## Weights must be positive
    expect_true(min(out0$W) > 0)
    ## Total size is the sum of risk set sizes
    expect_equal(nrow(out0), sum(df0$riskset_A0, df0$riskset_A1))
    expect_equal(nrow(subset(out0, A == 0)),
                 df0$riskset_A0)
    expect_equal(nrow(subset(out0, A == 1)),
                 df0$riskset_A1)
    ## Event count match up
    expect_equal(sum(subset(out0, A == 0)$event),
                 df0$events_A0)
    expect_equal(sum(subset(out0, A == 1)$event),
                 df0$events_A1)
    ## Mean of weights match up
    expect_equal(mean(subset(out0, A == 0 & event == 0)$W),
                 with(df0, (w_riskset_A0 - w_events_A0) / (riskset_A0 - events_A0)))
    expect_equal(mean(subset(out0, A == 1 & event == 0)$W),
                 with(df0, (w_riskset_A1 - w_events_A1) / (riskset_A1 - events_A1)))
    expect_equal(mean(subset(out0, A == 0 & event == 1)$W),
                 with(df0, w_events_A0 / events_A0))
    expect_equal(mean(subset(out0, A == 1 & event == 1)$W),
                 with(df0, w_events_A1 / events_A1))
    ## Variance of weights match up (may not be exact)
    expect_equal(safe_var(subset(out0, A == 0 & event == 0)$W),
                 with(df0, varw_nonevents_A0))
    expect_equal(safe_var(subset(out0, A == 1 & event == 0)$W),
                 with(df0, varw_nonevents_A1))
    expect_equal(safe_var(subset(out0, A == 0 & event == 1)$W),
                 with(df0, varw_events_A0))
    expect_equal(safe_var(subset(out0, A == 1 & event == 1)$W),
                 with(df0, varw_events_A1))
    ## Compressed format comes out ok
    out0_compress <- out0 %>%
        group_by(site, outcome, method, strata, start_time, stop_time, A, event, W) %>%
        summarize(count = n()) %>%
        as.data.frame
    rownames(out0_compress) <- NULL
    expect_equal(RisksetsToIpd(df0, compress = TRUE),
                 out0_compress)
    ## Skewed distribution version
    expect_equal(RisksetsToIpdSkewed(df0, compress = TRUE),
                 out0_compress)


###  SkewedTwoPointSample: Check for desired skewed two-point (three-point) sample distributions
    ## Call order: RisksetsToIpdSkewed -> RisksetsToIpdSkewedHelper -> SkewedTwoPointSample

    expect_equal(SkewedTwoPointSample(n = df0$events_A0,
                                      m = df0$w_events_A0 / df0$events_A0,
                                      v = df0$varw_events_A0),
                 data.frame(subset(out0_compress, A == 0 & event == 1)[c("W","count")],
                            row.names = NULL))
    expect_equal(SkewedTwoPointSample(n = df0$events_A1,
                                      m = df0$w_events_A1 / df0$events_A1,
                                      v = df0$varw_events_A1),
                 data.frame(subset(out0_compress, A == 1 & event == 1)[c("W","count")],
                            row.names = NULL))
    expect_equal(SkewedTwoPointSample(n = with(df0, (riskset_A0 - events_A0)),
                                      m = with(df0, (w_riskset_A0 - w_events_A0) /
                                                    (riskset_A0 - events_A0)),
                                      v = df0$varw_nonevents_A0),
                 data.frame(subset(out0_compress, A == 0 & event == 0)[c("W","count")],
                            row.names = NULL))
    expect_equal(SkewedTwoPointSample(n = with(df0, (riskset_A1 - events_A1)),
                                      m = with(df0, (w_riskset_A1 - w_events_A1) /
                                                    (riskset_A1 - events_A1)),
                                      v = df0$varw_nonevents_A1),
                 data.frame(subset(out0_compress, A == 1 & event == 0)[c("W","count")],
                            row.names = NULL))


    ## Matching weights (smaller weights; potential for negative regenerated weights)
    df1 <- subset(resRisk, outcome == "binary" & method == "ePsMw" & site == 1)
    ## Expand
    out1 <- RisksetsToIpd(df1)
    ## Appropriate columns exist
    expect_true(all(c("site","outcome","method","strata",
                      "start_time", "stop_time", "event", "A", "W") %in% names(out1)))
    ## Weights must be positive
    expect_true(min(out1$W) > 0)
    ## Total size is the sum of risk set sizes
    expect_equal(nrow(out1), sum(df1$riskset_A0, df1$riskset_A1))
    expect_equal(nrow(subset(out1, A == 0)),
                 df1$riskset_A0)
    expect_equal(nrow(subset(out1, A == 1)),
                 df1$riskset_A1)
    ## Event count match up
    expect_equal(sum(subset(out1, A == 0)$event),
                 df1$events_A0)
    expect_equal(sum(subset(out1, A == 1)$event),
                 df1$events_A1)
    ## Mean of weights match up
    expect_equal(mean(subset(out1, A == 0 & event == 0)$W),
                 with(df1, (w_riskset_A0 - w_events_A0) / (riskset_A0 - events_A0)))
    expect_equal(mean(subset(out1, A == 1 & event == 0)$W),
                 with(df1, (w_riskset_A1 - w_events_A1) / (riskset_A1 - events_A1)))
    expect_equal(mean(subset(out1, A == 0 & event == 1)$W),
                 with(df1, w_events_A0 / events_A0))
    expect_equal(mean(subset(out1, A == 1 & event == 1)$W),
                 with(df1, w_events_A1 / events_A1))
    ## Variance of weights match up (may not be exact)
    expect_equal(safe_var(subset(out1, A == 0 & event == 0)$W),
                 with(df1, varw_nonevents_A0))
    expect_equal(safe_var(subset(out1, A == 1 & event == 0)$W),
                 with(df1, varw_nonevents_A1))
    expect_equal(safe_var(subset(out1, A == 0 & event == 1)$W),
                 with(df1, varw_events_A0))
    expect_equal(safe_var(subset(out1, A == 1 & event == 1)$W),
                 with(df1, varw_events_A1))
    ## Compressed format comes out ok
    out1_compress <- out1 %>%
        group_by(site, outcome, method, strata, start_time, stop_time, A, event, W) %>%
        summarize(count = n()) %>%
        as.data.frame
    expect_equal(RisksetsToIpd(df1, compress = TRUE),
                 out1_compress)
    ## Skewed version
    expect_equal(RisksetsToIpdSkewed(df1, compress = TRUE),
                 out1_compress)


    ## Survival data with matching weights
    ## Risk set data to be expanded
    df2 <- subset(resRisk, outcome == "survival" & method == "ePsMw" & site == 1)
    ## Generate IPD
    out2 <- RisksetsToIpd(df2)
    ## Appropriate columns exist
    expect_true(all(c("site","outcome","method","strata",
                      "start_time", "stop_time", "event", "A", "W") %in% names(out2)))
    ## Weights must be positive
    expect_true(min(out2$W) > 0)
    ## Total size is the sum of risk set sizes
    expect_equal(nrow(out2), sum(df2$riskset_A0, df2$riskset_A1))
    expect_equal(nrow(subset(out2, A == 0)),
                 sum(df2$riskset_A0))
    expect_equal(nrow(subset(out2, A == 1)),
                 sum(df2$riskset_A1))
    ## Event count match up
    expect_equal(sum(subset(out2, A == 0)$event),
                 sum(df2$events_A0))
    expect_equal(sum(subset(out2, A == 1)$event),
                 sum(df2$events_A1))
    ## Compressed format comes out ok
    out2_compress <- out2 %>%
        group_by(site, outcome, method, strata, start_time, stop_time, A, event, W) %>%
        summarize(count = n()) %>%
        as.data.frame
    expect_equal(RisksetsToIpd(df2, compress = TRUE),
                 out2_compress)
    ## Skewed version
    expect_equal(RisksetsToIpdSkewed(df2, compress = FALSE),
                 ## The orders are different. Skewed version is sorted.
                 arrange(out2, start_time, A, event, W))
    ##
    ## Mean of weights match up (first moment preservation)
    ##  Nonevents among unexposed
    expect_equal(as.numeric(with(subset(out2, A == 0 & event == 0),
                                 tapply(W, start_time, mean))),
                 with(df2, (w_riskset_A0 - w_events_A0) / (riskset_A0 - events_A0)))
    ##  Nonevents among exposed
    expect_equal(as.numeric(with(subset(out2, A == 1 & event == 0),
                                 tapply(W, start_time, mean))),
                 with(df2, (w_riskset_A1 - w_events_A1) / (riskset_A1 - events_A1)))
    ##  Events among unexposed (only meaningful among cells where events exist)
    expect_equal(as.numeric(with(subset(out2, A == 0 & event == 1),
                                 tapply(W, start_time, mean))),
                 with(df2, (w_events_A0 / events_A0)[events_A0 > 0]))
    ##  Events among exposed
    expect_equal(as.numeric(with(subset(out2, A == 1 & event == 1),
                                 tapply(W, start_time, mean))),
                 with(df2, (w_events_A1 / events_A1)[events_A1 > 0]))
    ##
    ## Variance of weights match up (second moment preservation)
    ##  Nonevents among unexposed
    expect_equal(as.numeric(with(subset(out2, A == 0 & event == 0),
                                 tapply(W, start_time, safe_var))),
                 with(df2, varw_nonevents_A0))
    ##  Nonevents among exposed
    expect_equal(as.numeric(with(subset(out2, A == 1 & event == 0),
                                 tapply(W, start_time, safe_var))),
                 with(df2, varw_nonevents_A1))
    ##  Events among unexposed
    expect_equal(as.numeric(with(subset(out2, A == 0 & event == 1),
                                 tapply(W, start_time, safe_var))),
                 with(df2, varw_events_A0[events_A0 > 0]))
    ##  Events among exposed
    expect_equal(as.numeric(with(subset(out2, A == 1 & event == 1),
                                 tapply(W, start_time, safe_var))),
                 with(df2, varw_events_A1[events_A1 > 0]))


###  RisksetsToIpdSkewedHelper: Helper for RisksetsToIpdSkewed
    ## No row
    expect_equal(RisksetsToIpdSkewedHelper(n = 0, m = 1, v = 0, A = 0, event = 1),
                 data.frame(A = as.numeric(), event = as.numeric(),
                            W = as.numeric(), count = as.numeric()))
    ## One row
    expect_equal(RisksetsToIpdSkewedHelper(n = 1, m = 1, v = 0, A = 0, event = 1),
                 data.frame(A = 0, event = 1,
                            W = 1, count = 1))
    expect_equal(RisksetsToIpdSkewedHelper(n = 100, m = 1, v = 0, A = 1, event = 0),
                 data.frame(A = 1, event = 0,
                            W = 1, count = 100))
    ## More rows
    ansMoreRows <- cbind(data.frame(A = 1, event = 0)[rep(1,3),],
                         SkewedTwoPointSample(n = 3, m = 1, v = 0.3^2))
    rownames(ansMoreRows) <- NULL
    expect_equal(RisksetsToIpdSkewedHelper(n = 3, m = 1, v = 0.3^2, A = 1, event = 0),
                 ansMoreRows)


###  Use of RisksetsToIpdSkewed
    ## Survival data with matching weights
    ## Risk set data to be expanded
    df3 <- subset(resRisk, outcome == "survival" & method == "ePsMw" & site == 1)
    ## Generate IPD
    out3 <- RisksetsToIpdSkewed(df3, compress = FALSE)
    ## Appropriate columns exist
    expect_true(all(c("site","outcome","method","strata",
                      "start_time", "stop_time", "event", "A", "W") %in% names(out3)))
    ## Weights must be positive
    expect_true(min(out3$W) > 0)
    ## Total size is the sum of risk set sizes
    expect_equal(nrow(out3), sum(df3$riskset_A0, df3$riskset_A1))
    expect_equal(nrow(subset(out3, A == 0)),
                 sum(df3$riskset_A0))
    expect_equal(nrow(subset(out3, A == 1)),
                 sum(df3$riskset_A1))
    ## Event count match up
    expect_equal(sum(subset(out3, A == 0)$event),
                 sum(df3$events_A0))
    expect_equal(sum(subset(out3, A == 1)$event),
                 sum(df3$events_A1))
    ## Recompressing result in the same thing in this simple case not requiring skew
    out3_compress <- out3 %>%
        group_by(site, outcome, method, strata, start_time, stop_time, A, event, W) %>%
        summarize(count = n()) %>%
        as.data.frame
    expect_equal(RisksetsToIpdSkewed(df3, compress = FALSE),
                 arrange(out3, start_time, A, event, W))
    ##
    ## Mean of weights match up (first moment preservation)
    ##  Nonevents among unexposed
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 0),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_riskset_A0 - w_events_A0) / (riskset_A0 - events_A0)))
    ##  Nonevents among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 0),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_riskset_A1 - w_events_A1) / (riskset_A1 - events_A1)))
    ##  Events among unexposed (only meaningful among cells where events exist)
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 1),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_events_A0 / events_A0)[events_A0 > 0]))
    ##  Events among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 1),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_events_A1 / events_A1)[events_A1 > 0]))
    ##
    ## Variance of weights match up (second moment preservation)
    ##  Nonevents among unexposed
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 0),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_nonevents_A0))
    ##  Nonevents among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 0),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_nonevents_A1))
    ##  Events among unexposed
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 1),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_events_A0[events_A0 > 0]))
    ##  Events among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 1),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_events_A1[events_A1 > 0]))


###  Use of RisksetsToIpdExtreme (Bruce's 1 observation above mean version))
    ## Survival data with matching weights
    ## Risk set data to be expanded
    df3 <- subset(resRisk, outcome == "survival" & method == "ePsMw" & site == 1)
    ## Generate IPD
    out3 <- RisksetsToIpdSkewed(df3, compress = FALSE, helper_fun = RisksetsToIpdExtremeHelper)
    ## Appropriate columns exist
    expect_true(all(c("site","outcome","method","strata",
                      "start_time", "stop_time", "event", "A", "W") %in% names(out3)))
    ## Weights must be positive
    expect_true(min(out3$W) > 0)
    ## Total size is the sum of risk set sizes
    expect_equal(nrow(out3), sum(df3$riskset_A0, df3$riskset_A1))
    expect_equal(nrow(subset(out3, A == 0)),
                 sum(df3$riskset_A0))
    expect_equal(nrow(subset(out3, A == 1)),
                 sum(df3$riskset_A1))
    ## Event count match up
    expect_equal(sum(subset(out3, A == 0)$event),
                 sum(df3$events_A0))
    expect_equal(sum(subset(out3, A == 1)$event),
                 sum(df3$events_A1))
    ## Recompressing result in the same thing in this simple case not requiring skew
    out3_compress <- out3 %>%
        group_by(site, outcome, method, strata, start_time, stop_time, A, event, W) %>%
        summarize(count = n()) %>%
        as.data.frame
    expect_equal(RisksetsToIpdSkewed(df3, compress = FALSE, helper_fun = RisksetsToIpdExtremeHelper),
                 arrange(out3, start_time, A, event, W))
    ##
    ## Mean of weights match up (first moment preservation)
    ##  Nonevents among unexposed
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 0),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_riskset_A0 - w_events_A0) / (riskset_A0 - events_A0)))
    ##  Nonevents among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 0),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_riskset_A1 - w_events_A1) / (riskset_A1 - events_A1)))
    ##  Events among unexposed (only meaningful among cells where events exist)
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 1),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_events_A0 / events_A0)[events_A0 > 0]))
    ##  Events among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 1),
                                 tapply(W, start_time, mean))),
                 with(df3, (w_events_A1 / events_A1)[events_A1 > 0]))
    ##
    ## Variance of weights match up (second moment preservation)
    ##  Nonevents among unexposed
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 0),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_nonevents_A0))
    ##  Nonevents among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 0),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_nonevents_A1))
    ##  Events among unexposed
    expect_equal(as.numeric(with(subset(out3, A == 0 & event == 1),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_events_A0[events_A0 > 0]))
    ##  Events among exposed
    expect_equal(as.numeric(with(subset(out3, A == 1 & event == 1),
                                 tapply(W, start_time, safe_var))),
                 with(df3, varw_events_A1[events_A1 > 0]))

})


test_that("compressed weighted IPD work (THEY DO NOT IN R)", {

    ## Extract simplest one
    df0 <- subset(resRisk, outcome == "binary" & method == "ePsSIptw" & site == 1)
    ## Expand
    out0 <- RisksetsToIpd(df0)
    ## Compressed data set with additional frequency weighting
    out0_comp <- out0 %>%
        group_by(site, outcome, method, strata, start_time, stop_time, A, event, W) %>%
        summarize(count = n())

    ## Using expanded data (standard by svycoxph)
    svycoxph0 <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                          design = svydesign(ids = ~ 1, weights = ~ W, data = out0))
    ## Using expanded data (coxph and robust)
    coxph0 <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                    weights = W,
                    data = out0,
                    robust = TRUE)
    ## Point estimates match up
    expect_equal(CoefVar(svycoxph0, "A")[1],
                 CoefVar(coxph0, "A")[1])
    ## SE fairly close
    expect_equal(CoefVar(svycoxph0, "A")[2],
                 ## Gives robust SE to CoefVar()
                 CoefVar(coxph0, "A")[2],
                 tolerance = 10^-5)

    ## Using compressed data with product weights
    svycoxph1 <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                          design = svydesign(ids = ~ 1, weights = ~ I(W * count), data = out0_comp))
    ## Even the point estimates differ for some reason
    expect_equal(CoefVar(svycoxph1, "A")[1],
                 CoefVar(svycoxph0, "A")[1],
                 tolerance = 10^-2)
    ## Variance is overestimated as the "sample size" is small
    expect_equal(CoefVar(svycoxph1, "A")[2],
                 CoefVar(svycoxph0, "A")[2],
                 tolerance = 1)

    ## Doing the same using coxph
    coxph1 <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                    weights = I(W * count),
                    data = out0_comp,
                    robust = TRUE)
    ## Point estimates match up
    expect_equal(CoefVar(svycoxph1, "A")[1],
                 CoefVar(coxph1, "A")[1])
    ## SE are very different
    expect_equal(CoefVar(svycoxph1, "A")[2],
                 ## Gives robust SE to CoefVar()
                 CoefVar(coxph1, "A")[2],
                 tolerance = 1)

    ## Trying to use additional weights does not work
    svycoxph2 <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                          ## Use count as frequency weights (?) -> Doesn't work
                          weights = count,
                          ## Use W as sampling weightis
                          design = svydesign(ids = ~ 1, weights = ~ W, data = out0_comp))
    ## Even the point estimates differ for some reason
    expect_equal(CoefVar(svycoxph2, "A")[1],
                 CoefVar(svycoxph0, "A")[1],
                 tolerance = 10^-2)
    ## Variance is overestimated as the "sample size" is small
    expect_equal(CoefVar(svycoxph2, "A")[2],
                 CoefVar(svycoxph0, "A")[2],
                 tolerance = 1)
    ## weights = count seems to be multiplied with W to result in the same
    expect_equal(CoefVar(svycoxph2, "A")[1],
                 CoefVar(svycoxph1, "A")[1],
                 tolerance = 10^-15)
    ## same for variance
    expect_equal(CoefVar(svycoxph2, "A")[2],
                 CoefVar(svycoxph1, "A")[2],
                 tolerance = 10^-15)

})


test_that("SAS-powered functions work", {

    ## Generate a SAS script string
    out_string <- GenerateSasProcPhreg(datafile = "./data/input.csv",
                                       method   = "BRESLOW",
                                       strata   = "site age_cat",
                                       outfile  = "./data/output.csv")
    cat(out_string)
    expect_true(grepl("proc import datafile = './data/input.csv'",
                      out_string))
    expect_true(grepl("/ ties = BRESLOW;",
                      out_string))
    expect_true(grepl("strata site age_cat;",
                      out_string))
    expect_true(grepl("outfile = './data/output.csv'",
                      out_string))


    ## Check methods for sasfit object (manually constructed)
    model1 <- list(coef = c(A = 1.1),
                   vcov = matrix(c(2.2), nrow = 1, ncol = 1,
                                 dimnames = list("A", "A")))
    class(model1) <- c("sasfit", class(model1))
    expect_equal(as.numeric(CoefVar(model1, "A")), c(1.1, 2.2))


    ## Extract simplest one
    df0 <- subset(resRisk, outcome == "survival" & method == "ePsSIptw" & site == 1)
    ## Expand
    out0 <- RisksetsToIpd(df0, compress = TRUE)

    ## Expectations
    if (length(suppressWarnings(system("type sas", intern = TRUE, ignore.stderr = TRUE))) == 0) {
        ## If there is no sas command
        expect_error(CaseCenteredLogisticWeightedSas(df = df0, method = "breslow", strata = "site"),
                     regexp = "sas command not available")

    } else {

        ## If sas command is available, it should give a sasfit object
        out <- CaseCenteredLogisticWeightedSas(df = df0, method = "breslow", strata = "site")
        expect_equal(class(out)[1], "sasfit")

    }
})


test_that("risk sets are constructed correctly (retests for binary)", {

    ## Check binary data construction
    ## Construct risksets for binary data
    site1RisksetsBin <- SiteRisksetsBin(drnReady[[1]])
    ## ePsMatch
    site1ePsMatch <- drnReady[[1]][drnReady[[1]]$ePsMatch == 1,]
    expect_equal(as.numeric(site1RisksetsBin[site1RisksetsBin$method == "ePsMatch",
                                             c("events_A0", "events_A1",
                                               "riskset_A0", "riskset_A1")]),
                 as.numeric(c(tapply(site1ePsMatch$Y, site1ePsMatch$A, sum),
                              tapply(site1ePsMatch$Y, site1ePsMatch$A, length))))
    expect_equal(as.numeric(SiteRisksetsByStrata(
        event  = site1ePsMatch[site1ePsMatch$ePsMatch == 1, "Y"],
        A      = site1ePsMatch[site1ePsMatch$ePsMatch == 1, "A"])[,c("events_A0", "events_A1",
                                                                     "riskset_A0", "riskset_A1")]),
        as.numeric(c(tapply(site1ePsMatch$Y, site1ePsMatch$A, sum),
                     tapply(site1ePsMatch$Y, site1ePsMatch$A, length))))
    expect_equal(as.numeric(SiteRisksetsHelper(
        event  = site1ePsMatch[site1ePsMatch$ePsMatch == 1, "Y"],
        A      = site1ePsMatch[site1ePsMatch$ePsMatch == 1, "A"])[,c("events_A0", "events_A1",
                                                                     "riskset_A0", "riskset_A1")]),
        as.numeric(c(tapply(site1ePsMatch$Y, site1ePsMatch$A, sum),
                     tapply(site1ePsMatch$Y, site1ePsMatch$A, length))))
    ## eDrsMatch
    site1eDrsBMatch <- drnReady[[1]][drnReady[[1]]$eDrsBMatch == 1,]
    expect_equal(as.numeric(site1RisksetsBin[site1RisksetsBin$method == "eDrsBMatch",
                                             c("events_A0", "events_A1",
                                               "riskset_A0", "riskset_A1")]),
                 as.numeric(c(tapply(site1eDrsBMatch$Y, site1eDrsBMatch$A, sum),
                              tapply(site1eDrsBMatch$Y, site1eDrsBMatch$A, length))))
    expect_equal(as.numeric(SiteRisksetsByStrata(
        event  = site1eDrsBMatch[site1eDrsBMatch$eDrsBMatch == 1, "Y"],
        A      = site1eDrsBMatch[site1eDrsBMatch$eDrsBMatch == 1, "A"])[,c("events_A0", "events_A1",
                                                                           "riskset_A0", "riskset_A1")]),
        as.numeric(c(tapply(site1eDrsBMatch$Y, site1eDrsBMatch$A, sum),
                     tapply(site1eDrsBMatch$Y, site1eDrsBMatch$A, length))))
    ## ePsStrata
    expect_equal(as.numeric(as.matrix(
        site1RisksetsBin[site1RisksetsBin$method == "ePsStrata",
                         c("events_A0", "events_A1",
                           "riskset_A0", "riskset_A1")])),
        as.numeric(
            cbind(t(tapply(drnReady[[1]]$Y, drnReady[[1]][,c("A","ePsStrata")], sum)),
                  t(tapply(drnReady[[1]]$Y, drnReady[[1]][,c("A","ePsStrata")], length)))))
    ## eDrsBStrata
    expect_equal(as.numeric(as.matrix(
        site1RisksetsBin[site1RisksetsBin$method == "eDrsBStrata",
                         c("events_A0", "events_A1",
                           "riskset_A0", "riskset_A1")])),
        as.numeric(
            cbind(t(tapply(drnReady[[1]]$Y, drnReady[[1]][,c("A","eDrsBStrata")], sum)),
                  t(tapply(drnReady[[1]]$Y, drnReady[[1]][,c("A","eDrsBStrata")], length)))))
})


test_that("Breslow coxph and case-centered logistic are equivalent", {

    ## Equivalence of case-centered logistic and Cox
    ## Data with no tied times (not even in censored)
    data0 <- as.data.frame(drnReady[[3]][,c("time","event","A")])
    data0 <- data0[!duplicated(data0$time),]
    ## Fit models
    cox0_efron   <- coxph(formula = Surv(time, event) ~ A, data = data0, ties = "efron")
    cox0_breslow <- coxph(formula = Surv(time, event) ~ A, data = data0, ties = "breslow")
    cox0_exact   <- coxph(formula = Surv(time, event) ~ A, data = data0, ties = "exact")
    ## Efron is affected by tied times even if no tied event times exist
    expect_equal(coef(cox0_efron), coef(cox0_breslow), tolerance = 10^(-10))
    expect_equal(coef(cox0_efron), coef(cox0_exact),   tolerance = 10^(-10))
    expect_equal(coef(cox0_exact), coef(cox0_breslow), tolerance = 10^(-10))
    ## Risk set construction
    data0_riskset <- with(data0, SiteRisksetsByStrata(time = time, event = event, A = A))
    ## No event row at day 0 does not count
    ccl0 <- CaseCenteredLogistic(data0_riskset)
    expect_equal(CoefVar(ccl0, "(Intercept)"),
                 CoefVar(CaseCenteredLogistic(data0_riskset[-1,]), "(Intercept)"),
                 tolerance = 10^(-10))
    ## Match up to efron when without ties
    expect_equal(as.numeric(CoefVar(ccl0, "(Intercept)")[1]),
                 as.numeric(coef(cox0_efron)["A"]),     tolerance = 10^(-10))
    expect_equal(as.numeric(CoefVar(ccl0, "(Intercept)")[2]),
                 as.numeric(vcov(cox0_efron)["A","A"]), tolerance = 10^(-9))

    ## Data with no tied events (censored can ties)
    data1 <- as.data.frame(drnReady[[3]][,c("time","event","A")])
    data1_A0 <- data1[data1$A == 0,]
    data1_A1 <- data1[data1$A == 1,]
    ## Drop duplicated event times
    data1_A1 <- data1_A1[!duplicated(data1_A1$time),]
    data1 <- rbind(data1_A0, data1_A1)
    ## Fit models
    cox1_efron   <- coxph(formula = Surv(time, event) ~ A, data = data1, ties = "efron")
    cox1_breslow <- coxph(formula = Surv(time, event) ~ A, data = data1, ties = "breslow")
    cox1_exact   <- coxph(formula = Surv(time, event) ~ A, data = data1, ties = "exact")
    ## Efron is affected by tied times even if no tied event times exist
    expect_true(abs(coef(cox1_efron) - coef(cox1_breslow)) > 10^(-4))
    expect_true(abs(coef(cox1_efron) - coef(cox1_exact))   > 10^(-4))
    expect_equal(coef(cox1_exact), coef(cox1_breslow), tolerance = 10^(-5))
    ## Risk set construction
    data1_riskset <- with(data1, SiteRisksetsByStrata(time = time, event = event, A = A))
    ## No event row at day 0 does not count
    ccl1 <- CaseCenteredLogistic(data1_riskset)
    expect_equal(CoefVar(ccl1, "(Intercept)"),
                 CoefVar(CaseCenteredLogistic(data1_riskset[-1,]), "(Intercept)"),
                 tolerance = 10^(-10))
    ## Do not match with Efron
    expect_true(abs(CoefVar(ccl1, "(Intercept)")[1] - as.numeric(coef(cox1_efron)["A"]))     > 10^(-5))
    expect_true(abs(CoefVar(ccl1, "(Intercept)")[2] - as.numeric(vcov(cox1_efron)["A","A"])) > 10^(-7))
    ## Match with Breslow with some numerical tolerance
    expect_equal(as.numeric(CoefVar(ccl1, "(Intercept)")[1]),
                 as.numeric(coef(cox1_breslow)["A"]),     tolerance = 10^(-10))
    expect_equal(as.numeric(CoefVar(ccl1, "(Intercept)")[2]),
                 as.numeric(vcov(cox1_breslow)["A","A"]), tolerance = 10^(-7))
    ## Do not match with exact
    expect_true(abs(CoefVar(ccl1, "(Intercept)")[1] - as.numeric(coef(cox1_exact)["A"]))     > 10^(-6))
    expect_true(abs(CoefVar(ccl1, "(Intercept)")[2] - as.numeric(vcov(cox1_exact)["A","A"])) > 10^(-7))

    ## Data with tied events
    data2 <- data.frame(time  = c(1,2,2,3,4,5,6,7,8,9,10,10),
                        event = c(0,1,1,0,1,0,1,0,1,0,1,0),
                        A     = c(0,1,1,1,1,1,0,0,0,0,1,0))
    cox2_efron   <- coxph(formula = Surv(time, event) ~ A, data = data2, ties = "efron")
    cox2_breslow <- coxph(formula = Surv(time, event) ~ A, data = data2, ties = "breslow")
    cox2_exact   <- coxph(formula = Surv(time, event) ~ A, data = data2, ties = "exact")
    ## With ties, approximation methods do not agree
    expect_true(abs(coef(cox2_efron)   - coef(cox2_breslow)) > 10^(-5))
    expect_true(abs(coef(cox2_efron)   - coef(cox2_exact))   > 10^(-5))
    expect_true(abs(coef(cox2_breslow) - coef(cox2_exact))   > 10^(-5))
    ## Risk set construction
    data2_riskset <- with(data2, SiteRisksetsByStrata(time = time, event = event, A = A))
    ## No event row at day 0 does not count
    ccl2 <- CaseCenteredLogistic(data2_riskset)
    expect_equal(CoefVar(ccl2, "(Intercept)"),
                 CoefVar(CaseCenteredLogistic(data2_riskset[-1,]), "(Intercept)"))
    ## Do not match with Efron
    expect_true(abs(CoefVar(ccl2, "(Intercept)")[1] - as.numeric(coef(cox2_efron)["A"]))     > 10^(-3))
    expect_true(abs(CoefVar(ccl2, "(Intercept)")[2] - as.numeric(vcov(cox2_efron)["A","A"])) > 10^(-3))
    ## Match with Breslow with some numerical tolerance
    expect_equal(as.numeric(CoefVar(ccl2, "(Intercept)")[1]),
                 as.numeric(coef(cox2_breslow)["A"]),     tolerance = 10^(-10))
    expect_equal(as.numeric(CoefVar(ccl2, "(Intercept)")[2]),
                 as.numeric(vcov(cox2_breslow)["A","A"]), tolerance = 10^(-7))
    ## Do not match with exact
    expect_true(abs(CoefVar(ccl2, "(Intercept)")[1] - as.numeric(coef(cox2_exact)["A"]))     > 10^(-3))
    expect_true(abs(CoefVar(ccl2, "(Intercept)")[2] - as.numeric(vcov(cox2_exact)["A","A"])) > 10^(-3))

    ## Data with many ties
    data3 <- as.data.frame(drnReady[[3]][,c("time","event","A")])
    ## Check there are ties
    expect_true(any(table(subset(data3, event == 1)$time) > 1))
    ## With ties, approximation methods do not agree
    cox3_efron   <- coxph(formula = Surv(time, event) ~ A, data = data3, ties = "efron")
    cox3_breslow <- coxph(formula = Surv(time, event) ~ A, data = data3, ties = "breslow")
    cox3_exact   <- coxph(formula = Surv(time, event) ~ A, data = data3, ties = "exact")
    ## With ties, approximation methods do not agree
    expect_true(abs(coef(cox3_efron)   - coef(cox3_breslow)) > 10^(-5))
    expect_true(abs(coef(cox3_efron)   - coef(cox3_exact))   > 10^(-5))
    expect_true(abs(coef(cox3_breslow) - coef(cox3_exact))   > 10^(-5))
    ## Risk set construction
    data3_riskset <- with(data3, SiteRisksetsByStrata(time = time, event = event, A = A))
    ## No event row at day 0 does not count
    ccl3 <- CaseCenteredLogistic(data3_riskset)
    expect_equal(CoefVar(ccl3, "(Intercept)"),
                 CoefVar(CaseCenteredLogistic(data3_riskset[-1,]), "(Intercept)"),
                 tolerance = 10^(-10))
    ## Do not match with Efron
    expect_true(abs(CoefVar(ccl3, "(Intercept)")[1] - as.numeric(coef(cox3_efron)["A"]))     > 10^(-5))
    expect_true(abs(CoefVar(ccl3, "(Intercept)")[2] - as.numeric(vcov(cox3_efron)["A","A"])) > 10^(-9))
    ## Match with Breslow with some numerical tolerance
    expect_equal(as.numeric(CoefVar(ccl3, "(Intercept)")[1]),
                 as.numeric(coef(cox3_breslow)["A"]),     tolerance = 10^(-10))
    expect_equal(as.numeric(CoefVar(ccl3, "(Intercept)")[2]),
                 as.numeric(vcov(cox3_breslow)["A","A"]), tolerance = 10^(-10))
    ## Do not match with Efron
    expect_true(abs(CoefVar(ccl3, "(Intercept)")[1] - as.numeric(coef(cox3_exact)["A"])) > 10^(-5))
    expect_true(abs(CoefVar(ccl3, "(Intercept)")[2] - as.numeric(vcov(cox3_exact)["A","A"])) > 10^(-6))

    ## Data with all ties (binary data)
    data4 <- as.data.frame(drnReady[[3]][,c("Y","A")])
    ## With ties, approximation methods do not agree (exact is not feasible)
    cox4_efron   <- coxph(formula = Surv(rep(1, length(Y)), Y) ~ A, data = data4, ties = "efron")
    cox4_breslow <- coxph(formula = Surv(rep(1, length(Y)), Y) ~ A, data = data4, ties = "breslow")
    ## With ties, approximation methods do not agree
    expect_true(abs(coef(cox4_efron)   - coef(cox4_breslow)) > 10^(-2))
    ## Risk set construction
    data4_riskset <- with(data4, SiteRisksetsByStrata(event = Y, A = A))
    expect_equal(as.numeric(tapply(data4$Y, data4$A, sum)),
                 as.numeric(data4_riskset[,c("events_A0","events_A1")]))
    expect_equal(as.numeric(tapply(data4$Y, data4$A, length)),
                 as.numeric(data4_riskset[,c("riskset_A0","riskset_A1")]))
    ## Case-centered approach
    ccl4 <- CaseCenteredLogistic(data4_riskset)
    ## Do not match with Efron
    expect_true(abs(as.numeric(CoefVar(ccl4, "(Intercept)"))[1] - as.numeric(coef(cox4_efron)["A"]))     > 10^(-2))
    expect_true(abs(as.numeric(CoefVar(ccl4, "(Intercept)"))[2] - as.numeric(vcov(cox4_efron)["A","A"])) > 10^(-9))
    ## Match with Breslow with some numerical tolerance
    expect_equal(as.numeric(CoefVar(ccl4, "(Intercept)"))[1], as.numeric(coef(cox4_breslow)["A"]),     tolerance = 10^(-10))
    expect_equal(as.numeric(CoefVar(ccl4, "(Intercept)"))[2], as.numeric(vcov(cox4_breslow)["A","A"]), tolerance = 10^(-10))

})


test_that("Breslow svycoxph and weighted riskset analyses are equivalent", {

###  MW without ties
    ## Weighted individual data (drop duplicated times)
    data0 <- as.data.frame(drnReady[[3]][,c("time","event","A","ePsMw")])
    data0 <- data0[!duplicated(data0$time),]
    dim(data0)

    ## IPD weighted analyses
    svycoxph0 <- svycoxph(formula = Surv(time, event) ~ A,
                          design = svydesign(ids = ~ 1, weights = ~ ePsMw, data = data0),
                          method = "breslow")
    coxph0 <- coxph(formula = Surv(time, event) ~ A,
                    data = data0,
                    method = "breslow",
                    weights = ePsMw,
                    robust = TRUE)

    ## Riskset construction with weight summaries
    risksets0 <- SiteRisksetsByStrata(time = data0[, "time"],
                                      event = data0[, "event"],
                                      A = data0[, "A"],
                                      W = data0[, "ePsMw"])
    ## IPD reconstruction from risksets with weight summaries
    risksets0$site <- 1
    risksets0$outcome <- "survival"
    risksets0$method <- "ePsMw"
    risksets0$strata <- NA
    risksets0_expanded <- RisksetsToIpdSkewed(risksets0, compress = FALSE)

    ## Reconstructed-IPD weighted analyses
    svycoxph0_expanded <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                                   design = svydesign(ids = ~ 1, weights = ~ W, data = risksets0_expanded),
                                   method = "breslow")
    coxph0_expanded <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                             data = risksets0_expanded,
                             method = "breslow",
                             weights = W,
                             robust = TRUE)

    ## Coefficients should match up exactly
    ## Two fitting methods
    expect_equal(coef(svycoxph0), coef(coxph0))
    ## original weighted IPD vs expaneded weighted IPD
    expect_equal(coef(svycoxph0), coef(svycoxph0_expanded))
    expect_equal(coef(coxph0), coef(coxph0_expanded))

    ## Variances
    ## Two robust variance methods
    ## Finite-population correction results in larger variance with MW, but close
    expect_true(as.numeric(vcov(svycoxph0)) > as.numeric(vcov(coxph0)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^4)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^5)
    ## Variance: original weighted IPD > expaneded weighted IPD
    expect_true(as.numeric(vcov(svycoxph0)) > as.numeric(vcov(svycoxph0_expanded)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^3)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^4)
    ##
    expect_true(as.numeric(vcov(coxph0)) > as.numeric(vcov(coxph0_expanded)))
    expect_true(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^3)
    expect_false(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^4)


###  MW with ties
    ## Weighted individual data (drop duplicated times)
    data0 <- as.data.frame(drnReady[[3]][,c("time","event","A","ePsMw")])

    ## IPD weighted analyses
    svycoxph0 <- svycoxph(formula = Surv(time, event) ~ A,
                          design = svydesign(ids = ~ 1, weights = ~ ePsMw, data = data0),
                          method = "breslow")
    coxph0 <- coxph(formula = Surv(time, event) ~ A,
                    data = data0,
                    method = "breslow",
                    weights = ePsMw,
                    robust = TRUE)

    ## Riskset construction with weight summaries
    risksets0 <- SiteRisksetsByStrata(time = data0[, "time"],
                                      event = data0[, "event"],
                                      A = data0[, "A"],
                                      W = data0[, "ePsMw"])
    ## IPD reconstruction from risksets with weight summaries
    risksets0$site <- 1
    risksets0$outcome <- "survival"
    risksets0$method <- "ePsMw"
    risksets0$strata <- NA
    risksets0_expanded <- RisksetsToIpdSkewed(risksets0, compress = FALSE)

    ## Reconstructed-IPD weighted analyses
    svycoxph0_expanded <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                                   design = svydesign(ids = ~ 1, weights = ~ W, data = risksets0_expanded),
                                   method = "breslow")
    coxph0_expanded <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                             data = risksets0_expanded,
                             method = "breslow",
                             weights = W,
                             robust = TRUE)

    ## Coefficients should match up exactly
    ## Two fitting methods
    expect_equal(coef(svycoxph0), coef(coxph0))
    expect_equal(coef(svycoxph0_expanded), coef(coxph0_expanded))
    ## original weighted IPD vs expaneded weighted IPD
    expect_equal(coef(svycoxph0), coef(svycoxph0_expanded))
    expect_equal(coef(coxph0), coef(coxph0_expanded))

    ## Variances
    ## Two robust variance methods
    ## Finite-population correction results in larger variance with MW, but close
    expect_true(as.numeric(vcov(svycoxph0)) > as.numeric(vcov(coxph0)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^6)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^7)
    ## Variance: original weighted IPD < expaneded weighted IPD
    expect_true(as.numeric(vcov(svycoxph0)) < as.numeric(vcov(svycoxph0_expanded)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^5)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^6)
    ##
    expect_true(as.numeric(vcov(coxph0)) < as.numeric(vcov(coxph0_expanded)))
    expect_true(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^5)
    expect_false(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^6)


###  Stabilized IPTW without ties
    ## Weighted individual data (drop duplicated times)
    data0 <- as.data.frame(drnReady[[3]][,c("time","event","A","ePsSIptw")])
    data0 <- data0[!duplicated(data0$time),]
    dim(data0)

    ## IPD weighted analyses
    svycoxph0 <- svycoxph(formula = Surv(time, event) ~ A,
                          design = svydesign(ids = ~ 1, weights = ~ ePsSIptw, data = data0),
                          method = "breslow")
    coxph0 <- coxph(formula = Surv(time, event) ~ A,
                    data = data0,
                    method = "breslow",
                    weights = ePsSIptw,
                    robust = TRUE)

    ## Riskset construction with weight summaries
    risksets0 <- SiteRisksetsByStrata(time = data0[, "time"],
                                      event = data0[, "event"],
                                      A = data0[, "A"],
                                      W = data0[, "ePsSIptw"])
    ## IPD reconstruction from risksets with weight summaries
    risksets0$site <- 1
    risksets0$outcome <- "survival"
    risksets0$method <- "ePsSIptw"
    risksets0$strata <- NA
    risksets0_expanded <- RisksetsToIpdSkewed(risksets0, compress = FALSE)

    ## Reconstructed-IPD weighted analyses
    svycoxph0_expanded <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                                   design = svydesign(ids = ~ 1, weights = ~ W, data = risksets0_expanded),
                                   method = "breslow")
    coxph0_expanded <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                             data = risksets0_expanded,
                             method = "breslow",
                             weights = W,
                             robust = TRUE)

    ## Coefficients should match up exactly
    ## Two fitting methods
    expect_equal(coef(svycoxph0), coef(coxph0))
    ## original weighted IPD vs expaneded weighted IPD
    expect_equal(coef(svycoxph0), coef(svycoxph0_expanded))
    expect_equal(coef(coxph0), coef(coxph0_expanded))

    ## Variances
    ## Two robust variance methods
    ## Finite-population correction results in larger variance with MW, but close
    expect_true(as.numeric(vcov(svycoxph0)) > as.numeric(vcov(coxph0)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^4)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^5)
    ## Variance: original weighted IPD > expaneded weighted IPD
    expect_true(as.numeric(vcov(svycoxph0)) > as.numeric(vcov(svycoxph0_expanded)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^3)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^4)
    ##
    expect_true(as.numeric(vcov(coxph0)) > as.numeric(vcov(coxph0_expanded)))
    expect_true(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^3)
    expect_false(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^4)


###  Stabilized IPTW with ties
    ## Weighted individual data (drop duplicated times)
    data0 <- as.data.frame(drnReady[[3]][,c("time","event","A","ePsSIptw")])

    ## IPD weighted analyses
    svycoxph0 <- svycoxph(formula = Surv(time, event) ~ A,
                          design = svydesign(ids = ~ 1, weights = ~ ePsSIptw, data = data0),
                          method = "breslow")
    coxph0 <- coxph(formula = Surv(time, event) ~ A,
                    data = data0,
                    method = "breslow",
                    weights = ePsSIptw,
                    robust = TRUE)

    ## Riskset construction with weight summaries
    risksets0 <- SiteRisksetsByStrata(time = data0[, "time"],
                                      event = data0[, "event"],
                                      A = data0[, "A"],
                                      W = data0[, "ePsSIptw"])
    ## IPD reconstruction from risksets with weight summaries
    risksets0$site <- 1
    risksets0$outcome <- "survival"
    risksets0$method <- "ePsSIptw"
    risksets0$strata <- NA
    risksets0_expanded <- RisksetsToIpdSkewed(risksets0, compress = FALSE)

    ## Reconstructed-IPD weighted analyses
    svycoxph0_expanded <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                                   design = svydesign(ids = ~ 1, weights = ~ W, data = risksets0_expanded),
                                   method = "breslow")
    coxph0_expanded <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                             data = risksets0_expanded,
                             method = "breslow",
                             weights = W,
                             robust = TRUE)

    ## Coefficients should match up exactly
    ## Two fitting methods
    expect_equal(coef(svycoxph0), coef(coxph0))
    expect_equal(coef(svycoxph0_expanded), coef(coxph0_expanded))
    ## original weighted IPD vs expaneded weighted IPD
    expect_equal(coef(svycoxph0), coef(svycoxph0_expanded))
    expect_equal(coef(coxph0), coef(coxph0_expanded))

    ## Variances
    ## Two robust variance methods
    ## Finite-population correction results in larger variance with MW, but close
    expect_true(as.numeric(vcov(svycoxph0)) > as.numeric(vcov(coxph0)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^6)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(coxph0))) < 1/10^7)
    ## Variance: original weighted IPD < expaneded weighted IPD
    expect_true(as.numeric(vcov(svycoxph0)) < as.numeric(vcov(svycoxph0_expanded)))
    expect_true(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^5)
    expect_false(abs(as.numeric(vcov(svycoxph0)) - as.numeric(vcov(svycoxph0_expanded))) < 1/10^6)
    ##
    expect_true(as.numeric(vcov(coxph0)) < as.numeric(vcov(coxph0_expanded)))
    expect_true(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^5)
    expect_false(abs(as.numeric(vcov(coxph0)) - as.numeric(vcov(coxph0_expanded))) < 1/10^6)


###  Comparison of skewed and extreme weight reconstruction
    ## Examine if robust variance is invariant to weight rearrangement within event and treatment
    risksets0_expanded_ex <- RisksetsToIpdSkewed(risksets0, compress = FALSE,
                                                 helper_fun = RisksetsToIpdExtremeHelper)
    ## NOT the same weights data
    summary(risksets0_expanded_ex$W)
    summary(risksets0_expanded$W)
    expect_false(identical(summary(risksets0_expanded_ex$W),
                           summary(risksets0_expanded$W)))
    ## Means match
    expect_equal(mean(risksets0_expanded_ex$W),
                 mean(risksets0_expanded$W))
    ## Variances match
    expect_equal(var(risksets0_expanded_ex$W),
                 var(risksets0_expanded$W))

    ## Reconstructed-IPD weighted analyses with extreme weights
    svycoxph0_expanded_ex <- svycoxph(formula = Surv(start_time, stop_time, event) ~ A,
                                      design = svydesign(ids = ~ 1, weights = ~ W, data = risksets0_expanded_ex),
                                      method = "breslow")
    coxph0_expanded_ex <- coxph(formula = Surv(start_time, stop_time, event) ~ A,
                                data = risksets0_expanded_ex,
                                method = "breslow",
                                weights = W,
                                robust = TRUE)
    ## Coefficients match
    expect_equal(coef(svycoxph0_expanded), coef(svycoxph0_expanded_ex))
    expect_equal(coef(coxph0_expanded), coef(coxph0_expanded_ex))
    ## Variances match
    expect_equal(vcov(svycoxph0_expanded), vcov(svycoxph0_expanded_ex))
    expect_equal(vcov(coxph0_expanded), vcov(coxph0_expanded_ex))

})

test_that("case-centered logistic regression is performed correctly", {

    ## Limit time points for speed
    resRisk <- subset(resRisk, eval_time < 10)

    ## Split but drop empty levels
    lstDf <- split(resRisk, f = resRisk[,c("outcome","method")],
                   drop = TRUE)


    ## Check if case-centered logistic regression is working for unweighted data
    lstDfUnwt <- lstDf[!grepl(pattern = "ePsSIptw|ePsMw", names(lstDf))]

    ## Run case-centered conditional logistic regression
    ans <- lapply(lstDfUnwt, function(df) {
        ## If unweighted, run regular case-centered approach
        ## Stratification by site is implicit by constructing risk sets at each site.
        out <- glm(formula = cbind(events_A1, events_A0) ~ 1,
                   family  = binomial(link = "logit"),
                   offset  = log(riskset_A1 / riskset_A0),
                   data    = df)
    })
    ## Expectation
    expect_equal(lapply(lstDfUnwt, CaseCenteredLogistic),
                 ans)


    ## Check depleted riskset handling
    ## If either exposed or unexposed are depleted (zero), time points should be dropped
    ## Untreated are depleted
    zeroRisksetA0 <- subset(resRisk, method == "ePsStrata" & outcome == "survival")
    zeroRisksetA0[zeroRisksetA0$eval_time > 8, c("events_A0","riskset_A0")] <- 0
    expect_equal(CaseCenteredLogistic(zeroRisksetA0),
                 CaseCenteredLogistic(subset(zeroRisksetA0, riskset_A0 > 0 & riskset_A1 > 0)))
    ## Treated are depleted
    zeroRisksetA1 <- subset(resRisk, method == "ePsStrata" & outcome == "survival")
    zeroRisksetA1[zeroRisksetA1$eval_time > 8, c("events_A1","riskset_A1")] <- 0
    CaseCenteredLogistic(zeroRisksetA1)
    expect_equal(CaseCenteredLogistic(zeroRisksetA1),
                 CaseCenteredLogistic(subset(zeroRisksetA1, riskset_A0 > 0 & riskset_A1 > 0)))


    ## Using AnalyzeSiteRisksets for unweighted
    lstBin  <- AnalyzeSiteRisksetsBin(resRisk[resRisk$outcome == "binary",])
    lstSurv <- AnalyzeSiteRisksetsSurv(resRisk[resRisk$outcome == "survival",])
    out2 <- do.call(rbind, c(lstBin, lstSurv))
    rownames(out2) <- NULL
    df2  <- data.frame(data = rep("risksets", nrow(out2)),
                       outcome = c(rep("binary", length(lstBin)),
                                   rep("survival", length(lstSurv))),
                       method = c(names(lstBin),
                                  names(lstSurv)),
                       stringsAsFactors = FALSE)
    ans2 <- cbind(df2, out2)

    ## Expectation
    expect_equal(AnalyzeSiteRisksets(resRisk),
                 ans2)


    ## Check if weighted analysis is working explicitly
    ## Efron
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedEfron(lstDf[["survival.ePsSIptw"]]),
        "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd((lstDf[["survival.ePsSIptw"]]))),
                         method = "efron"),
                "A"))
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedEfron((lstDf[["survival.ePsMw"]])), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd((lstDf[["survival.ePsMw"]]))),
                         method = "efron"), "A"))
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedEfron(lstDf[["binary.ePsSIptw"]]), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd(lstDf[["binary.ePsSIptw"]])),
                         method = "efron"), "A"))
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedEfron(lstDf[["binary.ePsMw"]]), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd(lstDf[["binary.ePsMw"]])),
                         method = "efron"), "A"))
    ## Breslow
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedBreslow(lstDf[["survival.ePsSIptw"]]), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd((lstDf[["survival.ePsSIptw"]]))),
                         method = "breslow"), "A"))
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedBreslow((lstDf[["survival.ePsMw"]])), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd((lstDf[["survival.ePsMw"]]))),
                         method = "breslow"), "A"))
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedBreslow(lstDf[["binary.ePsSIptw"]]), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd(lstDf[["binary.ePsSIptw"]])),
                         method = "breslow"), "A"))
    expect_equal(CoefVar(
        CaseCenteredLogisticWeightedBreslow(lstDf[["binary.ePsMw"]]), "A"),
        CoefVar(svycoxph(formula = Surv(start_time, stop_time, event) ~ A + strata(site),
                         design = svydesign(ids = ~ 1, weights = ~ W,
                                            data = RisksetsToIpd(lstDf[["binary.ePsMw"]])),
                         method = "breslow"), "A"))

})


###
### Tests for individual-level data regression
################################################################################

test_that("individual-level data regression is performed correctly", {

    df <- RequestSiteDataset(drnReady)

    ## Efron
    ## Binary
    ## Unadjusted stratified by site
    bin_unadjE       <- clogit(formula = Y ~ A + strata(site),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Matched cohort stratified by sites
    bin_ePsMatchE    <- clogit(formula = Y ~ A + strata(site),
                               data    = df[df$ePsMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    bin_eDrsBMatchE  <- clogit(formula = Y ~ A + strata(site),
                               data    = df[df$eDrsBMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Stratified analysis stratified by summary score strata and sites
    bin_ePsStrataE   <- clogit(formula = Y ~ A + strata(site, ePsStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    bin_eDrsBStrataE <- clogit(formula = Y ~ A + strata(site, eDrsBStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Weighted analysis stratified by sites
    bin_ePsSIptwE    <- svycoxph(formula = Surv(rep(1, length(Y)), Y) ~ A + strata(site),
                                 design = svydesign(ids = ~ 1, data = df, weights = ~ ePsSIptw),
                                 method  = c("exact", "approximate", "efron", "breslow")[3])
    bin_ePsMwE       <- svycoxph(formula = Surv(rep(1, length(Y)), Y) ~ A + strata(site),
                                 design = svydesign(ids = ~ 1, data = df, weights = ~ ePsMw),
                                 method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Survival
    ## Unadjusted stratified by site
    surv_unadjE       <- coxph(formula = Surv(time, event) ~ A + strata(site),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Matched cohort stratified by sites
    surv_ePsMatchE    <- coxph(formula = Surv(time, event) ~ A + strata(site),
                               data    = df[df$ePsMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    surv_eDrsSMatchE  <- coxph(formula = Surv(time, event) ~ A + strata(site),
                               data    = df[df$eDrsSMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Stratified analysis stratified by summary score strata and sites
    surv_ePsStrataE   <- coxph(formula = Surv(time, event) ~ A + strata(site, ePsStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    surv_eDrsSStrataE <- coxph(formula = Surv(time, event) ~ A + strata(site, eDrsSStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[3])
    ## Weighted analysis stratified by sites
    surv_ePsSIptwE    <- svycoxph(formula = Surv(time, event) ~ A + strata(site),
                                  design = svydesign(ids = ~ 1, data = df, weights = ~ ePsSIptw),
                                  method  = c("exact", "approximate", "efron", "breslow")[3])
    surv_ePsMwE       <- svycoxph(formula = Surv(time, event) ~ A + strata(site),
                                  design = svydesign(ids = ~ 1, data = df, weights = ~ ePsMw),
                                  method  = c("exact", "approximate", "efron", "breslow")[3])

    ## Breslow
    ## Binary
    ## Unadjusted stratified by site
    bin_unadjB       <- clogit(formula = Y ~ A + strata(site),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Matched cohort stratified by sites
    bin_ePsMatchB    <- clogit(formula = Y ~ A + strata(site),
                               data    = df[df$ePsMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    bin_eDrsBMatchB  <- clogit(formula = Y ~ A + strata(site),
                               data    = df[df$eDrsBMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Stratified analysis stratified by summary score strata and sites
    bin_ePsStrataB   <- clogit(formula = Y ~ A + strata(site, ePsStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    bin_eDrsBStrataB <- clogit(formula = Y ~ A + strata(site, eDrsBStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Weighted analysis stratified by sites
    bin_ePsSIptwB    <- svycoxph(formula = Surv(rep(1, length(Y)), Y) ~ A + strata(site),
                                 design = svydesign(ids = ~ 1, data = df, weights = ~ ePsSIptw),
                                 method  = c("exact", "approximate", "efron", "breslow")[4])
    bin_ePsMwB       <- svycoxph(formula = Surv(rep(1, length(Y)), Y) ~ A + strata(site),
                                 design = svydesign(ids = ~ 1, data = df, weights = ~ ePsMw),
                                 method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Survival
    ## Unadjusted stratified by site
    surv_unadjB       <- coxph(formula = Surv(time, event) ~ A + strata(site),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Matched cohort stratified by sites
    surv_ePsMatchB    <- coxph(formula = Surv(time, event) ~ A + strata(site),
                               data    = df[df$ePsMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    surv_eDrsSMatchB  <- coxph(formula = Surv(time, event) ~ A + strata(site),
                               data    = df[df$eDrsSMatch == 1,],
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Stratified analysis stratified by summary score strata and sites
    surv_ePsStrataB   <- coxph(formula = Surv(time, event) ~ A + strata(site, ePsStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    surv_eDrsSStrataB <- coxph(formula = Surv(time, event) ~ A + strata(site, eDrsSStrata),
                               data    = df,
                               method  = c("exact", "approximate", "efron", "breslow")[4])
    ## Weighted analysis stratified by sites
    surv_ePsSIptwB    <- svycoxph(formula = Surv(time, event) ~ A + strata(site),
                                  design = svydesign(ids = ~ 1, data = df, weights = ~ ePsSIptw),
                                  method  = c("exact", "approximate", "efron", "breslow")[4])
    surv_ePsMwB       <- svycoxph(formula = Surv(time, event) ~ A + strata(site),
                                  design = svydesign(ids = ~ 1, data = df, weights = ~ ePsMw),
                                  method  = c("exact", "approximate", "efron", "breslow")[4])

    ## Pile up binary (both Efron and Breslow) first
    mat <- rbind(CoefVar(bin_unadjE, "A"),
                 CoefVar(bin_ePsMatchE, "A"),
                 CoefVar(bin_eDrsBMatchE, "A"),
                 CoefVar(bin_ePsStrataE, "A"),
                 CoefVar(bin_eDrsBStrataE, "A"),
                 CoefVar(bin_ePsSIptwE, "A"),
                 CoefVar(bin_ePsMwE, "A"),
                 ## Breslow
                 CoefVar(bin_unadjB, "A"),
                 CoefVar(bin_ePsMatchB, "A"),
                 CoefVar(bin_eDrsBMatchB, "A"),
                 CoefVar(bin_ePsStrataB, "A"),
                 CoefVar(bin_eDrsBStrataB, "A"),
                 CoefVar(bin_ePsSIptwB, "A"),
                 CoefVar(bin_ePsMwB, "A"),
                 ## Survival
                 ## Efron
                 CoefVar(surv_unadjE, "A"),
                 CoefVar(surv_ePsMatchE, "A"),
                 CoefVar(surv_eDrsSMatchE, "A"),
                 CoefVar(surv_ePsStrataE, "A"),
                 CoefVar(surv_eDrsSStrataE, "A"),
                 CoefVar(surv_ePsSIptwE, "A"),
                 CoefVar(surv_ePsMwE, "A"),
                 ## Breslow
                 CoefVar(surv_unadjB, "A"),
                 CoefVar(surv_ePsMatchB, "A"),
                 CoefVar(surv_eDrsSMatchB, "A"),
                 CoefVar(surv_ePsStrataB, "A"),
                 CoefVar(surv_eDrsSStrataB, "A"),
                 CoefVar(surv_ePsSIptwB, "A"),
                 CoefVar(surv_ePsMwB, "A"))

    ans <- data.frame(data = rep("dataset", nrow(mat)),
                      outcome = rep(c("binary","survival"), each = nrow(mat) / 2),
                      ##         ## Binary (clogit by both Efron and Breslow)
                      method = c(paste0(c("unadj",
                                          "ePsMatch", "eDrsBMatch",
                                          "ePsStrata", "eDrsBStrata",
                                          "ePsSIptw", "ePsMw"),
                                        "E"),
                                 paste0(c("unadj",
                                          "ePsMatch", "eDrsBMatch",
                                          "ePsStrata", "eDrsBStrata",
                                          "ePsSIptw", "ePsMw"),
                                        "B"),
                                 ## Survival (both Efron and Breslow)
                                 paste0(c("unadj",
                                          "ePsMatch", "eDrsSMatch",
                                          "ePsStrata", "eDrsSStrata",
                                          "ePsSIptw", "ePsMw"),
                                        "E"),
                                 paste0(c("unadj",
                                          "ePsMatch", "eDrsSMatch",
                                          "ePsStrata", "eDrsSStrata",
                                          "ePsSIptw", "ePsMw"),
                                        "B")),
                      stringsAsFactors = FALSE)

    ans <- cbind(ans, mat)

    ## Expectations
    expect_equal(AnalyzeSiteDataset(df),
                 ans)
})


test_that("individual-level data COUNTERFACTUAL regression is performed correctly", {

    df <- RequestSiteTruth(drnReady)

    ## Need to set seed for randomness control
    set.seed(113)
    ## Duplicate for counterfactuals
    df_A0 <- df
    df_A1 <- df
    ## Manipulate treatment
    df_A0$A_ <- 0
    df_A1$A_ <- 1
    ## Realize binary outcome under each manipulated treatments
    ## FIXME: Stochastic and not ideal, but should work over iterations
    df_A0$Y_ <- rbinom(n = seq_along(df_A0$pY), size = 1, prob = df_A0$pY0)
    df_A1$Y_ <- rbinom(n = seq_along(df_A1$pY), size = 1, prob = df_A1$pY1)
    ## Realize survival outcome under each manipulated treatment
    df_A0$T_ <- rexp(n = seq_along(df_A0$rate0), rate = df_A0$rate0)
    df_A1$T_ <- rexp(n = seq_along(df_A1$rate1), rate = df_A1$rate1)
    ## Censor at Tmax
    Tmax <- 1
    ##  Event variable 1 event, 0 censoring
    df_A0$event_ <- as.numeric(df_A0$T_ <= Tmax)
    df_A1$event_ <- as.numeric(df_A1$T_ <= Tmax)
    ##  Set time to maximum follow up if administratively censored.
    df_A0$T_ <- ifelse(df_A0$T_ <= Tmax, df_A0$T_, Tmax)
    df_A1$T_ <- ifelse(df_A1$T_ <= Tmax, df_A1$T_, Tmax)
    ##  Make day-level data, but do not discretize. This should not matter in Cox.
    df_A0$T_ <- df_A0$T_ * 365.25
    df_A1$T_ <- df_A1$T_ * 365.25
    ## Create a counterfactual dataset containing everybody twice under both treatment
    ## All manipulated or counterfactuals are marked with *_ variable names.
    x <- rbind(df_A0, df_A1)

    ## Binary
    ## Unadjusted stratified by sites
    unadjE       <- try(clogit(formula = Y_ ~ A_ + strata(site),
                               data    = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[3]))
    ## Matched cohort stratified by sites
    ePsMatchE    <- try(clogit(formula = Y_ ~ A_ + strata(site),
                               data    = ValidateDf(x[x$ePsMatch == 1,], expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[3]))
    eDrsBMatchE  <- try(clogit(formula = Y_ ~ A_ + strata(site),
                               data    = ValidateDf(x[x$eDrsBMatch == 1,], expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[3]))
    ## Stratified analysis stratified by summary score strata and sites
    ePsStrataE   <- try(clogit(formula = Y_ ~ A_ + strata(site, ePsStrata),
                               data    = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[3]))
    eDrsBStrataE <- try(clogit(formula = Y_ ~ A_ + strata(site, eDrsBStrata),
                               data    = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[3]))
    ## Weighted analysis stratified by sites
    ePsSIptwE    <- try(svycoxph(formula = Surv(rep(1, length(Y_)), Y_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                                                     weights = ~ ePsSIptw),
                                 method  = c("exact", "approximate", "efron", "breslow")[3]))
    ePsMwE       <- try(svycoxph(formula = Surv(rep(1, length(Y_)), Y_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                                                     weights = ~ ePsMw),
                                 method  = c("exact", "approximate", "efron", "breslow")[3]))

    ## Breslow
    ## Unadjusted stratified by sites
    unadjB       <- try(clogit(formula = Y_ ~ A_ + strata(site),
                               data    = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[4]))
    ## Matched cohort stratified by sites
    ePsMatchB    <- try(clogit(formula = Y_ ~ A_ + strata(site),
                               data    = ValidateDf(x[x$ePsMatch == 1,], expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[4]))
    eDrsBMatchB  <- try(clogit(formula = Y_ ~ A_ + strata(site),
                               data    = ValidateDf(x[x$eDrsBMatch == 1,], expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[4]))
    ## Stratified analysis stratified by summary score strata and sites
    ePsStrataB   <- try(clogit(formula = Y_ ~ A_ + strata(site, ePsStrata),
                               data    = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[4]))
    eDrsBStrataB <- try(clogit(formula = Y_ ~ A_ + strata(site, eDrsBStrata),
                               data    = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                               method  = c("exact", "approximate", "efron", "breslow")[4]))
    ## Weighted analysis stratified by sites
    ePsSIptwB    <- try(svycoxph(formula = Surv(rep(1, length(Y_)), Y_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                                                     weights = ~ ePsSIptw),
                                 method  = c("exact", "approximate", "efron", "breslow")[4]))
    ePsMwB       <- try(svycoxph(formula = Surv(rep(1, length(Y_)), Y_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "Y_"),
                                                     weights = ~ ePsMw),
                                 method  = c("exact", "approximate", "efron", "breslow")[4]))

    lstBin <- list(unadjE_true       = CoefVar(unadjE, "A_"),
                    ePsMatchE_true    = CoefVar(ePsMatchE, "A_"),
                    eDrsBMatchE_true  = CoefVar(eDrsBMatchE, "A_"),
                    ePsStrataE_true   = CoefVar(ePsStrataE, "A_"),
                    eDrsBStrataE_true = CoefVar(eDrsBStrataE, "A_"),
                    ePsSIptwE_true    = CoefVar(ePsSIptwE, "A_"),
                    ePsMwE_true       = CoefVar(ePsMwE, "A_"),
                    ## Breslow
                    unadjB_true       = CoefVar(unadjB, "A_"),
                    ePsMatchB_true    = CoefVar(ePsMatchB, "A_"),
                    eDrsBMatchB_true  = CoefVar(eDrsBMatchB, "A_"),
                    ePsStrataB_true   = CoefVar(ePsStrataB, "A_"),
                    eDrsBStrataB_true = CoefVar(eDrsBStrataB, "A_"),
                    ePsSIptwB_true    = CoefVar(ePsSIptwB, "A_"),
                    ePsMwB_true       = CoefVar(ePsMwB, "A_"))


    ## Survival
    ## Unadjusted stratified by sites
    unadjE       <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                              data    = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[3]))
    ## Matched cohort stratified by sites
    ePsMatchE    <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                              data    = ValidateDf(x[x$ePsMatch == 1,], expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[3]))
    eDrsSMatchE  <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                              data    = ValidateDf(x[x$eDrsSMatch == 1,], expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[3]))
    ## Stratified analysis stratified by summary score strata and sites
    ePsStrataE   <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site, ePsStrata),
                              data    = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[3]))
    eDrsSStrataE <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site, eDrsSStrata),
                              data    = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[3]))
    ## Weighted analysis stratified by sites
    ePsSIptwE    <- try(svycoxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                                                     weights = ~ ePsSIptw),
                                 method  = c("exact", "approximate", "efron", "breslow")[3]))
    ePsMwE       <- try(svycoxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                                                     weights = ~ ePsMw),
                                 method  = c("exact", "approximate", "efron", "breslow")[3]))

    ## Breslow
    ## Unadjusted stratified by sites
    unadjB       <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                              data    = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[4]))
    ## Matched cohort stratified by sites
    ePsMatchB    <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                              data    = ValidateDf(x[x$ePsMatch == 1,], expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[4]))
    eDrsSMatchB  <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                              data    = ValidateDf(x[x$eDrsSMatch == 1,], expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[4]))
    ## Stratified analysis stratified by summary score strata and sites
    ePsStrataB   <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site, ePsStrata),
                              data    = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[4]))
    eDrsSStrataB <- try(coxph(formula = Surv(T_, event_) ~ A_ + strata(site, eDrsSStrata),
                              data    = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                              method  = c("exact", "approximate", "efron", "breslow")[4]))
    ## Weighted analysis stratified by sites
    ePsSIptwB    <- try(svycoxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                                                     weights = ~ ePsSIptw),
                                 method  = c("exact", "approximate", "efron", "breslow")[4]))
    ePsMwB       <- try(svycoxph(formula = Surv(T_, event_) ~ A_ + strata(site),
                                 design  = svydesign(ids = ~ 1,
                                                     data = ValidateDf(x, expo_var = "A_", event_var = "event_"),
                                                     weights = ~ ePsMw),
                                 method  = c("exact", "approximate", "efron", "breslow")[4]))

    lstSurv <- list(unadjE_true       = CoefVar(unadjE, "A_"),
                    ePsMatchE_true    = CoefVar(ePsMatchE, "A_"),
                    eDrsSMatchE_true  = CoefVar(eDrsSMatchE, "A_"),
                    ePsStrataE_true   = CoefVar(ePsStrataE, "A_"),
                    eDrsSStrataE_true = CoefVar(eDrsSStrataE, "A_"),
                    ePsSIptwE_true    = CoefVar(ePsSIptwE, "A_"),
                    ePsMwE_true       = CoefVar(ePsMwE, "A_"),
                    ## Breslow
                    unadjB_true       = CoefVar(unadjB, "A_"),
                    ePsMatchB_true    = CoefVar(ePsMatchB, "A_"),
                    eDrsSMatchB_true  = CoefVar(eDrsSMatchB, "A_"),
                    ePsStrataB_true   = CoefVar(ePsStrataB, "A_"),
                    eDrsSStrataB_true = CoefVar(eDrsSStrataB, "A_"),
                    ePsSIptwB_true    = CoefVar(ePsSIptwB, "A_"),
                    ePsMwB_true       = CoefVar(ePsMwB, "A_"))

    matCoefVar <- do.call(rbind, c(lstBin, lstSurv))
    rownames(matCoefVar) <- NULL

    ## Name data
    out <- data.frame(data = rep("truth", nrow(matCoefVar)),
                      outcome = rep(c("binary","survival"),
                                    c(length(lstBin), length(lstSurv))),
                      method = c(names(lstBin),
                                 names(lstSurv)),
                      stringsAsFactors = FALSE)

    ## Combine and return
    ans <- cbind(out, matCoefVar)


    ## Expectations
    expect_equal(data.frame(do.call(rbind, c(AnalyzeSiteTruthBin(x),
                                             AnalyzeSiteTruthSurv(x))),
                            row.names = NULL),
                 ans[c("coef","var")])

    set.seed(113)
    expect_equal(AnalyzeSiteTruth(df, Tmax = Tmax),
                 ans)
})


###
### All analyses together
################################################################################

test_that("all analyses can be conducted together", {

    cat("### Data being used\n")
    print(drnReady)

    cat("### Time required for data preparation\n")
    cat("### RequestSiteRegression \n")
    print(system.time(reqSiteRegression <- RequestSiteRegression(drnReady)))
    cat("### RequestSiteSummary \n")
    print(system.time(reqSiteSummary    <- RequestSiteSummary(drnReady)))
    cat("### RequestSiteRisksets \n")
    print(system.time(reqSiteRisksets   <- RequestSiteRisksets(drnReady)))
    cat("### RequestSiteDataset \n")
    print(system.time(reqSiteDataset    <- RequestSiteDataset(drnReady)))
    cat("### RequestSiteTruth \n")
    print(system.time(reqSiteTruth      <- RequestSiteTruth(drnReady)))

    cat("### Time required for data analysis\n")
    cat("### AnalyzeSiteRegression \n")
    print(system.time(resSiteRegression <- AnalyzeSiteRegression(reqSiteRegression)))
    cat("### AnalyzeSiteSummary \n")
    print(system.time(resSiteSummary    <- AnalyzeSiteSummary(reqSiteSummary)))
    cat("### AnalyzeSiteRisksets \n")
    print(system.time(resSiteRisksets   <- AnalyzeSiteRisksets(reqSiteRisksets)))
    cat("### AnalyzeSiteDataset \n")
    print(system.time(resSiteDataset    <- AnalyzeSiteDataset(reqSiteDataset)))
    cat("### AnalyzeSiteTruth \n")
    ## Need seed for counterfactual realization
    set.seed(131)
    print(system.time(resSiteTruth      <- AnalyzeSiteTruth(reqSiteTruth)))

    ## All data types except truth has unadjusted analyses
    expect_true(any(grepl("unadj", resSiteRegression$method)))
    expect_true(any(grepl("unadj", resSiteSummary$method)))
    expect_true(any(grepl("unadj", resSiteRisksets$method)))
    expect_true(any(grepl("unadj", resSiteDataset$method)))
    ## Breslow and Efron are all paired
    expect_equal(sum(grepl("unadjE", resSiteRegression$method)),
                 sum(grepl("unadjB", resSiteRegression$method)))
    expect_equal(sum(grepl("unadjE", resSiteSummary$method)),
                 sum(grepl("unadjB", resSiteSummary$method)))
    expect_equal(sum(grepl("unadjE", resSiteRisksets$method)),
                 sum(grepl("unadjB", resSiteRisksets$method)))
    expect_equal(sum(grepl("unadjE", resSiteDataset$method)),
                 sum(grepl("unadjB", resSiteDataset$method)))

    ## Combine as an answer to the wrapper function
    ans <- dplyr::bind_rows(resSiteRegression,
                            resSiteSummary,
                            resSiteRisksets,
                            resSiteDataset,
                            resSiteTruth)

    ## Data types
    data_types <- c("meta", "summary", "risksets", "dataset", "truth")
    expect_true(all(ans$data %in% data_types))
    expect_true(all(data_types %in% ans$data))
    ans$data    <- factor(ans$data,
                          levels = data_types)
    ## Outcome types
    outcome_types <- c("binary", "survival")
    expect_true(all(ans$outcome %in% outcome_types))
    expect_true(all(outcome_types %in% ans$outcome))
    ans$outcome <- factor(ans$outcome,
                          levels = outcome_types)
    ## Do not specify levels for method as they are subject to change
    ans$method  <- factor(ans$method)

    ## Reorder for readability
    ans <- dplyr::arrange_(ans, "data", "outcome", "method")

    ## Expectations
    if (length(suppressWarnings(system("type sas", intern = TRUE, ignore.stderr = TRUE))) == 0) {
        ## If sas is not available, drop ones with expected NA's
        expect_true(all(!is.na(ans[!(ans$outcome == "survival" & ans$method %in% c("ePsMwB","ePsMwE","ePsSIptwB","ePsSIptwE") & ans$data == "risksets"),
                                   c("outcome","method","data","coef","var")])))
    } else {
        ## If sas is available
        expect_true(all(!is.na(ans[c("outcome","method","data","coef","var")])))
    }


    ## Conduct all analyses to gether
    cat("### Time required for entire analysis together\n")
    ## Need seed for counterfactual realization
    set.seed(131)
    print(system.time(res <- Analyze(drnReady)))

    ## These columns should not have NA's
    if (length(suppressWarnings(system("type sas", intern = TRUE, ignore.stderr = TRUE))) == 0) {
        ## If sas is not available, drop ones with expected NA's
        expect_true(all(!is.na(res[!(res$outcome == "survival" & res$method %in% c("ePsMwB","ePsMwE","ePsSIptwB","ePsSIptwE") & res$data == "risksets"),
                                   c("outcome","method","data","coef","var")])))
    } else {
        ## If sas is available
        expect_true(all(!is.na(res[c("outcome","method","data","coef","var")])))
    }

    ## Site result columns included
    expect_true(any(grepl("coef_site", names(res))))
    expect_true(any(grepl("var_site", names(res))))
    ## All columns should not have NA's for meta-analysis (site coef/var included).
    expect_true(all(!is.na(subset(res, method == "meta"))))
    ## Match up
    expect_equal(res,
                 ans)

})
kaz-yos/distributed documentation built on May 27, 2019, 4:50 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
kaz-yos/distributed
Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network

tests/testthat/test-04.AnalyzeData.R
In kaz-yos/distributed: Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network

R Package Documentation

Browse R Packages

We want your feedback!

kaz-yos/distributed Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network

tests/testthat/test-04.AnalyzeData.R In kaz-yos/distributed: Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network

R Package Documentation

Browse R Packages

We want your feedback!

kaz-yos/distributed
Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network

tests/testthat/test-04.AnalyzeData.R
In kaz-yos/distributed: Examine Privacy Preserving Data Analysis Methods in Simulated Distributed Data Network