tests/LN-testing.R

require(glmMisrep)

data <- data.frame( Y = c(76.537552,  2528.253825,   742.661850,   275.771394,   422.115809,   994.273666,    27.363153,   107.370891,
                            1133.422491,    42.577534,    21.222886,    82.130986,   254.636406,   116.754997,    96.768592,   238.197024,
                             617.472558,     9.391864,   100.894256,   332.483705,   279.115849,  1101.826204,   599.726263,   218.041193,
                             519.177473,  3529.107060,  1879.559865,    47.560367,   422.300405,   140.919276,   398.857500,  1003.204619,
                            1518.945852,   638.622736,  1667.131817,   352.307789,    86.629404,  5524.732901,   372.118071,  3732.461629,
                               4.532981,   147.234528,    41.538661, 10929.665862,    85.689014,    33.029995,    20.335263,    67.657641,
                             388.829832,     8.608040),
                    X1 = c(1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
                    X2 = c(0.49507310, -0.08883055,  0.12926013,  0.14710474, -0.51448308, -0.13046076,  2.08608211, -0.57426685, -0.90878741,
                             1.23110221,  1.67853057,  1.55074616, -2.18659493,  1.41014900,  0.58587594,  0.04134293, -1.99370526,  0.09277679,
                            -0.76394655, -1.28839675,  0.41072563, -1.27177378,  0.08823336,  0.20571840, -0.82168230,  0.33441449,  0.28506237,
                             0.04591112, -0.16366695, -1.45927411, -0.10830177, -1.29651305,  0.66361581,  2.00878945, -0.32495902, -1.13028382,
                             0.74934344, -0.54082773, -0.22820110, -0.67195420,  0.57810617,  1.16271770, -0.97158214,  0.85486460,  0.93789922,
                            -0.80378906,  0.55046386, -0.03277486,  1.10168928, -1.47985902),
                    X3 = c(0.2357360, 0.9084012, 0.9510478, 0.9426464, 0.5130887, 0.9366523, 0.2776905, 0.8125779, 0.8742008, 0.7971722, 0.3337451,
                            0.3999461, 0.8996933, 0.4550159, 0.4777070, 0.5786287, 0.9578764, 0.6710153, 0.6726491, 0.7067857, 0.9986008, 0.9106748,
                            0.1215141, 0.9760379, 0.8532801, 0.5979620, 0.9679438, 0.8752609, 0.4202729, 0.6799168, 0.9755574, 0.9049099, 0.9187066,
                            0.8170819, 0.5400875, 0.8945737, 0.1026229, 0.9990335, 0.7742958, 0.8731023, 0.3292357, 0.7081538, 0.6131705, 0.8676843,
                            0.7707079, 0.9935447, 0.8271532, 0.7889369, 0.6950691, 0.5327706),
                    Sex = c("Female", "Male",   "Female", "Male",   "Male",   "Male",   "Female", "Male",   "Male",   "Female", "Male",   "Female",
                            "Female", "Male",   "Female", "Male",   "Female", "Female", "Female", "Male",   "Female", "Male",   "Male",   "Male",
                             "Male",   "Female", "Female", "Male",   "Female", "Male",   "Female", "Female", "Male",   "Female", "Male",   "Female",
                             "Male",   "Male",   "Female", "Female", "Female", "Male",   "Male",   "Male",   "Female", "Female", "Female", "Female",
                             "Male",   "Female"),
                    Race = c("White", "Black", "Other", "White", "White", "Other", "Other", "White", "White", "White", "White", "Black", "Other", "White",
                              "White", "Other", "White", "Other", "Other", "Black", "Other", "Black", "Other", "Black", "Other", "Black", "Other", "Other",
                              "White", "White", "Other", "White", "Black", "Black", "Other", "Other", "White", "Other", "White", "Black", "Other", "Black",
                              "Black", "White", "Other", "Other", "Other", "White", "White", "Other"),
                    V_star = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0))

data$Race <- as.factor(data$Race)
data$Sex <- as.factor(data$Sex)

t1 <- tryCatch(LnRegMisrepEM(formula = y ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.6,0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# The response above is inappropriately specified
stopifnot(
  t1$message == "formula must be 'log(response) ~ terms'. See 'Details'"
)


t2 <- tryCatch(LnRegMisrepEM(formula = log(y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                             v_star = "V_star",
                             data = data,
                             lambda = c(0.6,0.4),
                             epsilon = 1e-08,
                             maxit = 10000,
                             maxrestarts = 20),
               error = function(x) x )

stopifnot(
  t2$message == "object 'y' not found"
)



t3 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_Star",
                                data = data,
                                lambda = c(0.6,0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# Argument to 'v_star' is misspelled
stopifnot(
  t3$message == "variable V_Star not present in dataframe"
)


data$V_star <- ifelse(data$V_star == 1, yes = "yes", no = "no")

t4 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.6,0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )


# v* variable is type character (yes and no)
stopifnot(
  t4$message == "v_star variable must be of class 'factor' or 'numeric'"
)

data$V_star <- ifelse(data$V_star == "yes", yes = 1, no = 0)


data$V_star[10] <- -1


t5 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.6,0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# v* variable must be binary
stopifnot(
  t5$message == "v_star variable must contain two unique values"
)

data$V_star[10] <- 0


data$V_star <- ifelse(data$V_star == 1, yes = 1, no = 2)

t6 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.6,0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# v* must be binary, but more specifically 0/1;
stopifnot(
  t6$message == "v_star variable must be coded with ones and zeroes"
)



data$V_star <- ifelse(data$V_star == 1, yes = 1, no = 0)

t7 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.49, 0.52),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# Inappropriately specified lambda argument
stopifnot(
  t7$message == "Lambda vector must sum to one"
)


t8 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(1/3, 1/3, 1/3),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# Inappropriately specified lambda argument
stopifnot(
  t8$message == "Lambda vector must contain two elements"
)



data$X4 <- data$X2*0.3

t9 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + X4 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.6, 0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
               error = function(x) x )

# Linearly dependent covariates/degenerate design matrix
stopifnot(
  t9$message == "Linear dependencies exist in the covariates"
)


t10 <- tryCatch(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race,
                             v_star = "V_star",
                             data = data,
                             lambda = c(0.6, 0.4),
                             epsilon = 1e-08,
                             maxit = 10000,
                             maxrestarts = 20),
               error = function(x) x )

# V_star variable is absent from formula argument
stopifnot(
  t10$message == "v_star variable must be specified in 'formula'"
)


# EM algorithm should fail to converge within the specified number of attempts
t11 <- tryCatch(
  capture.output(LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                               v_star = "V_star",
                               data = data,
                               lambda = c(0.6, 0.4),
                               epsilon = 1e-08,
                               maxit = 2,
                               maxrestarts = 1)),
  error = function(x) x
)

stopifnot(
  t11$message == "NOT CONVERGENT! Failed to converge after 1 attempts"
)



# On the first attempt, fails to converge, and restarts with new mixing props.
# Succeeds on the second attempt.
msg <- capture.output(
  t12 <-  LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                        v_star = "V_star",
                        data = data,
                        lambda = c(0.6, 0.4),
                        epsilon = 1e-08,
                        maxit = 15,
                        maxrestarts = 4, verb = TRUE),
  type = "message"
)

stopifnot(
  any(msg == "Warning: Failed to converge. Restarting with new mixing proportions")
)


# This should succeed;
msg <- capture.output(
 t13 <-  LnRegMisrepEM(formula = log(Y) ~ X1 + X2 + X3 + Sex + Race + V_star,
                                v_star = "V_star",
                                data = data,
                                lambda = c(0.6, 0.4),
                                epsilon = 1e-08,
                                maxit = 10000,
                                maxrestarts = 20),
 type = "message"
  )

# Output validation;

# Output should be a list
stopifnot(
  is.list(t13)
)

# With 14 elements
stopifnot(
  length(t13) == 14
)

# Fisher information matrix should be symmetric
stopifnot(
  isSymmetric(t13$cov.estimates)
)


# The returned list should have elements with the following names
# and types
stopifnot(
  all.equal(lapply(t13, class),
            lapply(list(y = 0.1, lambda = 0.2, params = 0.3, loglik = 0.4,
                        posterior = 0.5, all.loglik = 0.6, cov.estimates = matrix(data = c(1,2,3,4), 2, 2),
                        std.error = 0.7, t.values = 0.8, p.values = 0.9, ICs = 1.0, ft = "*",
                        formula = y ~ x, v_star_name = "v*" ), class), tolerance = 1.5e-7 )
)


# Verifying the function can correctly calculate things;
stopifnot(
  all.equal(t13$lambda, 0.2752233, tolerance = 1.5e-7 )
)

stopifnot(
  all.equal(as.numeric(t13$params), c(0.38461784,  1.28998781,  2.23671321,  0.00136344,  3.43169080,  0.86482989, -0.87270674, -0.07989459,
                                      1.97303766), tolerance = 1.5e-7 )
)

stopifnot(
  all.equal( t13$loglik, -319.4672, tolerance = 1.5e-7 )
)

stopifnot(
  all.equal( t13$posterior, c(9.999942e-01, 9.998469e-01, 5.496038e-12, 9.998779e-01, 9.999986e-01, 5.904974e-09, 3.102086e-06,
                              9.999998e-01, 1.000000e+00, 9.999999e-01, 9.999998e-01, 4.037337e-05, 7.963463e-07, 1.392101e-01,
                              5.387680e-05, 9.999987e-01, 3.315679e-06, 1.000000e+00, 1.000000e+00, 3.769621e-02, 1.000000e+00,
                              1.000000e+00, 2.844998e-09, 9.999996e-01, 7.450721e-07, 5.085421e-10, 4.511226e-04, 1.000000e+00,
                              9.191679e-02, 9.972969e-01, 9.999983e-01, 9.997590e-01, 9.999999e-01, 1.050171e-08, 7.063420e-07,
                              9.999869e-01, 8.462061e-07, 1.071525e-04, 9.999998e-01, 6.965697e-05, 9.999988e-01, 9.995615e-01,
                              1.000000e+00, 1.176610e-06, 4.660242e-03, 1.000000e+00, 9.999999e-01, 9.998967e-01, 1.020901e-03,
                              9.999994e-01), tolerance = 1.5e-7 )
)


stopifnot(
  all.equal( t13$all.loglik, c(-340.5320, -325.7930, -321.3185, -320.3784, -320.0802, -319.8437, -319.7085, -319.6393, -319.5830, -319.5346,
                               -319.4999, -319.4805, -319.4718, -319.4687, -319.4677, -319.4674, -319.4673, -319.4673, -319.4672, -319.4672,
                               -319.4672, -319.4672, -319.4672, -319.4672), tolerance = 1.5e-7  )
)


stopifnot(
all.equal(t13$cov.estimates,
matrix(data = c(5.429249e-03, -2.887250e-04, -0.001407336,  8.894204e-06, -1.316386e-04,  0.001447803, -0.0001801113,  0.0001724132, -0.0007954676,  0.0001690426,
               -2.887250e-04,  1.768530e-03,  0.002022002, -1.978731e-06,  8.232676e-05, -0.001401457,  0.0001295077, -0.0002436579,  0.0006128995, -0.0001726792,
               -1.407336e-03,  2.022002e-03,  0.077937106, -5.957053e-03, -6.482111e-03, -0.060002084, -0.0106227091, -0.0225706061, -0.0218159265, -0.0098135914,
                8.894204e-06, -1.978731e-06, -0.005957053,  1.468143e-02,  9.544467e-04, -0.001405178, -0.0017195335,  0.0009178124,  0.0021212952,  0.0010653068,
               -1.316386e-04,  8.232676e-05, -0.006482111,  9.544467e-04,  4.629199e-03,  0.005663381,  0.0012697822,  0.0025368665,  0.0027705039, -0.0008591309,
                1.447803e-03, -1.401457e-03, -0.060002084, -1.405178e-03,  5.663381e-03,  0.070883845,  0.0025822106,  0.0050719148,  0.0054058102,  0.0043546425,
               -1.801113e-04,  1.295077e-04, -0.010622709, -1.719534e-03,  1.269782e-03,  0.002582211,  0.0151821526,  0.0045337746,  0.0015999026,  0.0001551407,
                1.724132e-04, -2.436579e-04, -0.022570606,  9.178124e-04,  2.536866e-03,  0.005071915,  0.0045337746,  0.0243451893,  0.0165216203, -0.0003004559,
               -7.954676e-04,  6.128995e-04, -0.021815927,  2.121295e-03,  2.770504e-03,  0.005405810,  0.0015999026,  0.0165216203,  0.0289331944,  0.0009597426,
                1.690426e-04, -1.726792e-04, -0.009813591,  1.065307e-03, -8.591309e-04,  0.004354642,  0.0001551407, -0.0003004559,  0.0009597426,  0.0129483355),
        ncol = 10, nrow = 10, byrow = TRUE, dimnames = list( c("lambda", names(t13$params)), c("lambda", names(t13$params)) ) ), tolerance = 1.5e-7
)
)

stopifnot(
  all.equal(as.numeric(t13$std.error), c(0.07368344, 0.04205390, 0.27917218, 0.12116695, 0.06803822, 0.26624020, 0.12321588, 0.15602945, 0.17009760, 0.11379075), tolerance = 1.5e-7)
)

stopifnot(
  all.equal(as.numeric(t13$t.values), c(4.62076060, 18.45976267,  0.02003933, 12.88945387,  7.01881857, -5.59321807, -0.46969848, 17.33917394), tolerance = 1.5e-7)
)


stopifnot(
  all.equal(as.numeric(t13$p.values), c(3.930521e-05, 3.541604e-21, 9.841116e-01, 7.954660e-16, 1.765708e-08, 1.761973e-06, 6.411210e-01, 3.304182e-20), tolerance = 1.5e-7)
)

stopifnot(
  all.equal(as.numeric(t13$ICs), c(658.9345, 664.5755, 678.0547), tolerance = 1.5e-7)
)


stopifnot(
  t13$ft == "LnRegMisrepEM"
)

stopifnot(
  class(t13$formula) == "formula"
)

stopifnot(
  t13$v_star_name == "V_star"
)



# Test S3 method for summarizing misrepEM objects;
stopifnot(
  class(summary(t12)) == "summary.misrepEM"
)

# Output needs to be a list
stopifnot(
  is.list(summary(t12))
)

# of length 5
stopifnot(
  length(summary(t12)) == 5
)

# Whose elements are: (1) dataframe, and (2-5) 4 numeric vectors, which have the following names:
stopifnot(
  all.equal(lapply(summary(t12), FUN = class), list(coefficients = "data.frame",
                                                    ICs = "numeric",
                                                    loglik = "numeric",
                                                    lambda = "numeric",
                                                    lambda_stderror = "numeric") )
)


# Test S3 predict method

test_data <- data.frame(Y = c(98.427279,    32.777322,  5486.156939,  1339.276500,   312.445342,  1718.322835,   185.328612,  1734.535647,
                                3971.255712,  3834.522975,    88.011945,   418.930890,  1108.338108,   188.514665,    30.939923,   226.444696,
                                  10.108475,    59.441742,   120.989164,  1554.964939, 27958.871182, 29334.748743,   792.590049,    33.450942,
                                   7.665141,   175.831588,   280.975447,    33.607044,    23.775642,   218.677457,     6.241764,   158.587975,
                                  13.852720,  2025.885032,  3070.340378,   250.562014,     3.167028,   412.431731,   253.282068,    30.773635,
                                  84.806336,    16.176141,  2544.437710,   326.410467,  1331.172612,     8.942180, 12994.689597,    82.674185,
                                  64.729101,    14.448531),
                        X1 = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0),
                        X2 = c(-1.47112660,  0.55436293, -0.66931369,  0.41044485,  1.09693026, -0.09962265, -1.20740417, -0.58021023, -0.01312526,
                                -1.02177231,  0.21622835, -0.43729831,  0.61557692, -2.40180053,  1.16981893,  0.55783770,  0.03185095,  1.86912879,
                                -0.50083512, -0.60316555, -0.05168889, -0.88313525, -0.69394152, -0.53436444,  0.67535031, -0.78427798, -0.86134004,
                                -0.60073262, -1.14478702, -1.77009228, -0.18038588, -0.31001308, -2.82399557,  0.61107096, -1.36033114,  0.37042505,
                                -0.93805180,  0.48696814,  0.58320970, -0.09727662,  1.91523731, -1.94005641,  1.01199900, -0.32069647,  0.02523619,
                                 0.36135866,  0.16991146,  0.93392489,  0.34386311, -0.69329227),
                        X3 = c(0.8830248, 0.9319574, 0.9654841, 0.9664790, 0.5306699, 0.9716790, 0.7541325, 0.9479806, 0.8758044, 0.4956611, 0.1328917,
                                0.6745790, 0.8658182, 0.9922382, 0.4903624, 0.6850327, 0.3320028, 0.5798331, 0.7594182, 0.8087243, 0.9985892, 0.9583971,
                                0.5088030, 0.7562532, 0.6356310, 0.8385115, 0.7309135, 0.3821138, 0.5828014, 0.6764654, 0.4884371, 0.7026958, 0.4908599,
                                0.3674397, 0.9896373, 0.7971634, 0.1971907, 0.8849176, 0.7674014, 0.7965604, 0.4593090, 0.5414791, 0.6594986, 0.8939997,
                                0.8013125, 0.7305848, 0.8602200, 0.5298325, 0.3602862, 0.7062794),
                        Sex = c("Female", "Female", "Male",   "Female", "Male",   "Female", "Male",   "Female", "Male",   "Male",   "Male",   "Female",
                                 "Male",   "Male",   "Male",   "Male",   "Female", "Female", "Male",   "Female", "Male",   "Male",   "Female", "Male",
                                 "Female", "Male",   "Male",   "Female", "Male",   "Male",   "Female", "Female", "Female", "Male",   "Male",   "Male",
                                 "Male",   "Female", "Male",   "Male",   "Female", "Male",   "Female", "Male",   "Male",   "Female", "Male",   "Female",
                                 "Female", "Female"),
                        Race = c("Other", "Other", "Other", "Other", "Other", "Black", "Black", "Black", "Other", "White", "Black", "White", "Black", "Other",
                                  "White", "Other", "Other", "Other", "White", "Other", "White", "White", "Black", "Other", "Other", "White", "Other", "Other",
                                  "Other", "Other", "Other", "White", "Black", "White", "Black", "Black", "Other", "White", "Other", "Other", "Other", "Other",
                                  "White", "Other", "Black", "Other", "White", "Other", "Black", "Other"),
                        V_star = c(1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0))

# Output needs to be a numeric vector
stopifnot(
  is.vector(predict(t13, test_data)) && is.numeric(predict(t13, test_data))
)


stopifnot(
  all.equal(predict(t13, test_data), c(5.344616, 4.085287, 8.730227, 6.440271, 5.003774, 7.330128, 5.210181, 7.248147, 6.993357, 7.910271, 5.316929, 6.230217, 7.832651, 5.152951, 4.228351, 6.339466, 2.025716,
                                       5.115411, 5.149389, 7.327536, 8.207476, 9.498427, 7.170882, 4.345670, 3.068551, 5.420426, 6.494979, 2.196819, 3.749605, 6.306891, 2.562261, 6.326879, 3.439677, 7.472481,
                                       8.254866, 5.360002, 2.426590, 6.953294, 6.622164, 4.484588, 4.701873, 3.606715, 7.610454, 7.055378, 5.373769, 3.393975, 9.162949, 4.942549, 4.425920, 3.309129), tolerance = 1.5e-7)
)

Try the glmMisrep package in your browser

Any scripts or data that you put into this service are public.

glmMisrep documentation built on May 29, 2024, 1:46 a.m.