# vignettes/AIDSdata/actg_reprocess.R

#
# Originally downloaded from the Statistical Software Repository  at U. Mass. Amherst
# https://www.umass.edu/statdata/statdata/data/ (AIDS clinical trial)
#
# NAME:
#   AIDS Clinical Trials Group Study 320 Data (actg320.dat)
#
# SIZE:
#   1151 Observations, 16 Variables
#
# SOURCE:
#   AIDS Clinical Trials Group
#
# REFERENCE:
#   Hosmer, D.W. and Lemeshow, S. and May, S. (2008)
# Applied Survival Analysis: Regression Modeling of Time to Event Data:
#   Second Edition, John Wiley and Sons Inc., New York, NY
#
# DESCRIPTIVE ABSTRACT:
#   The data come from a double-blind, placebo-controlled trial that compared the
# three-drug regimen of indinavir (IDV), open label zidovudine (ZDV) or
# stavudine (d4T) and lamivudine (3TC) with the two-drug regimen of
# zidovudine or stavudine and lamivudine in HIV-infected patients (Hammer
# et al., 1997).  Patients were eligible for the trial if they had no more
# than 200 CD4 cells per cubic millimeter and at least three months of
# prior zidovudine therapy.  Randomization was stratified by CD4 cell
# count at the time of screening.  The  primary  outcome  measure  was
# time  to  AIDS  defining event or death.  Because efficacy results met a
# pre-specified level of significance at an interim analysis, the trial
# was stopped early.
#
# DISCLAIMER:
#   This data is also available at the following Wiley's FTP site:
# ftp://ftp.wiley.com/public/sci_tech_med/survival
#
# LIST OF VARIABLES:
#
# Variable	Name		  Description				    Codes/Values
# ***************************************************************************************************************************
# 1		id		  Identification Code			    1-1156
# 2		time		  Time to AIDS diagnosis or death           Days
# 3		censor		  Event indicator for AIDS defining	    1 = AIDS defining diagnosis or death
# diagnosis or death			    0 = Otherwise
# 4		time_d		  Time to death				    Days
# 5		censor_d	  Event indicator for death (only)	    1 = Death
# 0 = Otherwise
# 6		tx		  Treatment indicator			    1 = Treatment includes IDV
# 0 = Control group (treatment regime without IDV)
# 7		txgrp		  Treatment group indicator		    1 = ZDV + 3TC
# 2 = ZDV + 3TC + IDV
# 3 = d4T + 3TC
# 4 = d4T + 3TC + IDV
# 8		strat2		  CD4 stratum at screening		    0 = CD4 <= 50
# 1 = CD4 > 50
# 9		sex		  Sex					    1 = Male
# 2 = Female
# 10		raceth		  Race/Ethnicity			    1 = White Non-Hispanic
# 2 = Black Non-Hispanic
# 3 = Hispanic (regardless of race)
# 4 = Asian, Pacific Islander
# 5 = American Indian, Alaskan Native
# 6 = Other/unknown
# 11		ivdrug		  IV drug use history			    1 = Never
# 2 = Currently
# 3 = Previously
# 12		hemophil	  Hemophiliac				    1 = Yes
# 0 = No
# 13		karnof		  Karnofsky Performance Scale		    100 = Normal;no complaint
# no evidence of disease
# 90 = Normal activity possible; minor
# signs/symptoms of disease
# 80 = Normal activity with effort;
# some signs/symptoms of disease
# 70 = Cares for self; normal activity/
# active work not possible
# 14		cd4		  Baseline CD4 count			          Cells/milliliter
# (derived from multiple measurements)
# 15		priorzdv	  Months of prior ZDV use		    Months
# 16		age		  Age at Enrollment			    Years

#
# Reprocess the data to turn coded factor variables into readable strings (rather than codes)
#

# Load the raw ACTG 320 table from the working directory. Comment-character
# handling is switched off (comment.char = "") and text columns, if any, are
# kept as character rather than converted to factors.
actg320 <- read.table(
  file = "actg320.dat",
  quote = "\"",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Column names for the variables, in the file's column order. The trailing
# comments restate the coding of each raw column.
actg320_colnames <- c(
  "id",
  "time",           # in days
  "AIDSorDeath",    # 0/1 indicator
  "time_to_death",
  "death",          # 0/1
  "treatment_idv",  # 1=IDV, 0=noIDV
  "treatment_gp",   # 1,2,3,4 (factor) - collinear with above
  "CD4_stratum",    # 0: <=50, 1: >50
  "sex",            # 1=male
  "race",
  "iv_use",
  "hemophiliac",
  "karnof_scale",
  "cd4",            # baseline CD4 count
  "priorzdv",       # months
  "age"             # age at enrollment
)

# Fail loudly if the file layout differs from the expected column count: a
# names vector shorter than the data would otherwise leave NA column names.
stopifnot(ncol(actg320) == length(actg320_colnames))

# The vector is deliberately not called `colnames`, so it does not mask the
# base function used on the next line.
colnames(actg320) <- actg320_colnames

# Recode numeric codes into human-readable values.

# IDV indicator: code 1 becomes TRUE, other codes FALSE
actg320[["treatment_idv"]] <- actg320[["treatment_idv"]] == 1

# Treatment group codes 1..4 index directly into the regimen labels
gpmap <- c(
  "ZDV + 3TC",
  "ZDV + 3TC + IDV",
  "d4T + 3TC",
  "d4T + 3TC + IDV"
)
actg320[["treatment_gp"]] <- gpmap[actg320[["treatment_gp"]]]

# CD4 stratum at screening: code 0 is "<=50", any other code ">50"
actg320[["CD4_stratum"]] <- ifelse(
  actg320[["CD4_stratum"]] == 0,
  "<=50",
  ">50"
)

# Sex: code 1 is "Male", any other code "Female"
actg320[["sex"]] <- ifelse(
  actg320[["sex"]] == 1,
  "Male",
  "Female"
)

# Race/ethnicity codes 1..6 index directly into the label vector
racemap <- c(
  "White Non-Hispanic",
  "Black Non-Hispanic",
  "Hispanic",
  "Asian, Pacific Islander",
  "American Indian, Alaskan Native",
  "Other/unknown"
)
actg320[["race"]] <- racemap[actg320[["race"]]]

# IV drug use history codes 1..3 index directly into the label vector
ivmap <- c("Never", "Currently", "Previously")
actg320[["iv_use"]] <- ivmap[actg320[["iv_use"]]]

# Hemophiliac indicator: code 1 becomes TRUE, other codes FALSE
actg320[["hemophiliac"]] <- actg320[["hemophiliac"]] == 1

# Text labels for the Karnofsky Performance Scale, keyed by the score's
# character form (100, 90, 80, 70).
karnofmap <- c(
  "100" = "no evidence of disease",
  "90" = "minor signs/symptoms of disease",
  "80" = "some signs/symptoms of disease",
  "70" = "normal activity/active work not possible"
)

# Look each score up by name. Character indexing returns a vector that carries
# the lookup keys as names, so unname() is applied to store a plain character
# column. Scores that are not keys of the map become NA.
actg320$karnof_scale <- unname(karnofmap[as.character(actg320$karnof_scale)])

# Write the recoded data frame to an RDS file in the working directory
saveRDS(object = actg320, file = "AIDSdata.rds")
# WinVector/QSurvival documentation built on May 9, 2019, 10:59 p.m.