towide | R Documentation |
Creates a wide data frame from a long data frame identifying an 'id' variable and using a time variable whose values provide suffixes for time-varying variable names in wide form.
towide(
data,
idvar = "id",
timevar = "time",
sep = "_",
add.invariants = TRUE,
...
)
data |
a data frame in 'long' form. |
idvar |
(default: 'id') the variable(s) identifying each group of rows that are transformed to a single row in the wide file. |
timevar |
(default: 'time') the variable containing the occasion names in the long file. |
sep |
(default: '_') the character(s) that separate the name of a time-varying variable in the long form from the added suffix for the corresponding names in wide form. Default: '_'. |
add.invariants |
(default TRUE) additional variables that are invariant
within clusters are kept in output even if not included in 'idvar'
(default: names(data)[gicc(data, dataidvar)]).
For all variables, except |
... |
Other arguments are passed to |
In contrast with reshape in stats, this function identifies variables that are invariant with respect to 'idvar' and does not expand them to wide form.
Only 'time'-varying variables are expanded to wide form.
a data frame in wide form in which each variable that varies within levels of 'idvar' is turned into as many variables as there are distinct values of 'timevar' using the values of 'timevar' as suffixes to name the variables in wide form.
tolong
## Not run:
# Two subjects (A and B) seen on a varying number of occasions; the
# variables x and y are measured at several locations.
dd <- data.frame(
  subject = c("A", "A", "B"),
  time = c(1, 2, 1),
  y.left = 1:3,
  y.right = 1:3,
  x.left = 1:3,
  x.right = 11:13,
  x.middle = 21:23
)
dd
tolong(dd, sep = ".") # 'time' is the default name for the occasions variable
# Supply a different 'timevar' so that the existing 'time' is not clobbered:
dl <- tolong(dd, sep = ".", timevar = "location")
dl
#
# Return to wide format: 'idvar' gives the combination of variables
# whose values uniquely identify the rows of the wide file:
#
towide(dl, idvar = c("subject", "time"), timevar = "location")
# A long file that also carries additional constants
dl <- data.frame(
  name = rep(c("A", "B", "C"), c(3, 3, 2)),
  site = c("head", "neck", "jaw", "chest")[c(1, 2, 3, 1, 2, 3, 1, 4)],
  sex = rep(c("male", "female", "male"), c(3, 3, 2)),
  var1 = 1:8,
  var2 = 11:18,
  invar = rep(1:3, c(3, 3, 2))
)
dl
towide(dl, idvar = c("name", "sex"), timevar = "site")
# Two indexing variables, e.g. hippocampal volume: 2 sides x 3 sites
dl <- data.frame(
  name = rep(LETTERS[1:3], each = 6),
  side = rep(c("left", "right"), 9),
  site = rep(rep(c("head", "body", "tail"), each = 2), 3),
  volume = 1:18,
  grade = LETTERS[1:18],
  sex = rep(c("female", "male", "female"), each = 6),
  age = rep(c(25, 43, 69), each = 6)
)
dl
# First spread over 'site' within each name/side combination ...
dl.site <- towide(dl, idvar = c("name", "side"), timevar = "site")
dl.site
# ... then spread the result over 'side' within each name
dl.site.side <- towide(dl.site, idvar = "name", timevar = "side")
dl.site.side
# Same data with the columns in alphabetical order
dl.site.side[, sort(names(dl.site.side))]
#
# Switching long and wide variables
# Multiple variables in 'idvar'
#
# 'header = TRUE' is spelled out: 'T' is an ordinary variable that can be
# reassigned, whereas 'TRUE' is a reserved word.
dd <- read.table(header = TRUE, text = "
country variable 1990 1991 1992 1993
Canada population 20 21 24 26
Canada income 10 12 12 11
Mexico population 50 52 53 54
Mexico income 30 31 33 34
")
dd
# Replace the leading 'X' of the year columns with 'val__'
# (use '__' in case '_' is used elsewhere)
names(dd) <- sub("^X", "val__", names(dd))
dd
dl <- tolong(dd, sep = "__", timevar = "year")
dl
dw <- towide(dl, idvar = c("country", "year"), timevar = "variable")
dw
# Drop the columns whose names start with 'id_'
dw[grep("^id_", names(dw))] <- NULL
dw
# Strip the 'val_' prefix from the remaining column names
names(dw) <- sub("^val_", "", names(dw))
dw
#
# A function to flip years and variables
#
# flip() renames every column whose name ends in a match for 'colfmt' to
# "value<sep><match>", reshapes the data to long form with tolong() using
# 'year' as the occasion variable, and then reshapes back to wide form with
# towide() using 'varname' as the occasion variable.
#
# Arguments:
#   data     data frame to flip
#   rowvar   name(s) of the variable(s) that, together with 'year', are
#            passed to towide() as 'idvar'
#   colfmt   regular expression matching the trailing part of the names of
#            the columns to be stacked; the match becomes the suffix
#   varname  name of the variable passed to towide() as 'timevar'
#   sep      separator placed between "value" and the suffix; it is also
#            passed to tolong() and towide()
#
# Value: the result of towide() with the columns whose names start with
#   "XXXX" removed and the "value<sep>" prefix stripped from the names.
#
flip <- function(data, rowvar = "country",
                 colfmt = "[0-9]{4}$",
                 varname = "variable", sep = "__") {
  names(data) <- sub(
    paste0("^.*(", colfmt, ")"),
    paste0("value", sep, "\\1"),
    names(data)
  )
  # Pass 'sep' through instead of a hard-coded "__" so that the separator
  # used for renaming above is the one used for reshaping.
  dl <- tolong(data, sep = sep, timevar = "year", idvar = "XXXX")
  dw <- towide(dl, timevar = varname, idvar = c(rowvar, "year"), sep = sep)
  # Logical indexing: a negated empty grep() result would select no columns
  # at all; drop = FALSE keeps a data frame even if one column remains.
  dw <- dw[, !grepl("^XXXX", names(dw)), drop = FALSE]
  # fixed = TRUE so that 'sep' is matched literally, not as a regex
  names(dw) <- sub(paste0("value", sep), "", names(dw), fixed = TRUE)
  dw
}
flip(dd)
#
# Time-varying and time-invariant variables in the same file
#
dl <- data.frame(
  subject = rep(c("A", "B", "C"), c(3, 2, 2)),
  time = c(1, 2, 3, 1, 2, 1, 3),
  sex = rep(c("male", "female", "male"), c(3, 2, 2)),
  # y happens to be constant within subject (accidentally time-invariant)
  y = rep(c(10, 11, 12), c(3, 2, 2)),
  # x changes within subject (time-varying)
  x = c(20, 21, 22, 25, 26, 18, 19)
)
towide(dl, idvar = "subject", timevar = "time")
towide(dl, idvar = "subject", timevar = "time", add.invariants = FALSE)
# Several time variables, e.g. month and day
dl <- data.frame(
  subject = rep(c("A", "B", "C"), c(3, 2, 2)),
  month = c(1, 1, 3, 2, 2, 1, 3),
  day = c(10, 15, 2, 3, 9, 20, 2),
  sex = rep(c("male", "female", "male"), c(3, 2, 2)),
  # y happens to be constant within subject (accidentally time-invariant)
  y = rep(c(10, 11, 12), c(3, 2, 2)),
  # x changes within subject (time-varying)
  x = c(20, 21, 22, 25, 26, 18, 19)
)
# A single time variable is needed, so combine month and day into a date
dl
# No year is supplied, so the current year is used
dl$date <- as.Date(paste0(dl$month, "-", dl$day), format = "%m-%d")
dl
dw <- towide(dl, idvar = "subject", timevar = "date")
dl2 <- tolong(dw, sep = "_")
sortdf(dl2, ~ subject/time)
#
# Variables in long form
#
# This illustrates how towide works when some variables are
# invariant wrt the key and others vary. Here the key is
# c('country','year'). The key-variant variables are value
# and rownum. The invariant variable is country.code
#
dd <- read.table(header = TRUE, text = "
country year variable value country.code rownum
Canada 2001 atemp 20 CAN 1
Canada 2002 atemp 23 CAN 2
US 2001 atemp 23 USA 3
US 2002 atemp 23 USA 4
Canada 2001 wind 120 CAN 5
Canada 2002 wind 123 CAN 6
US 2001 wind 123 USA 7
US 2002 wind 123 USA 8
Canada 2001 rain 220 CAN 9
Canada 2002 rain 223 CAN 10
US 2001 rain 223 USA 11
US 2002 rain 223 USA 12
")
(dw <- towide(dd, idvar = c("country", "year"), timevar = "variable"))
#
# to keep only the variable name as a name
#
names(dw) <- sub("^value_", "", names(dw))
dw
#
# to get rid of other time varying variable
#
# Logical indexing is used instead of negative grep() indices, which would
# select no columns at all if no name contained '_'; drop = FALSE keeps a
# data frame even if a single column remains.
dw <- dw[, !grepl("_", names(dw)), drop = FALSE]
dw
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.