rollup: Aggregate multiple columns of values by group

View source: R/rollup.R

rollupR Documentation

Aggregate multiple columns of values by group

Description

Aggregate multiple columns of values over zones (groups), for example as a weighted mean within each group. Work in progress – NOT DONE YET. See the source code for notes on data.table and speed.

Usage

rollup(x, by = NULL, wts = NULL, FUN, prefix = "wtd.mean.", na.rm = TRUE)

Arguments

x

Dataset

by

Vector that defines groups, for aggregating by group. Default is NULL (as shown in Usage); the first example below omits by to compute a single overall (US) rollup.

wts

Weights, default is unweighted

FUN

Function used to aggregate each column. If FUN is not specified, a weighted mean is used (see wts and na.rm).

prefix

Default is 'wtd.mean.'

na.rm

Default is TRUE. Passed to Hmisc::wtd.mean(). Not used if FUN is specified in the call to this function.

Value

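The aggregated values by group. Judging from the Examples, the result is a table with one row per group that includes a column named by (the vector defining groups) alongside the aggregated columns.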

See Also

wtd.colMeans() ejscreen::ejscreen.rollup()

Examples

  # See ejscreen package function called ejscreen.rollup()
  ## Not run: 
  # draft of COMPLETE EXAMPLE - NOT TESTED:
 # SPECIFY FIELDS TO ROLLUP VIA WTD AVG AND
 # WHICH TO DO VIA SUM OVER US/REGION/COUNTY/STATE/TRACT

 # load('bg ... plus race eth subgrps ACS0812.RData') # if not already working with it

# Attach the packages used below. library() stops with an error if a package
# is missing, whereas require() would only return FALSE and let the script
# fail later with a less helpful message.
library(analyze.stuff)
library(ejanalysis)
library(ejscreen)

# Lists of field names used to pick the columns to roll up.
data(names.e); data(names.ej); data(names.d)
# Available for rolling up by: 'FIPS', "FIPS.TRACT", "FIPS.COUNTY", "FIPS.ST", 'REGION'

# Get the sum for all the raw counts, and area
# NOTE(review): names.d.subgroups.count is not loaded by the data() calls
# above -- confirm where it is defined before running this.
sumnames <- c(
  'area', 'pop', 'povknownratio', 'age25up', 'hhlds', 'builtunits',
  'mins', 'lowinc', 'lths', 'lingiso', 'under5', 'over64', 'pre1960',
  'VNI.eo', 'VNI.svi6',
  'VDI.eo', 'VDI.svi6',
  names.d.subgroups.count, 'nonmins'
)

# Aggregating function for count-type fields: sum within group, ignoring NAs.
sum_na_rm <- function(z) sum(z, na.rm = TRUE)

# Get the rollups of summed cols.
# bg is the block group dataset, assumed to be loaded already.
# After each grouped rollup the first column is renamed to the grouping field.
us       <- rollup(bg[ , sumnames], FUN = sum_na_rm, prefix = '')
regions  <- rollup(bg[ , sumnames], FUN = sum_na_rm, prefix = '',
  by = bg$REGION)
names(regions)[1] <- 'REGION'
states   <- rollup(bg[ , sumnames], FUN = sum_na_rm, prefix = '',
  by = bg$FIPS.ST)
names(states)[1] <- 'FIPS.ST'
counties <- rollup(bg[ , sumnames], FUN = sum_na_rm, prefix = '',
  by = bg$FIPS.COUNTY)
names(counties)[1] <- 'FIPS.COUNTY'
tracts   <- rollup(bg[ , sumnames], FUN = sum_na_rm, prefix = '',
  by = bg$FIPS.TRACT)
names(tracts)[1] <- 'FIPS.TRACT'

# Get the rollups of wtd.mean cols (at least E cols).
# FUN is left unspecified here and population is supplied as the weights.
# The gsub() calls replace the text 'by' in the column names with the name of
# the grouping field, so the tables can be merged on that field below.
avgnames <- names.e
us.avg       <- rollup(bg[ , avgnames], prefix = '', wts = bg$pop)
regions.avg  <- rollup(bg[ , avgnames], prefix = '', wts = bg$pop, by = bg$REGION)
names(regions.avg)  <- gsub('by', 'REGION',  names(regions.avg))
states.avg   <- rollup(bg[ , avgnames], prefix = '', wts = bg$pop, by = bg$FIPS.ST)
names(states.avg)   <- gsub('by', 'FIPS.ST', names(states.avg))
counties.avg <- rollup(bg[ , avgnames], prefix = '', wts = bg$pop, by = bg$FIPS.COUNTY)
names(counties.avg) <- gsub('by', 'FIPS.COUNTY', names(counties.avg))
tracts.avg   <- rollup(bg[ , avgnames], prefix = '', wts = bg$pop, by = bg$FIPS.TRACT)
names(tracts.avg)   <- gsub('by', 'FIPS.TRACT',  names(tracts.avg))

# Merge sum and mean types of cols.
# The US tables have no grouping column, so they are bound side by side;
# the grouped tables are matched on their grouping field.
us <- cbind(us, us.avg, stringsAsFactors = FALSE)
regions  <- merge(regions,  regions.avg,  by = 'REGION')
states   <- merge(states,   states.avg,   by = 'FIPS.ST')
counties <- merge(counties, counties.avg, by = 'FIPS.COUNTY')
tracts   <- merge(tracts,   tracts.avg,   by = 'FIPS.TRACT')

# Now calculate the derived fields like pct demog fields, EJ indexes, pctiles, bins, etc.
# See ejscreen::ejscreen.acs.calc()


## End(Not run)

## Not run: 
# OLDER, SLOW BUT SEEMS TO WORK SOMEWHAT
# 1.Do rollup of most fields as wtd mean (population-weighted, by tract).
#   The text 'by' in the resulting column names is replaced with 'FIPS.TRACT'
#   so the table can be merged on that field in step 3.
   t2 <- rollup(bg[ , names.e], by = bg$FIPS.TRACT, wts = bg$pop)
   names(t2) <- gsub('by', 'FIPS.TRACT', names(t2))
# 2.Do rollup of pop and areas as sum not wtd.mean:
 # not sure aggregate preserves sort order that rollup created,
 # so use merge to be sure they match up on fips:
 # Naming the element of the by list makes aggregate() call the grouping
 # column 'FIPS.TRACT' directly, and the four summed columns keep their own
 # names. Assigning names by hand risks mislabeling the columns if the list
 # of names does not match the columns that were summed.
   sumcols <- c('pop', 'area', 'sqmi', 'sqkm')
   tractpop <- aggregate(bg[ , sumcols], by = list(FIPS.TRACT = bg$FIPS.TRACT), FUN = sum)
# 3.Merge the wtd.mean fields and sum fields, sort results.
   t2 <- merge(t2, tractpop, by = 'FIPS.TRACT')
   rm(tractpop, sumcols)
   t2 <- t2[order(t2$FIPS.TRACT), ]

## End(Not run)


ejanalysis/ejanalysis documentation built on April 2, 2024, 10:12 a.m.