R/buildAFpredictors.R

Defines functions buildAFpredictors

Documented in buildAFpredictors

#' Build AF Predictors
#'
#' @description Builds a data frame suitable for doing prediction for annual fund.
#'
#' @param trainingyear  An integer value of the year in 4 digit format (eg. 2012) of the data that should be used to train the models.
#' @param yeartype  A character string describing the type of year. Choices should be "fiscal" or "calendar".
#' @param trainingsource  A hallp-style source for the training data. Valid choices include database tables or a csv location. When omitted, the `hallp` table of the `commits` MySQL database is used.
#' @param primaryonly  A logical value passed through unchanged to `builddemographicdata()`. Defaults to `TRUE`.
#'
#' @return A data frame with pidms, 3 outcome variables (prefixed with "outcome_") and 42 other variables. Note that variables DO include previous years' giving.
#'
#' @import dplyr
#' @export
#'
buildAFpredictors <- function(trainingyear, yeartype = 'fiscal', trainingsource = hallptbl, primaryonly = TRUE) {

  # Returns a dplyr table reference to `tablename` in the `commits` database.
  # NOTE(review): the connection credentials are hard-coded in source; consider
  # moving them to configuration or environment variables.
  commitstable <- function(tablename) {
    tbl(
      src_mysql('commits', host = '10.40.9.145', user = 'adc', password = 'goBigBlue'),
      tablename
    )
  }


  # set start, end dates and campaign -------------------------------------------------

  startdate        <- createstartdate(trainingyear, yeartype)
  enddate          <- createenddate(trainingyear, yeartype)
  trainingcampaign <- createAFcampaign(trainingyear, yeartype)

  # missing() is checked before the default is ever evaluated, so the
  # `hallptbl` default does not need to exist when the argument is omitted.
  if (missing(trainingsource)) {
    trainingsource <- commitstable('hallp')
  }

  # Reuse a `giftstbl` that is already visible from here; otherwise connect.
  if (!exists('giftstbl')) {
    giftstbl <- commitstable('gifts')
  }

  # A character value is treated as a file location and read in.
  # is.character() is used rather than comparing class(): a database table has
  # several classes, which would give if() a condition of length > 1.
  if (is.character(trainingsource)) {
    trainingsource <- read.tidy(trainingsource)
  }


  # build outcomes ----------------------------------------------------------

  # total given per pidm to the training campaign within the training year
  campaigntotals <- giftstbl %>%
    filter(
      campaign %like% trainingcampaign,
      gift_date >= startdate,
      gift_date <= enddate
    ) %>%
    group_by(pidm) %>%
    summarize(totalg = sum(gift_amt))

  outcomes <- trainingsource %>%
    select(pidm) %>%
    left_join(campaigntotals, by = 'pidm', copy = TRUE) %>%
    collect() %>%
    mutate(
      # pidms with no matching gifts get a total of 0
      outcome_totalg = replace(totalg, is.na(totalg), 0),
      # NOTE(review): the threshold is "> 1", so a total of exactly 1 (or less)
      # is labelled 'no gift' -- confirm this is intended rather than "> 0".
      outcome_donorfactor = factor(
        ifelse(outcome_totalg > 1, 'donor', 'no gift'),
        levels = c('no gift', 'donor')
      ),
      outcome_logg = log(outcome_totalg + 1)
    ) %>%
    select(-totalg)


  # build training data -----------------------------------------------------

  traininggiving <- buildtraininggiving(trainingyear, yeartype, trainingcampaign)

  trainingdata <- readtrainingdata(trainingsource)

  # The result of this pipeline is the function's return value.
  builddemographicdata(trainingdata, startdate = startdate, primaryonly = primaryonly) %>%

    # attach the giving variables built by buildtraininggiving()
    left_join(traininggiving, by = 'pidm') %>%

    # attach the outcomes
    left_join(outcomes, by = 'pidm') %>%

    # fill in 0 / 'no gift' for rows left unmatched by the joins
    mutate(
      fy1giftsnum = replace(fy1giftsnum, is.na(fy1giftsnum), 0),
      fy1totalg   = replace(fy1totalg,   is.na(fy1totalg),   0),
      fy1numdesgs = replace(fy1numdesgs, is.na(fy1numdesgs), 0),
      fy1afgifts  = replace(fy1afgifts,  is.na(fy1afgifts),  0),

      outcome_totalg = replace(outcome_totalg, is.na(outcome_totalg), 0),
      # the fill value must be an existing factor level; any other string
      # is an invalid level and would leave the entry NA
      outcome_donorfactor = replace(outcome_donorfactor, is.na(outcome_donorfactor), 'no gift'),
      outcome_logg = replace(outcome_logg, is.na(outcome_logg), 0)
    )

}
crazybilly/mumodels documentation built on May 14, 2019, 11:31 a.m.