R/matchmahal.R

#-------------------------------------------------------------------------------------------------
#' @title Match with Mahalanobis
#'
#' @description This function finds matches for the events without a strata sing the Mahalanobis distrance and nearest neighbor. It is a wrapper for the matchit function from the MatchIt package.
#' @param date A vector of dates. 
#' @param casecontrol A vector where 1 indentifies events, 0 identifies potential controls, and NA represent non-event days that are not eligible to be matched (e.g. missing data, or excluded for some other reason).
#' @param matchvars A matrix where the columns represent variables that will be matched on using the Mahalanobis distrance and nearest neighbor.
#' @param by A vector of ids or a matrix with columns as the id variables. The events will be found separately within each unique combination of id variables. This is optional.
#' @param ratio The number of control units to be matched to each case. The default is 1.  See documentation for matchit for more details. 
#' @param datewindow A scalar that limits the potential control days to be within a a particular number of days of the range of events. For example, if the events occur on days 100, 101, 150, 151, 152 of the year and datewindow=7 then control days will have day of years in the range of 93 to 159. If missing then all days are eligible.
#' @return data A data.table of matched cases and controls.
#' @return nn A summary of the number if cases and controls that were matched. See documentation for matchit for more details.
#' @return sum.matched A summary of the quality of each match. See documentation for matchit for more details.
#' @author Ander Wilson
#' @seealso MatchIt
#' @import data.table MatchIt
#' @export
matchmahal <- function(date,casecontrol,matchvars,by,ratio=1,datewindow,...){
  
  if(missing(by)){
    by <- NA
  }else{
    by <- data.table(by)
    setkeyv(by, names(by))
    bydt <- unique(by)
    setkeyv(bydt, names(bydt))
    bydt[,byid:=1:nrow(bydt)]
    bydt <- bydt[by]
  }
  
  dat<- data.table(matchvars)
  dat[,casecontrol:=casecontrol]
  dat[,byid:=bydt[,(byid)]]
  dat[,date:=date]
  
  form <- formula(paste0("casecontrol~",paste(colnames(dat)[-which(colnames(dat)%in%c("casecontrol","byid","date"))],collapse="+")))
  
  
  matched.sample <- sum.matched <- nn <- data.table()
  for(i in unique(dat[,(byid)])){
    print(i)
    #make matching data for county
    dati <- dat[byid==i]
    dati <- dati[complete.cases(dati),]
    
    if(nrow(dati[casecontrol==1])>0){
      #limit to time window
      if(!missing(datewindow)){
        dati <- dati[doy>min(dati[casecontrol==1,(doy)])-datewindow & doy<max(dati[casecontrol==1,(doy)])+datewindow]
      }
      
      #matchit
      matchit.fit <- matchit(form, data=dati, distance="mahalanobis", method="nearest", ratio=ratio,...)
      
      #save match
      matched.sample <- rbind(matched.sample,data.table(match.data(matchit.fit)))
      
      #save statistics on number matched
      temp.nn <- data.table(matchit.fit$nn)
      temp.nn[,type:=row.names(matchit.fit$nn)]
      temp.nn[,byid:=i]
      nn <- rbind(nn,temp.nn)
      
      #add this
      temp.sum.matched <- data.table(summary(matchit.fit)$sum.matched)
      temp.sum.matched[,type:=row.names(summary(matchit.fit)$sum.matched)]
      temp.sum.matched[,byid:=i]
      sum.matched <- rbind(sum.matched,temp.sum.matched)
    }
  }
  
  
  
  
  
  setkeyv(bydt,"byid") 
  setkeyv(matched.sample,c("byid","date")) 
  setkeyv(nn,"byid") 
  setkeyv(bydt,"byid") 
  sum.matched <- unique(bydt)[sum.matched]
  nn <- unique(bydt)[nn]
  data <- unique(bydt)[matched.sample]
  sum.matched[,byid:=NULL]
  nn[,byid:=NULL]
  data[,byid:=NULL]
  
  
  return(list(data=data,nn=nn,sum.matched=sum.matched))
}
AnderWilson/smurf documentation built on May 5, 2019, 4:57 a.m.