R/purge_sibs.R

#' Purge siblings from large families
#'
#' This function will take a random sample of n size of individuals from each family. For Families smaller than n, the entire family is represented.
#'
#'
#'@param ids vector of individuals IDS
#'@param fams vector of family membership (family id) for each individual.
#'@param purge_to the maximum family size to retain. Will randomly sample individuals from large families to obtain this size. Families smaller than this size are retained.
#'@param returnDF logical if a sib-purged data frame should be returned or if a vector of individual ids should be returned.
#'@author Zak Robinson, Contact: zachary.robinson(at)umontana.com
#'@return sib-purged data frame or vector of sib-purged individual ID's
#'@export

purge_sibs<-function(ids,fams,purge_to,returnDF=F){

  if(length(ids)!=length(fams)){
    stop("fams and ids are different length vectors")
  }

  samp<-function(x,purge_to){if(length(x)>=purge_to){return(sample(x,size = purge_to,replace =  F))}else(return(x))}
  out<-tapply(as.character(ids), INDEX=fams, FUN = samp,purge_to=purge_to)
  out<-as.vector(unlist(out,use.names = F))

  if(! returnDF){
    return(out)
  }else{
    df_temp<-data.frame(ID=ids,FAMID=fams,stringsAsFactors = F)
    df<-df_temp[which(df_temp$ID %in% out),]
    return(df)
}

}
zakrobinson/RSibPurge documentation built on June 29, 2019, 3:19 a.m.