# hammeRs:

#' Make a mob_in object from a long-format data frame
#'
#' This is a wrapper for \code{make_mob_in()} that accepts a long-format data
#' frame as input and converts it to a community matrix \code{comm} (samples
#' as rows, taxa as columns, summed counts as values) and a plot attribute
#' table \code{plot_attr} (one row per sample). Both are then passed on to
#' \code{make_mob_in()}. Furthermore, it has an option to exclude samples with
#' low numbers of individuals, either on their own or together with all other
#' samples of the same gamma-scale group.
#'
#' Columns of \code{dat} (other than taxon and count) that take more than one
#' value within a sample are dropped from the plot attributes with a warning.
#' Missing values count as a value of their own for this check. Counts that
#' are \code{NA} end up as 0 in the community matrix.
#'
#' @param dat A single long-format data.frame with species observations as
#'   rows. Species name (i.e. \code{taxon}), individual counts
#'   (i.e. \code{count}), and the corresponding sample-level information
#'   (i.e. \code{alpha_id}, \code{gamma_id}, \code{coord_names}) must be in
#'   columns. Additional columns may be present, but are only kept if they
#'   are consistent within samples (i.e. within alpha_id's).
#' @param taxon name of the taxon column.
#' @param alpha_id name of the alpha_id column. This assigns the observation
#'   to a sample at the alpha scale.
#' @param gamma_id name of the gamma_id column. This assigns the observation
#'   to a group at the gamma scale. \code{NULL} (the default) means that
#'   samples are not grouped.
#' @param count name of the column containing the individual counts
#'   (i.e. abundances) of the observation.
#' @param coord_names character vector with the names of the x and y
#'   coordinate columns.
#' @param min_n numeric. Minimum number of individuals required for a sample
#'   to be kept; samples with fewer individuals are excluded.
#' @param exclude_gamma boolean. If samples are excluded due to low sample
#'   size (see \code{min_n}), shall the entire group (i.e. gamma_id) be
#'   excluded? Only has an effect when \code{gamma_id} is given.
#' @param ... arguments passed on to \code{make_mob_in()}.
#'
#' @return a mob_in object, as returned by \code{make_mob_in()}.
#' @export
#'
mobify <- function(dat,
                   taxon = "verbatim_scientific_name",
                   alpha_id = "alpha_id",
                   gamma_id = NULL,
                   count = "individual_count",
                   coord_names = c("Longitude", "Latitude"),
                   min_n = 5,
                   exclude_gamma = TRUE,
                   ...) {
  # library() stops right away if a package is missing, whereas require()
  # would only return FALSE and let the function fail later with an obscure
  # "could not find function" error.
  library(tidyverse)
  library(mobr)

  # Fail early, and by name, when a requested column does not exist.
  needed <- c(taxon, alpha_id, count, gamma_id, coord_names)
  absent <- setdiff(needed, colnames(dat))
  if (length(absent) > 0) {
    stop("The following columns were not found in dat: ",
         paste(absent, collapse = ", "))
  }

  # all_of() forces the arguments to be read as column *names*. A bare
  # `gamma_id = gamma_id` would silently select an existing data column
  # called "gamma_id" instead of the column the caller asked for.
  dat <- rename(dat,
                taxon = all_of(taxon),
                alpha_id = all_of(alpha_id),
                count = all_of(count))
  if (!is.null(gamma_id)) {
    dat <- rename(dat, gamma_id = all_of(gamma_id))
  }

  # Community matrix: one row per sample, one column per taxon.
  comm <- dat %>%
    group_by(alpha_id, taxon) %>%
    summarise(total = sum(count)) %>%
    spread("taxon", "total", fill = 0) %>%
    arrange(alpha_id) %>%
    ungroup()
  sample_ids <- comm$alpha_id
  comm <- comm %>%
    select(-alpha_id) %>%
    as.matrix()
  row.names(comm) <- sample_ids

  # Plot attributes: everything that is not species-level information.
  plot_attr <- dat %>% select(-taxon, -count)

  # Determine variables to keep in mob_in: a column is kept only if it has
  # exactly one value per alpha_id. Counting distinct (alpha_id, value)
  # pairs works for data.frames and tibbles alike and treats NA as a value,
  # so every kept column is guaranteed to collapse to one row per sample.
  vars <- colnames(plot_attr)
  n_samples <- nrow(distinct(select(plot_attr, alpha_id)))
  index <- vapply(vars, function(v) {
    pairs <- distinct(select(plot_attr, all_of(unique(c("alpha_id", v)))))
    nrow(pairs) == n_samples
  }, logical(1))

  # Every coordinate column has to be unambiguous, not just one of them.
  if (!all(index[coord_names])) stop("Alpha_id's have ambiguous coordinates")
  if (!is.null(gamma_id) && !index[["gamma_id"]]) stop("Alpha_id's have ambiguous gamma_id's. Make sure that all observations from a single alpha have consistent gamma_id's or set gamma_id's == NULL if you dont want to group samples at a loarger scale.")

  # Only warn when something is actually dropped.
  dropped <- names(index)[!index]
  if (length(dropped) > 0) {
    warning("The following colums will be dropped from the plott attributes because they are inconsistant within samples:",
            paste("\n \t", dropped),
            "\n This is no reason to worry if they contain species level information that cannot be stored in mob_in objects (e.g. taxonomy).")
  }

  # A plain data.frame is used so that the row names survive the subsetting
  # below (tibbles discard them) and keep matching the row names of comm.
  plot_attr <- plot_attr %>%
    select(all_of(names(index)[index])) %>%
    distinct() %>%
    arrange(alpha_id) %>%
    as.data.frame()
  rownames(plot_attr) <- plot_attr$alpha_id

  # Exclude undersampled sites. Row names are indexed directly: subsetting
  # the matrix first would drop it to a vector (and lose the name) when
  # exactly one sample is too small.
  n_ind <- rowSums(comm)
  small_alpha <- rownames(comm)[n_ind < min_n]

  if (!is.null(gamma_id) && isTRUE(as.logical(exclude_gamma))) {
    # Remove every sample belonging to a group that contains a small sample.
    small_gamma <- unique(plot_attr$gamma_id[plot_attr$alpha_id %in% small_alpha])
    keep <- !plot_attr$gamma_id %in% small_gamma
    if (length(small_gamma) > 0) {
      warning(paste0("Based on your min_n of ", min_n," all samples from the following gammas were excluded:" ),
              paste("\n \t", small_gamma))
    }
  } else {
    # Remove only the small samples themselves.
    keep <- !plot_attr$alpha_id %in% small_alpha
    if (length(small_alpha) > 0) {
      warning(paste0("Based on your min_n of ", min_n," the following samples were excluded:" ),
              paste("\n \t", small_alpha))
    }
  }

  # comm and plot_attr are both sorted by alpha_id, so one logical index
  # applies to both; drop = FALSE keeps the matrix shape with a single row.
  comm <- comm[keep, , drop = FALSE]
  plot_attr <- plot_attr[keep, , drop = FALSE]

  make_mob_in(comm = comm, plot_attr = plot_attr, coord_names = coord_names, ...)
}