# R/smaller-functions.R
#
# Defines functions: testChecker, generalInfo, getAnimalInfo, tx_subFun, get_taxa, basic_housekeeping

# Basic housekeeping ------------------------------------------------------

# Clean up column names and keep only the PCR rows.
#
# Args:
#   df: data frame; read here via names(df) and df$TestTypeBroad.
#
# Returns:
#   `df` with
#     * every match of the dot pattern in a column name replaced by "_",
#     * the (cleaned) column "AnimalID_GAINS_" renamed to "animalID",
#     * only the rows whose TestTypeBroad equals "Polymerase_Chain_Reaction"
#       (rows where that comparison is NA are dropped).
basic_housekeeping <- function(df) {
  col_names <- names(df)

  # Only touch the names that actually contain a dot.
  has_dot <- grepl("\\.|\\.\\.", col_names)
  col_names[has_dot] <- gsub("\\.|\\.\\.", "_", col_names[has_dot])

  # Shorter name for the animal identifier column - for simplicity.
  col_names[col_names == "AnimalID_GAINS_"] <- "animalID"

  names(df) <- col_names

  # Explicit logical indexing instead of subset(): subset() looks names up
  # inside the data first, so a column called "df" would hijack the condition.
  # NA comparisons are excluded, which is what subset() did as well.
  is_pcr <- df$TestTypeBroad == "Polymerase_Chain_Reaction"
  df[is_pcr & !is.na(is_pcr), , drop = FALSE]
}



# Getting taxagroups in data ----------------------------------------------

# List the taxa groups present in `df` together with their short codes.
#
# Args:
#   df: data frame; only df$Taxagroup is read.
#
# Returns:
#   Data frame with one row per distinct Taxagroup value and two columns:
#   `taxa` (the group name) and `abbrev` (its abbreviation from the lookup
#   table below). Groups missing from the lookup get abbrev = NA.
get_taxa <- function(df) {
  # Lookup of known taxa groups and the abbreviation used for each.
  # IMP: Check for taxa not listed below
  known_taxa <- data.frame(
    taxa = c(
      "Bats", "Birds", "Carnivores", "Non-human Primates", "Other Mammals",
      "Reptiles", "Rodents & Shrews", "Ungulates", "Unknown"
    ),
    abbrev = c(
      "Bat", "Bird", "Carn", "NHP", "Other",
      "Rept", "RnS", "Ung", "Unkn"
    ),
    stringsAsFactors = FALSE
  )

  present_taxa <- data.frame(taxa = unique(df$Taxagroup))

  # Left join: keep every group found in the data, matched or not.
  merge(present_taxa, known_taxa, by = "taxa", all.x = TRUE)
}



# Subset and save by taxa (raw data) --------------------------------------

# Here, countryabbr is how I want country to be used in file name, e.g. "ug" for "Uganda"
# Save the rows of `data` belonging to the n-th taxa group as an RDS file.
#
# Args:
#   n:           index into `taxa` and `abbr`.
#   taxa:        vector of taxa group names; taxa[n] is matched against
#                data$Taxagroup.
#   abbr:        vector of abbreviations; abbr[n] goes into the file name.
#   data:        data frame with a Taxagroup column.
#   countryabbr: country tag used in the file name.
#
# Side effect:
#   Writes "Data/<Sys.Date()>_<countryabbr>-raw-<abbr[n]>.RDS" relative to the
#   working directory.
#
# Returns:
#   The string "done!".
tx_subFun <- function(n, taxa, abbr, data, countryabbr) {
  tx <- taxa[n]

  # %in% rather than ==: with ==, rows whose Taxagroup is NA give an NA index
  # and end up in the saved file as rows made entirely of NA.
  taxa_rows <- data[data$Taxagroup %in% tx, ]

  out_file <- sprintf("Data/%s_%s-raw-%s.RDS", Sys.Date(), countryabbr, abbr[n])
  saveRDS(taxa_rows, out_file)

  "done!"
}



# Get animal info ---------------------------------------------------------

# Build the one-row summary for a single animal.
#
# Args:
#   animal:  animal identifier, matched against dataset$animalID.
#   dataset: data frame with an animalID column; the matching rows are handed
#            to generalInfo() and testChecker().
#   viruses: vector/list of test names; testChecker() is called once per
#            element.
#
# Returns:
#   The result of generalInfo() column-bound with the per-test results of
#   testChecker() for every element of `viruses`.
getAnimalInfo <- function(animal, dataset, viruses) {
  # %in% rather than ==: with ==, rows whose animalID is NA give an NA index
  # and would be passed on as all-NA rows. testChecker() filters the same way.
  animal_rows <- dataset[dataset$animalID %in% animal, ]

  general <- generalInfo(animal_rows, animal)

  # One small data frame per requested test, glued side by side.
  per_test <- lapply(viruses, testChecker, animal_rows)
  specific <- do.call(cbind, per_test)

  cbind(general, specific)
}



# General Info function ------------------------------------------------------------

# Summarise everything recorded for one animal, across all tests.
#
# Args:
#   df:     data frame of that animal's rows; the columns SpecimenType,
#           TestRequested, ConfirmationResult and VirusName are read.
#   animal: animal identifier, copied into the animalID column of the result.
#
# Returns:
#   Data frame with columns
#     animalID, specimens, Num_testTypes, testTypes, Positive, Pos_tests,
#     Viruses, Num_viruses.
#   Multi-valued fields are collapsed into one string separated by "; ".
#   When no row has ConfirmationResult "Positive", Pos_tests and Viruses are
#   NA and Num_viruses is 0.
generalInfo <- function(df, animal) {
  # Specimens collected:
  specimens <- paste0(unique(df$SpecimenType), collapse = "; ")

  # Unique viruses/viral families tested: how many, and which ones.
  tests_requested <- unique(df$TestRequested)
  Num_testTypes <- length(tests_requested)
  testTypes <- paste0(sort(tests_requested), collapse = "; ")

  # Did animal test positive for anything?
  Positive <- if (any(df$ConfirmationResult %in% "Positive")) "Yes" else "No"

  # Which viruses (and how many) did we detect?
  if (Positive == "Yes") {
    # Rows carrying a real virus name. The literal string "NULL" marks "no
    # virus"; missing names are excluded too, otherwise NA would be counted in
    # Num_viruses even though sort() drops it from the Viruses string.
    has_virus <- !is.na(df$VirusName) & df$VirusName != "NULL"
    virus_names <- unique(df$VirusName[has_virus])

    Pos_tests <- paste0(sort(unique(df$TestRequested[has_virus])), collapse = "; ")
    Viruses <- paste0(sort(virus_names), collapse = "; ")
    Num_viruses <- length(virus_names)
  } else {
    Pos_tests <- NA_character_
    Viruses <- NA_character_
    Num_viruses <- 0
  }

  data.frame(
    animalID = animal,
    specimens,
    Num_testTypes,
    testTypes,
    Positive,
    Pos_tests,
    Viruses,
    Num_viruses,
    stringsAsFactors = FALSE
  )
}




# Test specific info function ------------------------------------------------------

# Summarise one animal's results for a single virus / viral family test.
#
# Args:
#   virus: test name, matched against df$TestRequested.
#   df:    data frame of one animal's rows; the columns TestRequested,
#          ConfirmationResult and VirusName are read.
#
# Returns:
#   One-row data frame with columns Tested, Positive, Viruses and Viruses_Num,
#   each prefixed with "<virus>_" where <virus> is the test name with spaces
#   removed.
#     * not tested:       Tested "No",  everything else NA
#     * tested, negative: Tested "Yes", Positive "No",  Viruses NA, Viruses_Num 0
#     * tested, positive: Tested "Yes", Positive "Yes", Viruses = the distinct
#                         virus names joined by "; ", Viruses_Num = their count
testChecker <- function(virus, df) {
  # Was animal tested for specific virus/viral family?
  vdf <- df[df$TestRequested %in% virus, ]

  if (nrow(vdf) > 0) {
    Tested <- "Yes"
    # Was animal positive for specific test?
    if (any(vdf$ConfirmationResult %in% "Positive")) {
      Positive <- "Yes"
      # Distinct real virus names; "NULL" marks "no virus", NA is skipped.
      has_virus <- !is.na(vdf$VirusName) & vdf$VirusName != "NULL"
      virus_names <- unique(vdf$VirusName[has_virus])
      # Count BEFORE collapsing: the collapsed string always has length 1.
      Viruses_Num <- length(virus_names)
      Viruses <- paste0(virus_names, collapse = "; ")
    } else {
      Positive <- "No"
      Viruses <- NA_character_
      Viruses_Num <- 0
    }
  } else {
    # If animal was not tested for specific virus/viral family:
    Tested <- "No"
    Positive <- NA_character_
    Viruses <- NA_character_
    Viruses_Num <- NA_integer_
  }

  specific <- data.frame(Tested, Positive, Viruses, Viruses_Num, stringsAsFactors = FALSE)

  # Column prefix is the test name without spaces.
  prefix <- gsub(" ", "", virus)
  names(specific) <- paste0(prefix, "_", names(specific))
  specific
}
# nistara/eidithR documentation built on May 23, 2017, 2:54 p.m.