R/extractRules.R

Defines functions extractRules

Documented in extractRules

#' Pull rulesets created by GARP
#'
#' \code{extractRules} Extract the dominant range rules and logit rules for the best subset models
#'
#' @param file file name or file path of the ".txt" file of rulesets generated by GARP
#' @param table a data.frame object produced by \code{\link{findModRules}} that contains model numbers (ruleset number) and dominant rule number
#' @param pathA file path to the directory containing environmental grids (must be in ESRI ASCII raster format)
#' @param pathB file path to the directory for ruleset grids produced by GARP
#' @param project logical; indicates if the GARP results are projected to the landscape
#'
#' @return Returns a data.frame containing the minimum and maximum values for the environmental variables in each rule, the type of rules, the model numbers (ruleset number) and rule numbers.
#'
#' @details Include both the path and the full file name in \code{file}, otherwise, set the directory to the rulesets file before running the function.
#'
#' @seealso \code{\link{findModRules}}
#'
#' @examples
#' \dontrun{
#'   rangelogit <- extractRules("C:/GARP/RuleSets.txt", table = modelRule,
#'   pathA = "C:/GARP/resampled/", pathB = "C:/GARP/runs/", project = FALSE)
#' }
#'
#' @import sp
#' @import raster
#'
#' @export

extractRules <- function(file, table, pathA, pathB, project){
  RuleSet <- file
  sampledir <- pathA
  rsetPridir <- pathB
  modelRule <- table
  if (project == TRUE) {
    number = 2
  } else if (project == FALSE) {
    number = 1
  }
  ruleset <- scan(RuleSet, what = '')
  #cat(ruleset) #show ruleset
  list <- grep('\\=\\=\\d+\\=+', ruleset)
  modelNumber = c()
  eachrule = c()
  RuleName = c()
  Minimum = c()
  Maximum = c()
  RuleType = c()
  RuleNumber = c()
  for (k in seq(1, length(modelRule$bestmodel))) {
    for (i in seq(1, length(list), by = number)) {
      # loop for model
      model <- ruleset[list[i]]
      modelNum <- regmatches(model, regexpr("\\d+", model))
      if (modelRule$bestmodel[k] == modelNum) {
        stingend = list[i + 1] - 1
        Rules = c()
        for (j in seq(list[i], stingend)) {
          # loop for rules
          if (ruleset[j] == "d" |
              ruleset[j] == "a" | ruleset[j] == "!" | ruleset[j] == "r") {
            Rules <- append(Rules, ruleset[j - 1])
            Rules <- append(Rules, j - 1)
            Rules <- append(Rules, ruleset[j])
            Rules <- append(Rules, j)
            #eachrule <- append(eachrule,ruleset[j])
          }
        }

        for (m in seq(2, length(Rules), 4)) {
          if (m != length(Rules) - 2) {
            a = as.numeric(as.character(Rules[m]))
            b = as.numeric(as.character(Rules[m + 4])) - 1
            if (ruleset[b] == "PRESENCE") {
              for (n in a:b) {
                if (ruleset[a] == modelRule$BestRules[k]) {
                  if (ruleset[a + 1] == "d" | ruleset[a + 1] == "!") {
                    if (grepl("\\[|\\]|,", ruleset[n]) == TRUE) {
                      ruleset[n] <- gsub('\\[', '', ruleset[n])
                      ruleset[n] <- gsub('\\]', '', ruleset[n])
                      #print (ruleset[n])
                      if (grepl("\\=", ruleset[n]) == TRUE) {
                        temp1 <- strsplit(ruleset[n], "\\=")[[1]]

                        #print(modelNum); print(temp1[1])
                        if (grepl(',', temp1[2]) == TRUE) {
                          temp2 <- strsplit(temp1[2], ",")[[1]]
                          Minimum <- append(Minimum, temp2[1])
                          RuleName <- append(RuleName, temp1[1])
                          RuleType <- append(RuleType, "range")
                          modelNumber <-
                            append(modelNumber, modelNum)
                          RuleNumber <-
                            append(RuleNumber, ruleset[a])
                          #print(temp2[1])
                          if (temp2[2] != '') {
                            Maximum <- append(Maximum, temp2[2])
                            #print(temp2[2])
                          } else if (grepl(',', ruleset[n + 1]) == TRUE) {
                            temp3 <- strsplit(ruleset[n + 1], ',')
                            Maximum <- append(Maximum, temp3[2])
                            #print(temp3[2])
                          } else if (grepl(',', ruleset[n + 1]) == FALSE) {
                            Maximum <- append(Maximum, ruleset[n + 1])

                          }
                        } else {
                          Minimum <- append(Minimum, temp1[2])
                          RuleName <- append(RuleName, temp1[1])
                          RuleType <- append(RuleType, "range")
                          modelNumber <-
                            append(modelNumber, modelNum)
                          RuleNumber <-
                            append(RuleNumber, ruleset[a])
                          if (grepl(',', ruleset[n + 1]) == TRUE) {
                            temp3 <- strsplit(ruleset[n + 1], ',')[[1]]
                            Maximum <- append(Maximum, temp3[2])

                          }
                        }
                      }
                    }
                  } else if (ruleset[a + 1] == "r") {
                    if (grepl("\\*", ruleset[n]) == TRUE) {
                      temp1 <- strsplit(ruleset[n], "\\*")[[1]]
                      if (as.numeric(temp1[2]) != 0) {
                        #rdir1 <- paste(sampledir, '/',sep = '')
                        rdir1 <- sampledir
                        rdir1 <- paste(rdir1, "/", temp1[1], sep = '')
                        rdir1 <- paste(rdir1, '.asc', sep = '')
                        PhyPa <- raster(rdir1)
                        #rdir2 <- paste(rsetPridir, '/', sep='')
                        rdir2 <- rsetPridir
                        files <- list.files(rdir2, pattern = 'grid')
                        for (y in seq(1:length(files))) {
                          rdir2_1 <- paste(rdir2,"/", files[y],"/",files[y], sep = '')
                          rdir2_1 <- paste(rdir2_1, '/', sep = '')
                          rfiles <-
                            list.files(rdir2_1, pattern = 'rset_\\d+_0')
                          for (w in seq(1:length(rfiles))) {
                            if (grepl(".aux", rfiles[w]) == FALSE) {
                              rdir2ID <- regmatches(rfiles[w],
                                                    regexpr('\\d+_', rfiles[w]))
                              rdir2ID <-
                                regmatches(rdir2ID,
                                           regexpr('\\d+', rdir2ID))
                              if (as.numeric(rdir2ID) == as.numeric(modelNum)) {
                                rsetRa <- paste(rdir2_1, rfiles[w], sep = '')
                                rsetRa <- raster(rsetRa)
                                ZonalMin <-
                                  zonal(PhyPa, rsetRa, 'min')
                                ZonalMax <-
                                  zonal(PhyPa, rsetRa, 'max')
                                for (z in seq(1:length(ZonalMin[,1]))) {
                                  if (ZonalMin[z,1] == ruleset[a]) {
                                    RuleName <- append(RuleName, temp1[1])
                                    RuleType <-
                                      append(RuleType, "logit")
                                    modelNumber <-
                                      append(modelNumber, modelNum)
                                    RuleNumber <-
                                      append(RuleNumber, ruleset[a])
                                    Minimum <-
                                      append(Minimum, ZonalMin[z,2])
                                    Maximum <-
                                      append(Maximum, ZonalMax[z,2])
                                  }
                                }

                              }
                            }
                          }
                        }
                      }

                    }
                  }

                  #}
                }
              }
            }
          } else {
            a = as.numeric(as.character(Rules[m]))
            b = as.numeric(as.character(stingend))
            if (ruleset[b - 1] == "PRESENCE") {
              for (n in a:b) {
                if (ruleset[a] == modelRule$BestRules[k]) {
                  if (ruleset[a + 1] == "d" | ruleset[a + 1] == "!") {
                    if (grepl("\\[|\\]|,", ruleset[n]) == TRUE) {
                      ruleset[n] <- gsub('\\[', '', ruleset[n])
                      ruleset[n] <- gsub('\\]', '', ruleset[n])
                      #print (ruleset[n])
                      if (grepl("\\=", ruleset[n]) == TRUE) {
                        temp1 <- strsplit(ruleset[n], "\\=")[[1]]

                        #print(temp1[1]); print(modelNum)
                        temp1 <- strsplit(ruleset[n], "\\=")[[1]]
                        if (grepl(',', temp1[2]) == TRUE) {
                          temp2 <- strsplit(temp1[2], ",")[[1]]
                          Minimum <- append(Minimum, temp2[1])
                          RuleName <- append(RuleName, temp1[1])
                          RuleType <- append(RuleType, "range")
                          modelNumber <-
                            append(modelNumber, modelNum)
                          RuleNumber <-
                            append(RuleNumber, ruleset[a])
                          #print(temp2[1])
                          if (temp2[2] != '') {
                            Maximum <- append(Maximum, temp2[2])
                            #print(temp2[2])
                          } else if (grepl(',', ruleset[n + 1]) == TRUE) {
                            temp3 <- strsplit(ruleset[n + 1], ',')
                            Maximum <- append(Maximum, temp3[2])
                            #print(temp3[2])
                          } else if (grepl(',', ruleset[n + 1]) == FALSE) {
                            Maximum <- append(Maximum, ruleset[n + 1])
                            #print(ruleset[n+1])
                          }
                        } else {
                          Minimum <- append(Minimum, temp1[2])
                          RuleName <- append(RuleName, temp1[1])
                          RuleType <- append(RuleType, "range")
                          modelNumber <-
                            append(modelNumber, modelNum)
                          RuleNumber <-
                            append(RuleNumber, ruleset[a])
                          #print(temp1[2])
                          if (grepl(',', ruleset[n + 1]) == TRUE) {
                            temp3 <- strsplit(ruleset[n + 1], ',')[[1]]
                            Maximum <- append(Maximum, temp3[2])
                            #print(temp3[2])
                          }
                        }
                      }
                    }
                  } else if (ruleset[a + 1] == "r") {
                    if (grepl("\\*", ruleset[n]) == TRUE) {
                      temp1 <- strsplit(ruleset[n], "\\*")[[1]]
                      if (as.numeric(temp1[2]) != 0) {
                        #rdir1 <- 'R:/SEER_Lab/Griffin_GG19/GARP_test/m_arvalis/run1/ASCII/Sampling_area' #Dir on Mac
                        #rdir1 <- 'C:/Users/yangann1/Dropbox/yang_python/ruleset_writing/Grids'  #Dir on Win
                        #rdir1 <- paste(sampledir, '/',sep = '')
                        rdir1 <- sampledir
                        rdir1 <- paste(rdir1, temp1[1], sep = '')
                        rdir1 <- paste(rdir1, '.asc', sep = '')
                        PhyPa <- raster(rdir1)
                        #rdir2 <- 'R:/SEER_Lab/Griffin_GG19/GARP_test/m_arvalis/run1/GARPrun/run8_10/'
                        #rdir2 <- 'C:/Users/yangann1/Dropbox/yang_python/ruleset_writing/GARPrun/garp2_allvariables/'
                        #rdir2 <- paste(rsetPridir, '/', sep='')
                        rdir2 <- rsetPridir
                        files <- list.files(rdir2, pattern = 'grid')
                        for (y in seq(1:length(files))) {
                          rdir2_1 <- paste(rdir2,"/", files[y],"/",files[y], sep = '')
                          rdir2_1 <- paste(rdir2_1, '/', sep = '')
                          rfiles <-
                            list.files(rdir2_1, pattern = 'rset_\\d+_0')
                          for (w in seq(1:length(rfiles))) {
                            if (grepl(".aux", rfiles[w]) == FALSE) {
                              rdir2ID <- regmatches(rfiles[w],
                                                    regexpr('\\d+_', rfiles[w]))
                              rdir2ID <-
                                regmatches(rdir2ID,
                                           regexpr('\\d+', rdir2ID))
                              if (as.numeric(rdir2ID) == as.numeric(modelNum)) {
                                rsetRa <- paste(rdir2_1, rfiles[w], sep = '')
                                rsetRa <- raster(rsetRa)
                                ZonalMin <-
                                  zonal(PhyPa, rsetRa, 'min')
                                ZonalMax <-
                                  zonal(PhyPa, rsetRa, 'max')
                                for (z in seq(1:length(ZonalMin[,1]))) {
                                  if (ZonalMin[z,1] == ruleset[a]) {
                                    Minimum <- append(Minimum, ZonalMin[z,2])
                                    Maximum <-
                                      append(Maximum, ZonalMax[z,2])
                                    RuleName <-
                                      append(RuleName, temp1[1])
                                    RuleType <-
                                      append(RuleType, "logit")
                                    modelNumber <-
                                      append(modelNumber, modelNum)
                                    RuleNumber <-
                                      append(RuleNumber, ruleset[a])
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  rangelogit <-
    data.frame(RuleName, Minimum, Maximum, RuleType, RuleNumber, modelNumber)

  for (t in 1:dim(rangelogit)[1]) {
    rangelogit$Maximum <- as.character(rangelogit$Maximum)
    rangelogit$Minimum <- as.character(rangelogit$Minimum)
    #rangelogit$Maximum <- (levels(rangelogit$Maximum))[rangelogit$Maximum]
    # rangelogit$Minimum<- as.character(rangelogit$Minimum)
    if (grepl("\\[", rangelogit$Minimum[t]) == TRUE) {
      rangelogit$Minimum[t] <-
        as.numeric(gsub("\\[", "", rangelogit$Minimum[t]))
    } else if (grepl("\\]", rangelogit$Maximum[t]) == TRUE) {
      rangelogit$Maximum[t] <-
        as.numeric(gsub("\\]", "", rangelogit$Maximum[t]))
    }
  }
  #if (file.exists('temp.csv')) file.remove('temp.csv')

  return(rangelogit)
}
cghaase/GARPTools documentation built on Aug. 6, 2021, 6:38 a.m.