R/read.fh.R

#' Read (somewhat corrupted) fh file from PAST5 export
#'
#' See \code{dplR::read.fh} for details.
#' @param fname path to fh file to be read in
#' @export
read.fh <- function(fname) {
  inp <- readLines(fname, ok = TRUE, warn = FALSE)
  header.begin <- grep("^HEADER:$", inp)
  header.end <- grep("^DATA:(Tree|Single)$", inp)
  n <- length(header.end)
  if (n == 0) {
    stop("file has no data in \"Tree\" or \"Single\" formats")
  }
  header.taken <- logical(length(header.begin))
  for (i in seq_len(n)) {
    n.preceding <- sum(header.begin < header.end[i] - 1)
    if (n.preceding == 0 || header.taken[n.preceding]) {
      stop("invalid file: HEADER and DATA don't match")
    }
    else {
      header.taken[n.preceding] <- TRUE
    }
  }
  if (!all(header.taken)) {
    warning("more HEADER blocks than DATA blocks in supported formats")
  }
  data.end <- numeric(n)
  for (i in seq_len(n - 1)) {
    tmp <- header.begin[header.begin > header.end[i]]
    data.end[i] <- tmp[1]
  }
  tmp <- header.begin[header.begin > header.end[n]]
  if (length(tmp) > 0) {
    data.end[n] <- tmp[1]
  }
  else {
    data.end[n] <- length(inp) + 1
  }
  header.begin <- header.begin[header.taken]
  keycodes <- character(n)
  start.years <- numeric(n)
  end.years <- numeric(n)
  multipliers <- rep(1, n)
  divisors <- rep(100, n)
  site.code <- rep(NA_character_, n)
  tree.vec <- rep(NA_real_, n)
  core.vec <- rep(NA_real_, n)
  radius.vec <- rep(NA_real_, n)
  stemdisk.vec <- rep(NA_real_, n)
  pith.offset <- rep(NA_real_, n)
  for (i in seq_len(n)) {
    this.header <- inp[(header.begin[i] + 1):(header.end[i] -
                                                1)]
    this.keycode <- sub("Keycode=", "", fixed = TRUE, x = grep("^Keycode=",
                                                               this.header, value = TRUE))
    if (length(this.keycode) != 1) {
      string2 <- gettext("number of \"KeyCode\" lines is not 1",
                         domain = "R-dplR")
      stop(gettextf("in series %s: ", as.character(i),
                    domain = "R-dplR"), string2, domain = NA)
    }
    else {
      keycodes[i] <- this.keycode
    }

    this.end.year <- sub("DateEnd=", "", fixed = TRUE, x = grep("^DateEnd=",
                                                                this.header, value = TRUE))
    if (length(this.end.year) != 1) {
      string2 <- gettext("number of \"DateEnd\" lines is not 1",
                         domain = "R-dplR")
      stop(gettextf("in series %s: ", keycodes[i], domain = "R-dplR"),
           string2, domain = NA)
    }
    else {
      end.years[i] <- as.numeric(this.end.year)
    }

    this.start.year <- sub("DateBegin=", "", fixed = TRUE,
                           x = grep("^DateBegin=", this.header, value = TRUE))
    if (length(this.start.year) > 1) {
      string2 <- gettext("number of \"DateBegin\" lines is > 1",
                         domain = "R-dplR")
      stop(gettextf("in series %s: ", keycodes[i], domain = "R-dplR"),
           string2, domain = NA)
    }
    else {
      if (length(this.start.year) == 0) {
        this.length <- sub("Length=", "", fixed = TRUE,
                               x = grep("^Length=", this.header, value = TRUE))
        this.length <- as.numeric(this.length)
        start.years[i] <- end.years[i] - this.length + 1
      } else {
        start.years[i] <- as.numeric(this.start.year)
      }
    }

    this.unit <- sub("Unit=", "", fixed = TRUE, x = grep("^Unit=",
                                                         this.header, value = TRUE))
    if (length(this.unit) == 1) {
      this.unit <- sub("mm", "", this.unit, fixed = TRUE)
      div.loc <- regexpr("/", this.unit, fixed = TRUE)
      if (div.loc > 0) {
        multipliers[i] <- as.numeric(substr(this.unit,
                                            1, div.loc - 1))
        divisors[i] <- as.numeric(substr(this.unit, div.loc +
                                           1, nchar(this.unit)))
      }
      else {
        multipliers[i] <- as.numeric(this.unit)
        divisors[i] <- 1
      }
      if (is.na(multipliers[i]) || is.na(divisors[i])) {
        string2 <- gettext("cannot interpret \"Unit\" line",
                           domain = "R-dplR")
        stop(gettextf("in series %s: ", keycodes[i],
                      domain = "R-dplR"), string2, domain = NA)
      }
    }
    else if (length(this.unit) > 1) {
      string2 <- gettext("number of \"Unit\" lines is > 1",
                         domain = "R-dplR")
      stop(gettextf("in series %s: ", keycodes[i], domain = "R-dplR"),
           string2, domain = NA)
    }
    this.site <- sub("SiteCode=", "", fixed = TRUE, x = grep("^SiteCode=",
                                                             this.header, value = TRUE))
    if (length(this.site) == 1) {
      site.code[i] <- this.site
    }
    this.tree <- sub("TreeNo=", "", fixed = TRUE, x = grep("^TreeNo=",
                                                           this.header, value = TRUE))
    if (length(this.tree) == 1) {
      tmp <- suppressWarnings(as.numeric(this.tree))
      if (identical(tmp, round(tmp))) {
        tree.vec[i] <- tmp
      }
    }
    this.core <- sub("CoreNo=", "", fixed = TRUE, x = grep("^CoreNo=",
                                                           this.header, value = TRUE))
    if (length(this.core) == 1) {
      tmp <- suppressWarnings(as.numeric(this.core))
      if (identical(tmp, round(tmp))) {
        core.vec[i] <- tmp
      }
    }
    this.radius <- sub("RadiusNo=", "", fixed = TRUE, x = grep("^RadiusNo=",
                                                               this.header, value = TRUE))
    if (length(this.radius) == 1) {
      tmp <- suppressWarnings(as.numeric(this.radius))
      if (identical(tmp, round(tmp))) {
        radius.vec[i] <- tmp
      }
    }
    this.stemdisk <- sub("StemDiskNo=", "", fixed = TRUE,
                         x = grep("^StemDiskNo=", this.header, value = TRUE))
    if (length(this.stemdisk) == 1) {
      tmp <- suppressWarnings(as.numeric(this.stemdisk))
      if (identical(tmp, round(tmp))) {
        stemdisk.vec[i] <- tmp
      }
    }
    this.missing <- sub("MissingRingsBefore=", "", fixed = TRUE,
                        x = grep("^MissingRingsBefore=", this.header, value = TRUE))
    if (length(this.missing) == 1) {
      tmp <- suppressWarnings(as.numeric(this.missing))
      if (identical(tmp, round(tmp)) && tmp >= 0) {
        pith.offset[i] <- tmp + 1
      }
    }
  }
  min.year <- min(start.years)
  r.off <- min.year - 1
  max.year <- max(end.years)
  span <- min.year:max.year
  dendro.matrix <- matrix(NA, ncol = n, nrow = length(span))
  colnames(dendro.matrix) <- keycodes
  rownames(dendro.matrix) <- span
  strip.comment <- function(x) {
    strsplit(x, ";")[[1]][1]
  }
  for (i in seq_len(n)) {
    portion.start <- header.end[i] + 1
    portion.end <- data.end[i] - 1
    n.expected <- end.years[i] - start.years[i] + 1
    if (portion.end < portion.start) {
      stop(gettextf("in series %s: ", keycodes[i], domain = "R-dplR"),
           gettextf("too few values (expected %d, got %d)",
                    n.expected, 0, domain = "R-dplR"), domain = NA)
    }
    portion <- inp[portion.start:portion.end]
    if (nchar(portion[1]) < 60 || grepl(";", portion[1],
                                        fixed = TRUE)) {
      data <- as.numeric(vapply(portion, strip.comment,
                                "foo"))
    }
    else {
      data <- numeric(length(portion) * 10)
      for (j in seq_along(portion)) {
        row.fwf <- substring(portion[j], seq(from = 1,
                                             by = 6, length = 10), seq(from = 6, by = 6,
                                                                       length = 10))
        row.numeric <- as.numeric(row.fwf)
        data[(j * 10 - 9):(j * 10)] <- row.numeric
      }
      zeros <- which(data == 0)
      if (length(zeros) > 0) {
        nonzeros <- setdiff(zeros[1]:length(data), zeros)
        if (length(nonzeros) > 0) {
          zeros <- zeros[zeros > max(nonzeros)]
          if (length(zeros) > 0) {
            data <- data[-zeros]
          }
        }
        else {
          data <- data[-zeros]
        }
      }
    }
    data <- data * multipliers[i]/divisors[i]
    n.true <- length(data)
    if (n.true == n.expected) {
      dendro.matrix[(start.years[i] - r.off):(end.years[i] -
                                                r.off), i] <- data
    }
    else if (n.true < n.expected) {
      stop(gettextf("in series %s: ", keycodes[i], domain = "R-dplR"),
           gettextf("too few values (expected %d, got %d)",
                    n.expected, n.true, domain = "R-dplR"), domain = NA)
    }
    else if (all(is.na(data[(n.expected + 1):n.true]))) {
      dendro.matrix[(start.years[i] - r.off):(end.years[i] -
                                                r.off), i] <- data[seq_len(n.expected)]
    }
    else {
      stop(gettextf("in series %s: ", keycodes[i], domain = "R-dplR"),
           gettextf("too many values (expected %d, got %d)",
                    n.expected, n.true, domain = "R-dplR"), domain = NA)
    }
  }
  cat(sprintf(ngettext(n, "There is %d series\n", "There are %d series\n",
                       domain = "R-dplR"), n))
  start.years.char <- format(start.years, scientific = FALSE,
                             trim = TRUE)
  end.years.char <- format(end.years, scientific = FALSE, trim = TRUE)
  seq.series.char <- format(seq_len(n), scientific = FALSE,
                            trim = TRUE)
  cat(paste0(format(seq.series.char, width = 5), "\t", format(keycodes,
                                                              width = 8), "\t", format(start.years.char, width = 5,
                                                                                       justify = "right"), "\t", format(end.years.char, width = 5,
                                                                                                                        justify = "right"), "\t", format(multipliers/divisors,
                                                                                                                                                         scientific = FALSE, drop0trailing = TRUE), "\n"), sep = "")
  rwl <- as.data.frame(dendro.matrix)
  all.have.treeID <- !any(is.na(tree.vec))
  na.core <- is.na(core.vec)
  all.have.coreID <- !any(na.core)
  if (all.have.treeID && !all.have.coreID) {
    foo <- table(tree.vec)
    measured.once <- as.numeric(names(foo)[foo == 1])
    core.vec[na.core & tree.vec %in% measured.once] <- 1
    all.have.coreID <- !any(is.na(core.vec))
  }
  if (all.have.treeID && all.have.coreID) {
    unique.sites <- unique(site.code)
    n.unique <- length(unique.sites)
    if (n.unique > 1) {
      site.vec <- match(site.code, unique.sites)
      tree.vec2 <- complex(n, NA_real_, NA_real_)
      total.dupl <- 0
      for (i in seq_len(n.unique)) {
        idx <- which(site.vec == i)
        ut <- unique(tree.vec[idx])
        for (this.tree in ut) {
          idx2 <- idx[tree.vec[idx] == this.tree]
          if (this.tree %in% tree.vec2) {
            tree.vec2[idx2] <- (0+1i) * (total.dupl +
                                           1)
            total.dupl <- total.dupl + 1
          }
          else {
            tree.vec2[idx2] <- this.tree
          }
        }
      }
      if (total.dupl > 0) {
        dont.change <- Im(tree.vec2) == 0
        existing <- unique(Re(tree.vec2[dont.change]))
        max.existing <- max(existing)
        if (max.existing < 1) {
          free.ids <- 1:total.dupl
        }
        else {
          free.ids <- which(!(1:max.existing %in% existing))
          free.ids <- c(free.ids, seq(from = max.existing +
                                        1, by = 1, length.out = max(0, total.dupl -
                                                                      length(free.ids))))
        }
        tree.vec2[!dont.change] <- free.ids[Im(tree.vec2[!dont.change])]
      }
      tree.vec2 <- Re(tree.vec2)
      adf <- data.frame(tree = tree.vec2, core = core.vec,
                        site = site.vec, row.names = keycodes)
    }
    else {
      adf <- data.frame(tree = tree.vec, core = core.vec,
                        row.names = keycodes)
    }
    if (any(!is.na(radius.vec))) {
      adf <- cbind(adf, radius = radius.vec)
    }
    if (any(!is.na(stemdisk.vec))) {
      adf <- cbind(adf, stemDisk = stemdisk.vec)
    }
    attr(rwl, "ids") <- adf
    cat(gettext("Tree and core IDs were found. See attribute \"ids\".\n",
                domain = "R-dplR"))
  }
  na.po <- is.na(pith.offset)
  if (any(!na.po)) {
    attr(rwl, "po") <- data.frame(series = keycodes, pith.offset = pith.offset)
    if (any(na.po)) {
      cat(gettext("Pith offsets were found (some missing values). See attribute \"po\".\n",
                  domain = "R-dplR"))
    }
    else {
      cat(gettext("Pith offsets were found (no missing values). See attribute \"po\".\n",
                  domain = "R-dplR"))
    }
  }
  class(rwl) <- c("rwl", "data.frame")
  rwl
}
cszang/past5 documentation built on May 14, 2019, 12:27 p.m.